TARS流程助理生成的代码有问题,应该怎么修改?我手动修改之后,代码又自动变回了原样。下面是它生成的原始代码:
import pythoncom
pythoncom.CoInitialize()
import cv2
import numpy as np
import pyautogui
from typing import List, Tuple
def find_image_on_screen(
template_path: str = "",
similarity_threshold: float = 0.95,
version: str = "v1"
) -> List[Tuple[int, int]]:
"""
title: Find every on-screen location that resembles the given image
description: Takes a screenshot of the current screen, uses OpenCV template matching to find
every region whose similarity to the given PNG image exceeds the threshold, removes
overlapping results with non-maximum suppression, and returns the list of centre
coordinates of all matched regions.
inputs:
- template_path (file): Path of the PNG image used as the template; raises when empty, eg: "template.png"
- similarity_threshold (float): Similarity threshold in the range 0~1, default 0.95, eg: 0.95
- version (str): Version tag, default 'v1', eg: "v1"
outputs:
- matched_positions (list): Centre coordinates of all matched regions, each element is an (x, y) tuple, eg: [(100, 200), (300, 400)]
"""
# Every nested helper below is declared with *args / **kw where shown; none of
# the helper bodies reads those extra arguments.
def _validate_inputs(path: str, threshold: float, *args, **kw) -> None:
"""Validate the input arguments; raise ValueError when they are unusable."""
if not path or path.strip() == "":
raise ValueError("template_path 不能为空字符串,请传入有效的PNG图片文件路径。")
if not (0.0 < threshold <= 1.0):
raise ValueError(f"similarity_threshold 必须在 (0, 1] 范围内,当前值: {threshold}")
def _load_template(path: str, *args, **kw) -> np.ndarray:
"""Load the template image from a file path as a BGR numpy array."""
template = cv2.imread(path, cv2.IMREAD_COLOR)
# cv2.imread signals failure by returning None rather than raising.
if template is None:
raise ValueError(f"无法读取模板图片,请检查路径是否正确: '{path}'")
return template
def _capture_screenshot(*args, **kw) -> np.ndarray:
"""Capture the current screen and convert it to an OpenCV BGR numpy array."""
try:
screenshot = pyautogui.screenshot()
screen_np = np.array(screenshot)
# The PIL screenshot is RGB; convert it to OpenCV's BGR channel order.
screen_bgr = cv2.cvtColor(screen_np, cv2.COLOR_RGB2BGR)
return screen_bgr
except Exception as e:
raise RuntimeError(f"屏幕截图失败: {e}")
def _get_candidate_boxes(
screen: np.ndarray,
template: np.ndarray,
threshold: float
) -> List[Tuple[int, int, int, int]]:
"""
Run template matching and return every candidate bounding box at or above the threshold.
Each box is (x1, y1, x2, y2): the top-left and bottom-right corners.
"""
# Run normalised correlation-coefficient template matching.
match_result = cv2.matchTemplate(screen, template, cv2.TM_CCOEFF_NORMED)
# Template height and width.
t_h, t_w = template.shape[:2]
# Locate every position whose match value reaches the threshold.
match_locations = np.where(match_result >= threshold)
# Turn each matching position into a bounding box (x1, y1, x2, y2).
boxes = []
# np.where yields (row indices, column indices), i.e. y first, then x.
for pt_y, pt_x in zip(match_locations[0], match_locations[1]):
x1 = int(pt_x)
y1 = int(pt_y)
x2 = x1 + t_w
y2 = y1 + t_h
boxes.append((x1, y1, x2, y2))
return boxes
def _non_max_suppression(
boxes: List[Tuple[int, int, int, int]],
overlap_threshold: float = 0.5
) -> List[Tuple[int, int, int, int]]:
"""
Apply non-maximum suppression (NMS) to drop heavily overlapping, redundant boxes.
overlap_threshold: boxes whose IoU exceeds this value are treated as duplicates and removed.
Returns the list of boxes that were kept.
"""
if not boxes:
return []
boxes_arr = np.array(boxes, dtype=np.float32)
x1_arr = boxes_arr[:, 0]
y1_arr = boxes_arr[:, 1]
x2_arr = boxes_arr[:, 2]
y2_arr = boxes_arr[:, 3]
# Area of every box.
areas = (x2_arr - x1_arr + 1) * (y2_arr - y1_arr + 1)
# Sort by bottom-edge y coordinate (ascending) and process candidates in that order.
# Note: match scores are not available here, so they play no part in the ranking.
order = np.argsort(y2_arr)
kept = []
while order.size > 0:
# Take the first box in the current ordering as the one to keep.
idx = order[0]
kept.append(boxes[idx])
if order.size == 1:
break
# Intersection rectangle between the kept box and every remaining box.
inter_x1 = np.maximum(x1_arr[idx], x1_arr[order[1:]])
inter_y1 = np.maximum(y1_arr[idx], y1_arr[order[1:]])
inter_x2 = np.minimum(x2_arr[idx], x2_arr[order[1:]])
inter_y2 = np.minimum(y2_arr[idx], y2_arr[order[1:]])
# Intersection width/height; a negative value means no overlap, so clamp to 0.
inter_w = np.maximum(0.0, inter_x2 - inter_x1 + 1)
inter_h = np.maximum(0.0, inter_y2 - inter_y1 + 1)
inter_area = inter_w * inter_h
# Intersection over union (IoU); the 1e-6 floor avoids division by zero.
union_area = areas[idx] + areas[order[1:]] - inter_area
iou = inter_area / np.maximum(union_area, 1e-6)
# Keep only boxes whose IoU with the kept box is at or below the threshold.
keep_mask = np.where(iou <= overlap_threshold)[0]
# iou is indexed relative to order[1:], hence the +1 to map back into order.
order = order[keep_mask + 1]
return kept
def _boxes_to_centers(
boxes: List[Tuple[int, int, int, int]]
) -> List[Tuple[int, int]]:
"""Convert a list of bounding boxes into a list of centre-point coordinates."""
centers = []
for (x1, y1, x2, y2) in boxes:
cx = int((x1 + x2) / 2)
cy = int((y1 + y2) / 2)
centers.append((cx, cy))
return centers
# Step 1: validate the input arguments.
# NOTE(review): the pasted code appears to be cut off in the middle of this call
# (the argument name is incomplete and the remaining steps are missing) — confirm
# against the original source before relying on this copy.
_validate_inputs(template_)
想要改成:
import cv2
import numpy as np
import pyautogui
from typing import List, Tuple
def find_image_on_screen(
    template_path: str = "",
    similarity_threshold: float = 0.95,
    version: str = "v1"
) -> List[Tuple[int, int]]:
    """
    title: Find every on-screen location that resembles the given image
    description: Takes a screenshot of the current screen, runs OpenCV template
        matching against the given PNG image, keeps the regions whose similarity
        reaches the threshold, removes overlapping hits with score-ranked
        non-maximum suppression, and returns the centre point of every
        remaining region ordered top-to-bottom, then left-to-right.
    inputs:
        - template_path (file): Path of the PNG image used as the template; raises when empty, eg: "template.png"
        - similarity_threshold (float): Similarity threshold in the range 0~1, default 0.95, eg: 0.95
        - version (str): Version tag, default 'v1', eg: "v1"
    outputs:
        - matched_positions (list): Centre coordinates of all matched regions, each element is an (x, y) tuple, eg: [(100, 200), (300, 400)]

    Raises:
        ValueError: if the path is empty, the threshold is outside (0, 1],
            the template cannot be read or decoded, or the template is larger
            than the screenshot.
        RuntimeError: if taking the screenshot fails.
    """

    def _validate_inputs(path: str, threshold: float, version: str) -> None:
        """Reject an empty template path or a threshold outside (0, 1]."""
        if not path or not path.strip():
            raise ValueError("template_path 不能为空字符串,请传入有效的PNG图片文件路径。")
        # A NaN threshold fails this comparison as well and is rejected.
        if not (0.0 < threshold <= 1.0):
            raise ValueError(f"similarity_threshold 必须在 (0, 1] 范围内,当前值: {threshold}")
        # `version` is currently only an identifier; nothing branches on it.

    def _load_template(path: str) -> np.ndarray:
        """Read the template file and decode it into a BGR numpy array."""
        # Read the bytes with numpy and decode in memory instead of calling
        # cv2.imread: imread cannot open paths containing non-ASCII
        # characters (e.g. Chinese) on Windows.
        try:
            raw = np.fromfile(path, dtype=np.uint8)
        except OSError as exc:
            raise ValueError(f"无法读取模板图片,请检查路径是否正确: '{path}'") from exc
        # An empty buffer cannot be decoded; treat it like an unreadable file.
        template = cv2.imdecode(raw, cv2.IMREAD_COLOR) if raw.size else None
        if template is None:
            raise ValueError(f"无法读取模板图片,请检查路径是否正确: '{path}'")
        return template

    def _capture_screenshot() -> np.ndarray:
        """Capture the current screen as an OpenCV BGR numpy array."""
        try:
            shot = pyautogui.screenshot()
            # The PIL screenshot is RGB; OpenCV expects BGR channel order.
            return cv2.cvtColor(np.array(shot), cv2.COLOR_RGB2BGR)
        except Exception as exc:
            raise RuntimeError(f"屏幕截图失败: {exc}") from exc

    def _get_candidate_boxes(
        screen: np.ndarray,
        template: np.ndarray,
        threshold: float
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Run template matching and collect every hit at or above the threshold.

        Returns (boxes, scores): boxes is an (n, 4) integer array of
        (x1, y1, x2, y2) rows where (x2, y2) is the exclusive bottom-right
        corner, and scores holds the matching similarity of each row.
        """
        t_h, t_w = template.shape[:2]
        s_h, s_w = screen.shape[:2]
        # matchTemplate requires the template to fit inside the image; fail
        # with a readable message instead of an opaque OpenCV assertion.
        if t_h > s_h or t_w > s_w:
            raise ValueError(
                f"模板图片尺寸 ({t_w}x{t_h}) 大于屏幕截图尺寸 ({s_w}x{s_h}),无法进行匹配。"
            )
        score_map = cv2.matchTemplate(screen, template, cv2.TM_CCOEFF_NORMED)
        # Ignore non-finite scores defensively before thresholding.
        hits = np.isfinite(score_map) & (score_map >= threshold)
        # np.nonzero yields (row indices, column indices), i.e. y then x.
        ys, xs = np.nonzero(hits)
        boxes = np.stack([xs, ys, xs + t_w, ys + t_h], axis=1)
        return boxes, score_map[ys, xs]

    def _non_max_suppression(
        boxes: np.ndarray,
        scores: np.ndarray,
        overlap_threshold: float = 0.5
    ) -> List[Tuple[int, int, int, int]]:
        """
        Drop boxes that overlap a better-scoring box by more than the IoU limit.

        Candidates are visited from the highest score down, so the survivor
        of each cluster is its best match. The result is ordered
        top-to-bottom, then left-to-right.
        """
        if len(boxes) == 0:
            return []
        x1, y1, x2, y2 = boxes.astype(np.float64).T
        # Boxes use an exclusive bottom-right corner, so no +1 is needed.
        areas = (x2 - x1) * (y2 - y1)
        # Stable sort keeps scan order among equal scores (deterministic).
        order = np.argsort(-scores, kind="stable")
        kept = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            kept.append(int(best))
            if rest.size == 0:
                break
            # Overlap of the kept box with every remaining candidate;
            # negative extents mean no overlap and are clamped to zero.
            inter_w = np.maximum(
                0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest])
            )
            inter_h = np.maximum(
                0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest])
            )
            inter_area = inter_w * inter_h
            union_area = areas[best] + areas[rest] - inter_area
            # The 1e-6 floor guards against division by zero.
            iou = inter_area / np.maximum(union_area, 1e-6)
            order = rest[iou <= overlap_threshold]
        survivors = [tuple(int(v) for v in boxes[i]) for i in kept]
        # Present results in reading order rather than score order.
        survivors.sort(key=lambda box: (box[1], box[0]))
        return survivors

    def _boxes_to_centers(
        boxes: List[Tuple[int, int, int, int]]
    ) -> List[Tuple[int, int]]:
        """Convert (x1, y1, x2, y2) boxes into integer centre points."""
        return [((x1 + x2) // 2, (y1 + y2) // 2) for x1, y1, x2, y2 in boxes]

    # Step 1: validate the input arguments.
    _validate_inputs(template_path, similarity_threshold, version)
    # Step 2: load the template image.
    template = _load_template(template_path)
    # Step 3: capture the screen.
    screen = _capture_screenshot()
    # Step 4: collect every candidate box that reaches the threshold.
    candidate_boxes, candidate_scores = _get_candidate_boxes(
        screen, template, similarity_threshold
    )
    # Step 5: remove overlapping boxes, keeping the best-scoring ones.
    final_boxes = _non_max_suppression(candidate_boxes, candidate_scores)
    # Step 6: convert the boxes into centre coordinates and return them.
    matched_positions = _boxes_to_centers(final_boxes)
    return matched_positions
0人点赞
后可进行评论
扫码关注
获取专业的解决方案
帮您实现业务爆发式的增长



