动态边界框检测:基于注释文件的Python图像裁剪脚本无法准确捕捉定义的边界框

0 投票
2 回答
70 浏览
提问于 2025-04-14 17:57

我有一些标注过的图片,想根据一个文本文件里的边界框信息来加载和裁剪这些图片。不过,尽管我写了代码,还是在识别图片边界时遇到了问题,结果不太准确。

如果有人能帮我解决这个问题,我会非常感激。

我的代码是:

   import os
import cv2

def crop_image_with_annotation(image_path, annotation_path, output_folder):
    """Crop every annotated bounding box out of an image and save the crops.

    Each non-empty line of the annotation file must contain either
    ``class x_center y_center width height`` or
    ``x_center y_center width height``. The four box values are treated as
    fractions of the image size (they are multiplied by the image width and
    height), with the box described by its centre point and full size.

    The first valid box is written to ``<output_folder>/<image file name>``;
    every further box is written to ``<stem>_<n><ext>`` (n = 1, 2, ...) so
    that several boxes from one image do not overwrite each other.

    Args:
        image_path: Path of the image to crop.
        annotation_path: Path of the text file holding the box lines.
        output_folder: Existing directory that receives the cropped images.

    Returns:
        None. Problems (unreadable image, malformed line, empty box) are
        reported with ``print`` and the offending item is skipped.
    """
    # Read image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        print(f"Error: Could not read image {image_path}")
        return
    print(f"Image loaded: {image_path}")

    img_height, img_width = img.shape[:2]
    print(f"Image height: {img_height}, Image width: {img_width}")

    # Read all annotation lines up front so the file is closed before writing
    with open(annotation_path, 'r') as f:
        lines = [raw_line.strip() for raw_line in f]

    stem, ext = os.path.splitext(os.path.basename(image_path))
    saved = 0

    for line in lines:
        if not line:
            continue
        print("Annotation line:", line)

        try:
            values = [float(token) for token in line.split()]
        except ValueError:
            print("Invalid annotation format")
            continue

        # A leading fifth value is the class label; it is not needed to crop
        if len(values) == 5:
            print("Found class label:", values[0])
            values = values[1:]
        elif len(values) != 4:
            print("Invalid annotation format")
            continue
        x_center, y_center, width, height = values

        # Work out the corners in floating point and round once at the end;
        # truncating the centre and the half-size separately can move an
        # edge by a pixel.
        x1 = max(0, round((x_center - width / 2) * img_width))
        y1 = max(0, round((y_center - height / 2) * img_height))
        x2 = min(img_width, round((x_center + width / 2) * img_width))
        y2 = min(img_height, round((y_center + height / 2) * img_height))

        # An empty slice cannot be encoded by cv2.imwrite
        if x2 <= x1 or y2 <= y1:
            print(f"Empty crop for annotation line: {line}. Skipping...")
            continue

        # Print calculated crop coordinates
        print(f"Calculated crop coordinates: (x1, y1), (x2, y2) = ({x1}, {y1}), ({x2}, {y2})")

        # Crop image based on bounding box coordinates (rows = y, columns = x)
        cropped_img = img[y1:y2, x1:x2]

        # Save cropped image to output folder
        crop_name = f"{stem}{ext}" if saved == 0 else f"{stem}_{saved}{ext}"
        output_path = os.path.join(output_folder, crop_name)
        cv2.imwrite(output_path, cropped_img)
        print(f"Cropped image saved: {output_path}")
        saved += 1

    if saved == 0:
        print(f"No valid bounding boxes found in {annotation_path}")


# Folder that holds the images with their annotation files, and the folder
# that receives the crops
input_folder = r"C:\Users\Desktop\crop"
output_folder = r"C:\Users\Desktop\crop\new"

# Make sure the destination exists before any crop is written
os.makedirs(output_folder, exist_ok=True)

# Walk the input folder and crop every JPEG that has a matching annotation
for filename in os.listdir(input_folder):
    if not filename.endswith((".jpg", ".jpeg")):
        continue  # not an image file

    # The annotation shares the image's base name, with a .txt extension
    image_path = os.path.join(input_folder, filename)
    annotation_path = os.path.join(input_folder, os.path.splitext(filename)[0] + ".txt")

    if os.path.exists(annotation_path):
        # Crop image based on annotation
        crop_image_with_annotation(image_path, annotation_path, output_folder)
    else:
        print(f"Annotation file not found for image {filename}. Skipping...")

我的输入文件在桌面上的一个名为“crop”的文件夹里,格式是jpg

输出的裁剪图片是:

我想读取整个车牌:

我的标注文件是.txt格式,里面有这些值:

0 0.5247395833333334 0.687037037037037 0.06822916666666666 0.04814814814814815

对于另一张图片,我的 .txt 标注文件内容是:

3 0.20859375 0.7708333333333334 0.37447916666666664 0.44722222222222224
2 0.55 0.711574074074074 0.20520833333333333 0.5601851851851852
2 0.7627604166666667 0.7435185185185185 0.17239583333333333 0.5055555555555555
1 0.5692708333333333 0.9523148148148148 0.034375 0.05092592592592592

现在我的代码输出是:

我还需要检测到两辆摩托车和它们的头盔。

我期望的输出是:

2 个回答

0

好的,花了一些时间。我把 input.png 放在 inp 文件夹里,同一个文件夹里还有 annotation.txt。

input.png :

这里插入图片描述

annotation.txt:

1 238 230 180 178

用的代码:

import os
import cv2
def crop_image_with_annotation(image_path, annotation_path, output_folder):
    """Crop one box, given in pixel units, out of an image and save it.

    Only the first line of the annotation file is used. It must contain
    either ``class x_min y_min width height`` or ``x_min y_min width height``.
    The four box values are used directly as pixel coordinates: the top-left
    corner followed by the box size.

    The crop is written to ``<output_folder>/output_<image file name>``.

    Args:
        image_path: Path of the image to crop.
        annotation_path: Path of the text file holding the box line.
        output_folder: Existing directory that receives the cropped image.

    Returns:
        None. Problems (unreadable image, malformed line, empty box) are
        reported with ``print`` and nothing is written.
    """
    # Read image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        print(f"Error: Could not read image {image_path}")
        return

    # Read bounding box coordinates from annotation file
    with open(annotation_path, 'r') as f:
        line = f.readline().strip()
    print("Annotation line:", line)  # Debugging line

    # Split the line into values
    values = line.split()
    # Check if the line contains the class label
    if len(values) == 5:
        class_label, x_min, y_min, width, height = map(float, values)
        print('class_label, x_min, y_min, width, height :' , class_label, x_min, y_min, width, height)
    elif len(values) == 4:
        x_min, y_min, width, height = map(float, values)
    else:
        print("Invalid annotation format")
        return

    # The values are already pixel coordinates; clamp them to the image so a
    # negative corner cannot wrap around to the far side when slicing.
    img_height, img_width = img.shape[:2]
    x1 = max(0, int(x_min))
    print('x1 : ' , x1)
    y1 = max(0, int(y_min))
    print('y1 : ', y1)
    x2 = min(img_width, int(x_min + width))
    y2 = min(img_height, int(y_min + height))

    # An empty slice cannot be encoded by cv2.imwrite
    if x2 <= x1 or y2 <= y1:
        print(f"Empty crop for annotation line: {line}. Skipping...")
        return

    # Crop image based on bounding box coordinates (rows = y, columns = x)
    cropped_img = img[y1:y2, x1:x2]
    # Save cropped image to output folder
    filename = os.path.basename(image_path)
    output_path = os.path.join(output_folder,'output_'+filename)
    cv2.imwrite(output_path, cropped_img)

    print('-->' ,filename , output_path)
    
    
# Source folder (images plus the annotation file) and destination folder
input_folder = "inp"
output_folder = "out"

# Make sure the destination exists before any crop is written
os.makedirs(output_folder, exist_ok=True)

# Walk the input folder and crop every JPG/PNG found there
for filename in os.listdir(input_folder):
    if not filename.endswith((".jpg", ".png")):
        continue  # not an image file

    image_path = os.path.join(input_folder, filename)
    print('image_path : ', image_path)

    # A single annotation.txt in the input folder is used for every image,
    # rather than one .txt file per image.
    annotation_path = os.path.join(input_folder, "annotation.txt")

    if os.path.exists(annotation_path):
        # Crop image based on annotation
        crop_image_with_annotation(image_path, annotation_path, output_folder)
    else:
        print(f"Annotation file not found for image {filename}. Skipping...")

我在 out 文件夹里得到了文件 output_input.png :

这里插入图片描述

所以我猜问题可能出在 annotation.txt 和这里的坐标之间的关系:

# Convert normalized coordinates to pixel coordinates
    img_height, img_width = img.shape[:2]
    x1 = int(x_min )
    
    print('x1 : ' , x1)
    
    y1 = int(y_min )
    
    print('y1 : ', y1)
        
    x2 = int((x_min + width))
    y2 = int((y_min + height))

或者可能是,annotation.txt :

1 0.264 0.365 0.2 0.284

带有坐标转换:

 # Convert normalized coordinates to pixel coordinates
    img_height, img_width = img.shape[:2]
    
    print('image : img_height, img_width : ', img_height, img_width)
    
    x1 = int(x_min*img_width )
    
    print('x1 : ' , x1)
    
    y1 = int(y_min*img_height)
    
    print('y1 : ', y1)
        
    x2 = int((x1 + width*img_width))
    
    print('x2 : ' , x2)
    
    y2 = int(y1 + height*img_height)
    
    print('y2 : ', y2)

这应该和你的写法等价:

 x1 = int(x_min * img_width)
    
    y1 = int(y_min * img_height)
    
    x2 = int((x_min + width) * img_width)
    
    y2 = int((y_min + height) * img_height)

输出:

这里插入图片描述

但没有输入文件,我们很难判断

1

你没有说明这些标注是从哪里来的,或者这些数字具体是什么意思。

它们看起来像是中心点和大小,而不是“左上角和右下角”。

你需要从中心点减去一半的宽度和高度来得到一个角的位置,然后再加上另一半的宽度和高度来得到另一个角的位置。

撰写回答