动态边界框检测:基于注释文件的Python图像裁剪脚本无法准确捕捉定义的边界框
我有一些标注过的图片,想根据一个文本文件里的边界框信息来加载和裁剪这些图片。不过,尽管我写了代码,还是在识别图片边界时遇到了问题,结果不太准确。
如果有人能帮我解决这个问题,我会非常感激。
我的代码是:
import os
import cv2
def crop_image_with_annotation(image_path, annotation_path, output_folder):
    """Crop every annotated bounding box out of one image and save the crops.

    The annotation file is read line by line. Each non-blank line must hold
    either ``label x_center y_center width height`` or
    ``x_center y_center width height``. The four box values are treated as
    fractions of the image width/height (box centre plus full box size).

    When the file describes a single box, the crop keeps the image's own file
    name. When it describes several, each crop is written as
    ``<name>_<index><ext>`` so later boxes do not overwrite earlier ones.

    Args:
        image_path: Path of the image to crop.
        annotation_path: Path of the text file holding the box lines.
        output_folder: Folder that receives the cropped images.

    Returns:
        None. Problems (unreadable image, malformed line, empty box, failed
        write) are reported with ``print`` and the affected item is skipped.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        print(f"Error: Could not read image {image_path}")
        return
    print(f"Image loaded: {image_path}")

    img_height, img_width = img.shape[:2]
    print(f"Image height: {img_height}, Image width: {img_width}")

    # Keep every non-blank line: one image may carry several boxes
    with open(annotation_path, 'r') as f:
        lines = [line.strip() for line in f if line.strip()]
    if not lines:
        print("Invalid annotation format")
        return

    stem, ext = os.path.splitext(os.path.basename(image_path))
    for index, line in enumerate(lines):
        print("Annotation line:", line)
        values = line.split()

        # A leading fifth token is the class label; it is not needed to crop
        if len(values) == 5:
            print("Found class label:", values[0])
            values = values[1:]
        if len(values) != 4:
            print("Invalid annotation format")
            continue
        try:
            x_center, y_center, width, height = map(float, values)
        except ValueError:
            print("Invalid annotation format")
            continue

        x1, y1, x2, y2 = _box_to_pixels(
            x_center, y_center, width, height, img_width, img_height
        )
        print(f"Calculated crop coordinates: (x1, y1), (x2, y2) = ({x1}, {y1}), ({x2}, {y2})")

        # An empty slice cannot be encoded by cv2.imwrite, so skip it
        if x2 <= x1 or y2 <= y1:
            print(f"Empty bounding box, skipping: {line}")
            continue
        cropped_img = img[y1:y2, x1:x2]

        if len(lines) == 1:
            out_name = stem + ext
        else:
            out_name = f"{stem}_{index}{ext}"
        output_path = os.path.join(output_folder, out_name)

        # cv2.imwrite returns False when the file could not be written
        if cv2.imwrite(output_path, cropped_img):
            print(f"Cropped image saved: {output_path}")
        else:
            print(f"Error: Could not write {output_path}")


def _box_to_pixels(x_center, y_center, width, height, img_width, img_height):
    """Convert a normalized centre/size box to clamped integer pixel corners."""
    # Compute each edge in floating point and round once, so the centre and
    # the half-size are not truncated separately.
    x1 = int(round((x_center - width / 2) * img_width))
    y1 = int(round((y_center - height / 2) * img_height))
    x2 = int(round((x_center + width / 2) * img_width))
    y2 = int(round((y_center + height / 2) * img_height))
    return max(0, x1), max(0, y1), min(img_width, x2), min(img_height, y2)
# Folder holding the images and their same-named .txt annotation files, and
# the folder that receives the crops
input_folder = r"C:\Users\Desktop\crop"
output_folder = r"C:\Users\Desktop\crop\new"

# exist_ok=True makes this a no-op when the output folder is already there
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Only .jpg / .jpeg files are treated as images
    if not filename.endswith((".jpg", ".jpeg")):
        continue

    image_path = os.path.join(input_folder, filename)
    # The annotation file shares the image's base name, with a .txt extension
    annotation_path = os.path.join(input_folder, os.path.splitext(filename)[0] + ".txt")

    if os.path.exists(annotation_path):
        crop_image_with_annotation(image_path, annotation_path, output_folder)
    else:
        print(f"Annotation file not found for image {filename}. Skipping...")
我的输入文件在桌面上的一个名为“crop”的文件夹里,格式是jpg
:
输出的裁剪图片是:
我想读取整个车牌:
我的标注文件是.txt格式,里面有这些值:
0 0.5247395833333334 0.687037037037037 0.06822916666666666 0.04814814814814815
另一张图片是:它对应的 .txt 文件内容是
3 0.20859375 0.7708333333333334 0.37447916666666664 0.44722222222222224
2 0.55 0.711574074074074 0.20520833333333333 0.5601851851851852
2 0.7627604166666667 0.7435185185185185 0.17239583333333333 0.5055555555555555
1 0.5692708333333333 0.9523148148148148 0.034375 0.05092592592592592
现在我的代码输出是:
我还需要检测到两辆摩托车和它们的头盔。
我期望的输出是:
2 个回答
0
好的,我花了一些时间试了一下:我把 input.png 放在 inp 文件夹里,同一个文件夹里还有 annotation.txt。
input.png
:
annotation.txt
:
1 238 230 180 178
用的代码:
import os
import cv2
def crop_image_with_annotation(image_path, annotation_path, output_folder):
    """Crop one box out of an image and save it with an ``output_`` prefix.

    Only the first line of the annotation file is used. It must contain
    either ``label x_min y_min width height`` or ``x_min y_min width height``;
    the values are used directly as pixel coordinates of the top-left corner
    and the box size. Any other number of values prints a message and
    returns without writing anything.
    """
    img = cv2.imread(image_path)

    # Only the first annotation line is consumed
    with open(annotation_path, 'r') as f:
        line = f.readline().strip()
    print("Annotation line:", line)  # Debugging line
    print("Image saved:",)

    values = line.split()
    if len(values) not in (4, 5):
        print("Invalid annotation format")
        return

    numbers = [float(token) for token in values]
    if len(numbers) == 5:
        # The first value is the class label
        class_label, x_min, y_min, width, height = numbers
        print('class_label, x_min, y_min, width, height :' , class_label, x_min, y_min, width, height)
    else:
        x_min, y_min, width, height = numbers

    # The image size is read here but the coordinates below are not scaled by it
    img_height, img_width = img.shape[:2]

    # Top-left corner of the crop
    x1 = int(x_min)
    print('x1 : ' , x1)
    y1 = int(y_min)
    print('y1 : ', y1)
    # Bottom-right corner: top-left corner plus the box size
    x2 = int(x_min + width)
    y2 = int(y_min + height)

    cropped_img = img[y1:y2, x1:x2]

    filename = os.path.basename(image_path)
    output_path = os.path.join(output_folder, 'output_' + filename)
    cv2.imwrite(output_path, cropped_img)
    print('-->' ,filename , output_path)
# Images and the shared annotation file are read from "inp"; crops go to "out"
input_folder = "inp"
output_folder = "out"

# exist_ok=True makes this a no-op when the output folder is already there
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Only .jpg / .png files are treated as images
    if not filename.endswith((".jpg", ".png")):
        continue

    image_path = os.path.join(input_folder, filename)
    print('image_path : ', image_path)

    # Every image uses the same annotation.txt rather than a per-image .txt file
    annotation_path = os.path.join(input_folder, "annotation.txt")

    if os.path.exists(annotation_path):
        crop_image_with_annotation(image_path, annotation_path, output_folder)
    else:
        print(f"Annotation file not found for image {filename}. Skipping...")
我在 out 文件夹里得到了文件 output_input.png:
所以我猜问题可能出在 annotation.txt 里的数值与下面这段坐标计算之间的对应关系上:
# Use the annotation values directly as pixel coordinates. Nothing here is
# scaled by the image size: img_height and img_width are read but not used.
img_height, img_width = img.shape[:2]
# Top-left corner of the crop
x1 = int(x_min )
print('x1 : ' , x1)
y1 = int(y_min )
print('y1 : ', y1)
# Bottom-right corner: top-left corner plus the box width and height
x2 = int((x_min + width))
y2 = int((y_min + height))
或者可能是,annotation.txt
:
1 0.264 0.365 0.2 0.284
带有坐标转换:
# Convert normalized coordinates to pixel coordinates
# NOTE(review): assumes x_min, y_min, width and height are fractions of the
# image size, with (x_min, y_min) being the top-left corner - confirm against
# the tool that produced the annotation file.
img_height, img_width = img.shape[:2]
print('image : img_height, img_width : ', img_height, img_width)
# Top-left corner, scaled by the image size
x1 = int(x_min*img_width )
print('x1 : ' , x1)
y1 = int(y_min*img_height)
print('y1 : ', y1)
# Bottom-right corner: the already-truncated top-left pixel plus the scaled box size
x2 = int((x1 + width*img_width))
print('x2 : ' , x2)
y2 = int(y1 + height*img_height)
print('y2 : ', y2)
这应该和你的下面这种写法是等价的:
# Top-left corner: the annotation value scaled by the image size
x1 = int(x_min * img_width)
y1 = int(y_min * img_height)
# Bottom-right corner: position and size are summed first, then scaled and
# truncated once
x2 = int((x_min + width) * img_width)
y2 = int((y_min + height) * img_height)
输出:
但没有输入文件,我们很难判断
1
你没有说明这些标注是从哪里来的,或者这些数字具体是什么意思。
它们看起来像是中心点和大小,而不是“左上角和右下角”。
你需要从中心点减去一半的宽度和高度来得到一个角的位置,然后再加上另一半的宽度和高度来得到另一个角的位置。