如何提高pytesseract在PCB原理图上OCR的结果?
我正在尝试使用OpenCV和Python-tesseract进行OCR(光学字符识别),把一张PCB原理图的图片转换成文字(原帖此处附有示例图片)。目前使用的代码如下:
import cv2
import pytesseract
import argparse
import numpy as np
# Strings that are discarded when Tesseract reports them as a "word"
# (punctuation-only fragments and similar junk). Stored as a set so that the
# membership test is O(1) and duplicates in the list are irrelevant.
NOISE_TOKENS = frozenset({
    '-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!', '°', '——=',
    ';', '+', '©',
})

# ``-c`` is required for Tesseract to interpret ``name=value`` as a variable
# assignment; without it the token is taken to be a config-file name and the
# blacklist is never applied. The language is passed via ``lang=`` instead.
# NOTE(review): ``--psm 11`` (sparse text) may suit scattered schematic labels
# better than ``--psm 6`` (single uniform block) -- worth trying.
TESSERACT_CONFIG = r'--psm 6 --oem 3 -c tessedit_char_blacklist=,;:'


def preprocess_for_ocr(bgr_image):
    """Return a binarized, single-channel version of *bgr_image* for OCR.

    The input must be in OpenCV's native BGR channel order (as returned by
    ``cv2.imread``). The result has dark text on a light background, which
    is the polarity recommended for Tesseract 4 and later.
    """
    # Stretch the intensity range to the full 0..255 span.
    normalized = cv2.normalize(bgr_image, None, 0, 255, cv2.NORM_MINMAX)
    # Remove noise while the image is still in colour.
    denoised = cv2.fastNlMeansDenoisingColored(normalized, None, 10, 10, 7, 15)
    gray = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold is computed from the histogram, so the
    # explicit threshold argument is ignored; pass 0 to make that obvious.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Background pixels are the majority. If most pixels are black, the image
    # is light-on-dark, so invert it; otherwise leave it untouched.
    if cv2.countNonZero(binary) * 2 < binary.size:
        binary = cv2.bitwise_not(binary)
    return binary


def select_words(ocr_data, noise_tokens=NOISE_TOKENS):
    """Return the unique, non-noise words found in *ocr_data*.

    *ocr_data* is the dictionary produced by ``pytesseract.image_to_data``
    with ``output_type=Output.DICT``; only its ``text``, ``left``, ``top``,
    ``width`` and ``height`` lists are read.

    Returns a list of ``(text, (left, top, width, height))`` tuples in the
    order Tesseract reported them. Entries whose text is empty, whitespace
    only, or listed in *noise_tokens* are skipped, and exact duplicates are
    reported once.
    """
    boxes = zip(ocr_data['left'], ocr_data['top'],
                ocr_data['width'], ocr_data['height'])
    unique = {}
    for raw_text, box in zip(ocr_data['text'], boxes):
        token = raw_text.strip()
        if token and token not in noise_tokens:
            # A dict keeps insertion order, so this de-duplicates without
            # shuffling the results the way ``set`` would.
            unique[(token, box)] = None
    return list(unique)


def main():
    """Run OCR on the image named on the command line and show the result."""
    # Argument parsing
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    args = parser.parse_args()

    # Read the input image. cv2.imread signals failure by returning None
    # rather than raising, so check explicitly.
    image = cv2.imread(args.input_file)
    if image is None:
        parser.error(f"could not read image: {args.input_file}")

    # Keep the image in BGR: cv2.cvtColor(..., COLOR_BGR2GRAY) and cv2.imshow
    # both assume that channel order.
    annotated = image.copy()

    binary = preprocess_for_ocr(image)
    ocr_data = pytesseract.image_to_data(
        binary,
        lang='eng',
        config=TESSERACT_CONFIG,
        output_type=pytesseract.Output.DICT,
    )
    words = select_words(ocr_data)
    print(len(words))

    for text, (x, y, w, h) in words:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 255), 2)
        # Draw the text above the box, clamped so that labels for boxes at
        # the very top of the image are still visible.
        cv2.putText(annotated, text, (x, max(y - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
进行OCR处理后的效果如下。我发现结果还有很大的提升空间。
附上的图片是从谷歌上找的样本图,仅仅是为了说明问题。我使用的原始图片分辨率是300 dpi,质量很好。我的主要问题是:如何优化pytesseract的OCR,以便从嵌入式微控制器电路板的原理图中识别出文字数据。
1 个回答
0
你可以试试paddleocr这个工具,我用它测试了一下电路板的图片。
from paddleocr import PaddleOCR,draw_ocr
from PIL import Image

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set the `lang` parameter to `ch`, `en`, `fr`, `german`, `korean` or `japan`
# to select the corresponding language model.
# The engine only needs to be created once; doing so downloads the model
# (if necessary) and loads it into memory.
ocr = PaddleOCR(use_angle_cls=False, det=False, lang='en', use_gpu=False)
img_path = 'PCB.png'
result = ocr.ocr(img_path, cls=False)

# `result` holds one entry per input image; each entry is a list of
# [box, (text, score)] items.
# NOTE(review): some PaddleOCR releases appear to return None for an image
# in which no text was detected -- hence the `or []` guards below.
for res in result:
    for line in res or []:
        print(line)

# draw result
detections = (result[0] if result else None) or []
if detections:
    image = Image.open(img_path).convert('RGB')
    boxes = [line[0] for line in detections]
    txts = [line[1][0] for line in detections]
    scores = [line[1][1] for line in detections]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./arial.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
else:
    print(f"No text detected in {img_path}")