如何提高pytesseract在PCB原理图上OCR的结果?

0 投票
1 回答
36 浏览
提问于 2025-04-14 16:23

我正在尝试使用OpenCV和Python-tesseract进行OCR(光学字符识别),把下面这张图片转换成文字:

处理前的图片

import cv2
import pytesseract
import argparse
import numpy as np

# Tokens Tesseract tends to emit for wires, pads and other line art in a
# schematic; they carry no useful text and are dropped from the results.
# (Duplicates in the original list were removed; a frozenset gives O(1) lookup.)
IGNORED_TOKENS = frozenset({
    '-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!', '°', '——=',
    ';', '+', '©',
})

# Config variables such as `tessedit_char_blacklist` must be introduced with
# `-c`; without it Tesseract treats the argument as a config *file* name and
# the blacklist is never applied.
# NOTE(review): `--psm 6` assumes a single uniform block of text. Sparse-text
# modes (`--psm 11` / `--psm 12`) may suit schematics better -- try and compare.
TESSERACT_CONFIG = r'--psm 6 --oem 3 -l eng -c tessedit_char_blacklist=,;:'


def filter_ocr_tokens(texts, boxes, ignored=IGNORED_TOKENS):
    """Pair OCR tokens with their boxes, dropping duplicates and noise.

    Args:
        texts: Recognised strings, one per detection.
        boxes: Hashable ``(left, top, width, height)`` tuples, parallel to
            ``texts``.
        ignored: Tokens to discard (compared after stripping whitespace).

    Returns:
        A list of ``(text, box)`` tuples in first-seen order, without
        duplicates, blank tokens, or tokens listed in ``ignored``.
    """
    # dict.fromkeys de-duplicates while preserving insertion order, so the
    # result is deterministic (a plain set would shuffle it between runs).
    unique_pairs = dict.fromkeys(zip(texts, boxes))
    kept = []
    for text, box in unique_pairs:
        token = text.strip()
        if token and token not in ignored:
            kept.append((text, box))
    return kept


def preprocess_for_ocr(image):
    """Return a binarised, single-channel version of a BGR image.

    The input is expected in OpenCV's native BGR channel order (as returned
    by ``cv2.imread``); it is not modified.
    """
    # Stretch the intensity range to the full 0..255 span.
    normalized = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)

    # Remove noise
    denoised = cv2.fastNlMeansDenoisingColored(normalized, None, 10, 10, 7, 15)

    gray = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is computed automatically, so the
    # explicit threshold argument is ignored; 0 makes that obvious.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # NOTE(review): Tesseract's documentation recommends dark text on a light
    # background. If the schematic is already dark-on-light, this inversion
    # probably hurts recognition -- confirm against the real input images.
    return cv2.bitwise_not(binary)


def main():
    """Run OCR on the image given on the command line and show the result."""
    # Argument parsing
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    args = parser.parse_args()

    # Read the input image. cv2.imread returns None instead of raising when
    # the file is missing or unreadable, so check explicitly.
    image = cv2.imread(args.input_file)
    if image is None:
        parser.error(f"could not read image: {args.input_file}")

    # Keep everything in BGR: COLOR_BGR2GRAY and imshow both expect that
    # channel order, so converting to RGB up front would swap red and blue.
    annotated = image.copy()
    processed = preprocess_for_ocr(image)

    result = pytesseract.image_to_data(
        processed,
        config=TESSERACT_CONFIG,
        output_type=pytesseract.Output.DICT,
    )

    bounding_boxes = list(
        zip(result['left'], result['top'], result['width'], result['height']))
    ocr_results = filter_ocr_tokens(result['text'], bounding_boxes)

    print(len(ocr_results))

    for text, (x, y, w, h) in ocr_results:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 255), 2)
        # Draw the text on the image
        cv2.putText(annotated, text, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

进行OCR处理后的效果如下。我发现结果还有很大的提升空间。

附上的图片是从谷歌上找的样本图,仅用于说明问题。我实际使用的原始图片分辨率为300 dpi,质量很好。我的主要问题是:如何优化pytesseract的OCR效果,以便从嵌入式微控制器电路板的原理图中识别出文字数据?

处理后的图片

1 个回答

0

你可以试试paddleocr这个工具,我用它测试了一下电路板的图片。

from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set the `lang` parameter to `ch`, `en`, `fr`, `german`, `korean` or `japan`
# to select the corresponding language model.
# Constructing the engine downloads the model (first run only) and loads it
# into memory, so create it once and reuse it.
# NOTE(review): `det=False` here does not appear to affect the `ocr.ocr(...)`
# call below, which has its own `det` argument -- confirm whether it is needed.
ocr = PaddleOCR(use_angle_cls=False, det=False, lang='en', use_gpu=False)
img_path = 'PCB.png'
result = ocr.ocr(img_path, cls=False)

# `result` holds one entry per input image. An entry is a list of
# [box, (text, score)] items; it may be None when no text was found
# (behaviour of recent PaddleOCR 2.x releases), hence the `or []` guards.
for res in result:
    for line in res or []:
        print(line)


# draw result
result = (result[0] if result else None) or []
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
# The font file must exist at this path; draw_ocr uses it to render the text.
im_show = draw_ocr(image, boxes, txts, scores, font_path='./arial.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')

在这里输入图片描述

撰写回答