如何从图像中删除背景线和形状以进行文本提取？

import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("Could not read image '1.png'")
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 values in OpenCV 2.x/4.x and 3 values in OpenCV 3.x
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) == 0:
    raise ValueError("No contours found in the thresholded mask")
rect = cv2.minAreaRect(cnts[0])
# np.int0 was removed in NumPy 2.0; np.intp is the type it aliased
box = cv2.boxPoints(rect).astype(np.intp)
cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))

# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()

1条回答

网友

1楼 · 发布于 2024-05-13 20:24:10

由于您的图像中有“完美”的矩形，因此我提出了以下方法：

对输入图像进行灰度和反向二值化，以消除可能的伪影，并在黑色背景上显示白色框和文本
在下面的代码中，将使用模板匹配（template matching）来查找感兴趣的框的左上角。因此，需要建立一个模板和一个遮罩（mask）来模拟这些左上角
模板本身类似于长度为50像素、高度为20像素的“角”，因为所有感兴趣的框至少具有以下尺寸：
相应的遮罩将模板限制为沿“角”5像素宽的“条纹”：
由于所有文本与方框边框之间的边距至少为5像素，因此会有“完美”的匹配结果，因为没有文本干扰匹配
根据“完美”匹配结果，导出并迭代每个感兴趣框的(x, y)坐标
用灰色对框的内部进行泛洪填充（flood fill）（由于开始时做了二值化，图像中原本只有黑色和白色）
然后对灰色区域生成遮罩：
由此确定边界矩形，并将该部分从原始图像复制粘贴到某个干净图像。此外，对内容执行pytesseract

以下是完整的代码：

import cv2
import numpy as np
import pytesseract

# Read image as grayscale
img = cv2.imread('M7X8C.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("Could not read image 'M7X8C.png'")

# Inverse binarize image to get rid of possible artifacts, and to have
# white boxes and text on black background
thr = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)[1]

# Set up a template and mask mimicking the upper left corner of the
# boxes of interest: a 50 x 20 "corner" whose top row and left column are
# white, restricted by the mask to a 5 pixel wide stripe along that corner
templ = np.full((20, 50), 255, dtype=np.uint8)
templ[1:, 1:] = 0
mask = np.full_like(templ, 255)
mask[5:, 5:] = 0

# Template matching
res = cv2.matchTemplate(thr, templ, cv2.TM_CCORR_NORMED, mask=mask)

# Extract upper left corners of the boxes of interest. Only "perfect"
# matches are wanted, but the result is floating point, so compare with a
# tight tolerance instead of exact equality (NaN/inf entries never match)
boxes_tl = np.argwhere(np.isclose(res, 1.0, rtol=0.0, atol=1e-5))

# Initialize new clean image
clean = np.full_like(img, 255)

# For each upper left corner (argwhere yields row, column = y, x)...
for y, x in boxes_tl:
    print('x: {}, y: {}'.format(x, y))

    # Flood fill inner part of box with gray, and mask that area; the seed
    # is one pixel inside the corner, passed as plain Python ints
    seed = (int(x) + 1, int(y) + 1)
    box_mask = cv2.floodFill(thr.copy(), None, seed, 128)[1] == 128

    # Extract the bounding rectangle of that area
    x, y, w, h = cv2.boundingRect(box_mask.astype(np.uint8))

    # Copy box content to clean image
    clean[y:y+h, x:x+w] = img[y:y+h, x:x+w]

    # Run pytesseract on box content; options need the leading "--",
    # otherwise Tesseract treats them as config file names
    text = pytesseract.image_to_string(thr[y:y+h, x:x+w], config='--psm 6')
    print(text.replace('\f', ''))

# Output
cv2.imshow('clean', clean)
cv2.waitKey(0)

这就是清晰的图像：

这是前两个 pytesseract 识别结果：

x: 1, y: 0
PGGEOS KKCI 100600

x: 199, y: 39
ISOL
EMBD
CB
400
XXX

如您所见，结果并不完美（识别成了 S 而不是 5），很可能是由于等宽字体所致。为这种字体获取（或生成）一些 Tesseract traineddata 文件肯定有助于克服这个问题

                    
System information
                    
Platform:      Windows-10-10.0.19041-SP0
Python:        3.9.1
PyCharm:       2021.1.1
NumPy:         1.19.5
OpenCV:        4.5.2
pytesseract:   5.0.0-alpha.20201127

相关问题更多 >

编程相关推荐

热门问题

热门文章