如何使用轮廓识别方框中的所有数字

2024-06-09 14:10:38 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在学习 OpenCV,想从成绩单图像中识别出所有信息。对于 No. 列(最左边一列),我使用轮廓(contours)获取 (x, y, x_max, y_max),并在灰度图像上裁剪出每个方框,但有少数方框中的数字无法完整识别:只能识别出数字中的一位。这是我的代码:

import cv2

# Locate the table in a scanned transcript, slice its left-most ("No.")
# column into one cell per row, and extract the digits of every cell.
img = cv2.imread("original_img.jpg", 0)  # flag 0 -> load as grayscale
if img is None:
    raise SystemExit("Could not read original_img.jpg")

blur = cv2.GaussianBlur(img, (5, 5), 5)
# Inverted binary image: ink (grid lines and digits) becomes white (255).
thresh = cv2.adaptiveThreshold(
    blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)

# The structuring elements are 1/scale of the image size, so the erode+dilate
# pairs below only keep strokes at least that long, i.e. the grid lines.
scale_height = 30
scale_long = 20
kernel_width = int(img.shape[1] / scale_long)
kernel_height = int(img.shape[0] / scale_height)

# NOTE: the third positional argument of cv2.erode/cv2.dilate in Python is
# `dst`, not `anchor`; the default anchor is already the kernel centre, so the
# stray (-1, -1) argument is simply dropped.
horizalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_width, 1))
horizal = cv2.erode(thresh, horizalStructure)
horizal = cv2.dilate(horizal, horizalStructure)

verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_height))
vertical = cv2.erode(thresh, verticalStructure)
vertical = cv2.dilate(vertical, verticalStructure)

# Combine both line images. bitwise_or avoids the uint8 wrap-around that a
# plain `+` on two 255-valued pixels would produce.
mask = cv2.bitwise_or(vertical, horizal)
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
    raise SystemExit("No table grid found in the image")

# The table is taken to be the external contour with the largest area.
# (The original stored the running maximum in a variable named `max`, which
# shadowed the builtin needed further down.)
table_contour = max(contours, key=cv2.contourArea)
x_max, y_max, w_max, h_max = cv2.boundingRect(table_contour)

table = img[y_max:y_max + h_max, x_max:x_max + w_max]
cropped_thresh_img = []
cropped_origin_img = []
countours_img = []

NUM_ROWS = 33
START_ROW = 1
NUM_COLS = 16  # assumes 16 equal-width columns, as in the original w_max/16 -- TODO confirm

# Horizontal extent of the first column; it is the same for every row.
col_left = x_max
col_right = x_max + round(w_max / NUM_COLS)

for i in range(START_ROW, NUM_ROWS):
    row_top = y_max + round(i * h_max / NUM_ROWS)
    row_bottom = y_max + round((i + 1) * h_max / NUM_ROWS)

    thresh1 = thresh[row_top:row_bottom, col_left:col_right]
    origin1 = img[row_top:row_bottom, col_left:col_right]
    contours_thresh1, hierarchy_thresh1 = cv2.findContours(
        thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    cropped_thresh_img.append(thresh1)
    cropped_origin_img.append(origin1)
    countours_img.append(contours_thresh1)

cv2.imshow("thresh", cropped_thresh_img[20])

# For every cell, merge the bounding boxes of ALL digit-like contours.
# Stopping at the first match (the original `break`) kept a single digit of a
# multi-digit number, and left `answer` undefined or stale when nothing matched.
result = []
for cell, cell_contours in zip(cropped_origin_img, countours_img):
    cell_width = cell.shape[1]
    boxes = []
    for cnt in cell_contours:
        if cv2.contourArea(cnt) <= 20:
            continue  # too small to be a digit
        x, y, w, h = cv2.boundingRect(cnt)
        # Ignore contours starting in the outer 10% of the cell width
        # (presumably remnants of the cell border).
        if cell_width * 0.1 < x < cell_width * 0.9:
            boxes.append((x, y, x + w, y + h))

    if not boxes:
        result.append(None)  # no digits found in this cell
        continue

    left = min(box[0] for box in boxes)
    top = min(box[1] for box in boxes)
    right = max(box[2] for box in boxes)
    bottom = max(box[3] for box in boxes)
    answer = cell[top:bottom, left:right]
    answer = cv2.threshold(answer, 80, 255, cv2.THRESH_BINARY_INV)[1]
    result.append(answer)

if result[20] is not None:
    cv2.imshow("No.21", result[20])

# Without an event-loop call the HighGUI windows are never rendered.
cv2.waitKey(0)
cv2.destroyAllWindows()

轮廓后的图像:

Img after contours

原始图像:

original_img


Tags: 图像imgorigincv2nummaxrowsvertical
1条回答
网友
1楼 · 发布于 2024-06-09 14:10:38

为什么不先把你想提取数据的那部分图像裁剪出来呢?

import cv2
import pytesseract

# Load the screenshot and keep only rows 120-619 / columns 25-44 of it.
# The original note "(632, 445, 3)" is presumably the full image's shape.
img = cv2.imread("gKGc9.png")[120:620, 25:45]

Result

部分结果是:

enter image description here

您已经应用了阈值并找到了轮廓

import cv2
import pytesseract

# Load the screenshot and keep only rows 120-619 / columns 25-44 of it.
# The original note "(632, 445, 3)" is presumably the full image's shape.
img = cv2.imread("gKGc9.png")[120:620, 25:45]

# Save the cropped strip so it can be inspected.
cv2.imwrite("/Users/ahx/Desktop/res.png", img)

# Grayscale -> Otsu-thresholded binary image -> outer contours.
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cv2.threshold returns (threshold_used, image); only the image is needed.
_retval, thr = cv2.threshold(gry, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cnt = cv2.findContours(thr, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]

现在,对于每个检测到的轮廓,需要放大裁剪出的区域(即把宽和高都放大到 1.5 倍):

# Crop from an untouched copy: the green boxes drawn on `img` below would
# otherwise end up inside the crops that are later handed to the OCR engine.
clean = img.copy()
for c in cnt:
    x, y, w, h = cv2.boundingRect(c)
    # Mark the detected region on `img` (visualisation only).
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    crp = clean[y:y + h, x:x + w]
    # Enlarge the crop to 150% of its size in both directions.
    wth = int(crp.shape[1] * 150 / 100)
    hgt = int(crp.shape[0] * 150 / 100)
    # INTER_CUBIC is the interpolation OpenCV recommends for enlarging;
    # INTER_AREA is meant for shrinking.
    crp = cv2.resize(crp, (wth, hgt), interpolation=cv2.INTER_CUBIC)

现在,您可以使用pytesseract来读取数据

# "--psm 6" selects Tesseract's "single uniform block of text" page
# segmentation mode and "digits" loads its digits-only config. Without the
# leading "--", "psm" and "6" are taken as config file names, not an option.
txt = pytesseract.image_to_string(crp, config="--psm 6 digits")

如果文本数据不为空,则显示

    # Only report crops in which some text was recognised. An "empty" result
    # may be '', '\f' or other whitespace depending on the Tesseract /
    # pytesseract version, so test for non-whitespace content instead of
    # comparing against one exact string.
    if txt.strip():
        print(txt)
        cv2.imshow("crp", crp)
        cv2.waitKey(0)  # wait for a key press before moving to the next crop

部分结果:

{a3}{a4}{a5}{a6}{a7}

在控制台中,您应该看到:

31

26

25.

24

23

代码:

import cv2
import pytesseract

# Read the digits of the "No." column: crop the column strip, find the outer
# contours of its Otsu-thresholded version, enlarge each contour's region and
# run Tesseract on it.
img = cv2.imread("gKGc9.png")
img = img[120:620, 25:45]  # keep rows 120-619, columns 25-44; full image is presumably (632, 445, 3)
# NOTE: hard-coded debug output path from the original answer; adjust locally.
cv2.imwrite("/Users/ahx/Desktop/res.png", img)
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.threshold(gry, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
cnt = cv2.findContours(thr, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]

# Crop from an untouched copy: the green boxes drawn on `img` below would
# otherwise end up inside the crops handed to the OCR engine.
clean = img.copy()
for c in cnt:
    x, y, w, h = cv2.boundingRect(c)
    # Mark the detected region on `img` (visualisation only).
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    crp = clean[y:y + h, x:x + w]
    # Enlarge the crop to 150% of its size in both directions.
    wth = int(crp.shape[1] * 150 / 100)
    hgt = int(crp.shape[0] * 150 / 100)
    # INTER_CUBIC is the interpolation OpenCV recommends for enlarging;
    # INTER_AREA is meant for shrinking.
    crp = cv2.resize(crp, (wth, hgt), interpolation=cv2.INTER_CUBIC)
    # "--psm 6" selects the "single uniform block of text" mode and "digits"
    # loads the digits-only config; without "--" the option is not recognised.
    txt = pytesseract.image_to_string(crp, config="--psm 6 digits")
    # An "empty" result may be '', '\f' or other whitespace depending on the
    # Tesseract / pytesseract version, so test for non-whitespace content.
    if txt.strip():
        print(txt)
        cv2.imshow("crp", crp)
        cv2.waitKey(0)  # wait for a key press before moving to the next crop

我通过pip安装了pytesseract,我的pytesseract版本是4.1

相关问题 更多 >