使用 pytesseract 从图像中读取数字

import cv2
import pytesseract


def read_img():
    """Configure pytesseract's Tesseract executable path and load the image.

    Returns:
        The result of ``cv2.imread('Images/Image2.png')``.
    """
    # pytesseract shells out to the Tesseract binary; tell it where to find it.
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
    return cv2.imread('Images/Image2.png')


def process_text(img):
    """Run OCR on *img* and return the recognised word strings.

    Args:
        img: Image to pass to ``pytesseract.image_to_data``.

    Returns:
        A list with the last field of every output row that has exactly
        12 whitespace-separated fields.
    """
    names = []
    data = pytesseract.image_to_data(img)
    for x, d in enumerate(data.splitlines()):
        # Skip the first line of the output (the column header row).
        if x != 0:
            d = d.split()
            # Only rows with all 12 fields present carry a text value;
            # the text is the last field.
            if len(d) == 12:
                names.append(d[11])
    return names


img = read_img()
print(process_text(img))

['-', '©', '-', 'AceeZ.Rogue', 'a', '5540', 't', '3', '8', '&', '©', 'LeonGids.Rogue', 'a', 'seas', '8', '3', '8', 'e', 'ﬂ', 'karzheka.Rogue', 'a', '5151', '8', '2', '7', '48', '7', 'Q', 'ripz.Rogue', 'a', '5105', '8', '[', '5s', '27', 'm', 'korey.Rogue', 'a', '5105', '7', '2', '6', '36', '-', '[ZH]', 'Shaiiko.BDS', 'C', '3520', 'a', 'B', 's', '22', 'Cps', 'a', '2012', '8', 'i', '8', '21', 'ypc', 'Chee', 'e', '8', '-_', '22', '3', '(2)', 'Flemzje.BDS', 'a', '2420', 'a', '3', '10', '26', '(SF)', 'Renshiro.BDS', 'C', '2410', '6', '1', '8', 'Fo']

1条回答

网友

1楼 · 发布于 2024-05-14 18:01:31

通常，Tesseract 更适合识别白色背景上的黑色文本，所以你应该先对输入图像做反色处理。你还应该考虑对图像进行阈值化，使其变为纯黑白图像。最后，Tesseract 对字符的大小也比较敏感：我发现在原始比例下用户名可以被识别，但必须将图像放大到 1.25 倍才能识别出数字。

import cv2
import pytesseract

# Load as a single-channel grayscale image so it can be thresholded directly.
img = cv2.imread('acerogue.png', cv2.IMREAD_GRAYSCALE)

# Invert and binarise in one step so the text ends up dark on a light
# background. With THRESH_OTSU set, OpenCV picks the threshold itself and
# the 100 passed here is not used.
thresh = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]
thresh = cv2.resize(thresh, (0,0), fx=1.25, fy=1.25)  # scale image 1.25X

# Page segmentation mode 6: treat the image as a single uniform block of text.
# The option needs its leading dashes ('--psm'), otherwise Tesseract does not
# parse it as the page-segmentation flag.
detected_text = pytesseract.image_to_string(thresh, config='--psm 6')
print(detected_text)

输出结果如下：

| ® AceeZ.Rogue 8 5540 11 2 8 -
© LeonGids.Rogue 8 5343 8 3 8 -

Ww karzheka.Rogue a 5151 8 2 7 48

7 tipz.Rogue a 5105 8 0 5 27
& korey.Rogue a 5105 7 2 6 36

| #4 Shaiiko.BDS B 3520 9 3 8 22
BriD.BDS mH 2912 8 1 8 21

S RaFaLe.BDS BH  —_2605 a 2 8 2

3 BS Elemzje.Bos H 2420 3 3 10 26
Se) Renshiro.BDS m 2410 6 1 8 45

也许，您应该预先裁剪图像以去除图标

相关问题更多 >

编程相关推荐

热门问题

热门文章