基于 cv2(Python)的机械显示屏暗淡文本过滤与检测

2024-05-16 12:07:16 发布

您现在位置:Python中文网/ 问答频道 /正文

我遇到一个小问题:想对这类图像提取轮廓并进行过滤,使其中的七段数码管文字更易辨认。我已经设法得到了效果相当不错的阴影去除滤波和 Canny 边缘检测结果,但无法更进一步,因为感兴趣区域(ROI)相当暗淡且空洞。示例如下:

Normal_Image

Shadow_removal

Canny_50_100

CNT_IMG

这是我在本论坛一些网友的帮助下编写的代码;另外说明一下,我仍然是个十足的初学者。

import cv2
import numpy as np

from PIL import Image
import pytesseract
import subprocess
#import numpy as np

import os #for now used to delete files and empty foders

imgAddr = 'ADisplay2.jpg'
#imgAddr = 'example1.jpg'

# Read the input image.  cv2.imread does not raise on an unreadable path, it
# returns None, so fail here with a clear message instead of inside a later
# OpenCV call.
img = cv2.imread(imgAddr)
if img is None:
    raise FileNotFoundError('Could not read image: ' + imgAddr)

cv2.imshow('Test_Img', img)

# Shadow removal, done independently on each colour plane: a dilated and
# median-blurred copy of the plane (bg_img) is subtracted from the plane and
# the absolute difference is inverted.
rgb_planes = cv2.split(img)

result_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    result_planes.append(diff_img)

unShadowImg = cv2.merge(result_planes)

gray = cv2.cvtColor(unShadowImg, cv2.COLOR_BGR2GRAY)
cv2.imshow('Image_gray', gray)

# Edge map of the shadow-free image; this is what the contours are taken from.
cannyImg = cv2.Canny(unShadowImg, 50, 110)
cv2.imshow('Current_canny', cannyImg)

# Plain binary threshold of the gray image, displayed for comparison only.
_, threshold = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
cv2.imshow('Image_tresh', threshold)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; taking the last two items works on all.
contours, hierarchy = cv2.findContours(
    cannyImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]

# Draw the debug overlay on a copy.  drawContours paints in place, and `img`
# is used again later for the rotation, so drawing on it directly would leave
# the green outlines in the image that is eventually thresholded.
cntImg = cv2.drawContours(img.copy(), contours, -1, (0, 255, 0), 3)
cv2.imshow('Image_tresh_after_fndCnt', cntImg)
cv2.imwrite('cnt_image.png', cntImg)


# Create first mask used for rotation: an all-white canvas shaped like img.
mask = np.ones(img.shape, np.uint8)*255

# Draw contours on the mask with size and ratio of borders for threshold.
# Only contours with 500 < area < 10000 whose bounding box is not much taller
# than wide (h < 2.5 * w) are filled in black.
for cnt in contours:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    if 10000 > size > 500 and w*2.5 > h:
        cv2.drawContours(mask, [cnt], -1, (0, 0, 0), -1)

# Connect neighbour contours and select the biggest one (text).
# Opening the white background with a large kernel removes the white gaps
# narrower than the kernel, so nearby black blobs merge into one region.
kernel = np.ones((50, 50), np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)

# findContours returns three values on OpenCV 3.x and two on 2.x/4.x; the
# last two items are (contours, hierarchy) in every version.
contours_op, hierarchy_op = cv2.findContours(
    threshold_op, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]

# max() on an empty sequence also raises ValueError, but with a message that
# says nothing about the cause; report the actual problem instead.
if len(contours_op) == 0:
    raise ValueError(
        'No candidate text region found: no contour passed the size/ratio '
        'filter, so there is nothing to rotate or crop.')
cnt = max(contours_op, key=cv2.contourArea)

# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
# NOTE(review): the angle convention of minAreaRect has differed between
# OpenCV releases -- confirm the rotation direction on the installed version.
rect = cv2.minAreaRect(cnt)
_, _, angle = rect
h, w = img.shape[:2]
center = (w // 2, h // 2)

# Rotate the image about its centre; the output keeps the input size.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)

# Create bounding box for rotated text (use old points of rotated rectangle).
# np.int0 was an alias of np.intp and no longer exists in NumPy 2.0, so cast
# explicitly.
box = cv2.boxPoints(rect).astype(np.intp)
x1, y1 = box.min(axis=0)
x2, y2 = box.max(axis=0)

# The rectangle corners may lie outside the image.  A negative start index
# would be read as "count from the end" by the slice below, so clamp the
# box to the image bounds first.
x1, x2 = np.clip([x1, x2], 0, w)
y1, y2 = np.clip([y1, y2], 0, h)

# Debug overlay: outline the detected rectangle on the source image.
cv2.drawContours(img, [box], 0, (0, 0, 255), 2)

# Crop the image to the bounding box.
rotated = rotated[y1:y2, x1:x2]
if rotated.size == 0:
    raise ValueError('Text bounding box is empty after clamping to the image.')

# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)

# findContours returns three values on OpenCV 3.x and two on 2.x/4.x, so take
# the last two.  A copy is passed because older OpenCV releases modify the
# source image, and threshold_r is still needed below.
contours, hierarchy = cv2.findContours(
    threshold_r.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]

# Fill every contour smaller than 500 px^2 with black to erase it.
for cnt in contours:
    size = cv2.contourArea(cnt)
    if size < 500:
        cv2.drawContours(threshold_r, [cnt], -1, (0, 0, 0), -1)

# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)

# Pad with a 20 px white margin on every side.
final_image = cv2.copyMakeBorder(
    final_image,
    20,
    20,
    20,
    20,
    cv2.BORDER_CONSTANT,
    value=255,
)


# Display results.
cv2.imshow('final', final_image)
# imwrite reports failure through its return value rather than an exception.
if not cv2.imwrite('FinalImg.jpg', final_image):
    raise IOError('Could not write FinalImg.jpg')
cv2.imshow('rotated', rotated)

# Run OCR on the saved image.  Open the file once and close it when done;
# the original opened it twice and never closed either handle.
with Image.open('FinalImg.jpg') as im:
    txtStr = pytesseract.image_to_string(im)

print("OCR output:")
print(txtStr)

# Keep the windows open until a key is pressed, then close them all.
cv2.waitKey(0)
cv2.destroyAllWindows()

Tags: and, the, image, img, for, threshold, np, cv2