我正在使用 Python 3.7 和 Tesseract 4.00,并尝试使用 Tesseract 进行表格检测。
在目前的情况下,检测到的块中所有元素的类型都是未知(unknown)类型。
下面给出 Tesseract 中已有的块类型,供您参考。
class TableDetector:
    """Run Tesseract layout analysis on an image and report its blocks.

    The class wraps a single experiment: enable Tesseract's table-finding
    variables, run layout analysis, and inspect the type of every block
    that comes back.
    """

    # The original snippet left this value blank (only a "Tessdata path"
    # comment). An empty string is a placeholder, not a working default.
    __TRAINED_DATA_PATH = ""  # TODO: set to the tessdata directory path

    def detect_table(self, image, tx_id, do_pre_process=True):
        """Analyse the layout of *image* and collect per-block information.

        Args:
            image: Input image. When ``do_pre_process`` is true it is passed
                to ``cvtColor(..., COLOR_BGR2GRAY)``, so it is presumably a
                BGR array -- TODO confirm against callers.
            tx_id: Not used by this method; kept so existing callers that
                pass it keep working.
            do_pre_process: When true, convert to grayscale and apply a
                median blur followed by a Gaussian blur before analysis.

        Returns:
            A list of ``(orientation, block_type)`` tuples, one per block
            yielded by the layout iterator. The list is empty when layout
            analysis produced nothing or an error was logged.
        """
        # The original returned `result_dec` without ever assigning it, so
        # every call ended in a NameError. Initialise it before the `try`
        # so the error path also returns a well-defined (empty) value.
        result_dec = []
        try:
            pre_processed_image = image
            if do_pre_process:
                pre_processed_image = cvtColor(image, COLOR_BGR2GRAY)
                pre_processed_image = medianBlur(pre_processed_image, 3)
                pre_processed_image = GaussianBlur(pre_processed_image, (3, 3), 0)

            # NOTE(review): psm=6 asks Tesseract to treat the page as one
            # uniform block of text, which may bypass the layout analysis
            # that table finding relies on -- confirm whether an automatic
            # segmentation mode is what is actually wanted here.
            with PyTessBaseAPI(psm=6, oem=1, lang="eng",
                               path=self.__TRAINED_DATA_PATH) as api:
                pil_image = Image.fromarray(pre_processed_image)
                api.SetImage(pil_image)
                api.SetVariable("textord_tabfind_find_tables", "true")
                api.SetVariable("textord_tablefind_recognize_tables", "true")
                api.SetVariable("textord_show_tables", "true")
                api.SetVariable("textord_tablefind_show_stats", "true")

                layout = api.AnalyseLayout()
                # Guard against a missing iterator so an empty page is
                # reported as "no blocks" instead of as a logged error.
                if layout is not None:
                    for block in iterate_level(layout, RIL.BLOCK):
                        orientation = block.Orientation()
                        block_type = block.BlockType()
                        print(orientation)
                        print(block_type)
                        result_dec.append((orientation, block_type))
        except Exception as e:
            # Best-effort by design: log the failure and fall through to
            # return whatever was collected so far.
            Logger.log.error("Error in image_to_data : %s" % e, exc_info=True)
        return result_dec
目前没有回答
相关问题 更多 >
编程相关推荐