|
19 | 19 | - 补充文档扭曲矫正/去模糊/去阴影/二值化方案,可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
|
20 | 20 | - **2025.1.9**
|
21 | 21 | - RapidTable支持了 unitable 模型,精度更高,支持torch推理,补充测评数据
|
22 |
| -- **2025.3.9** |
| 22 | +- **2025.3.30** |
23 | 23 | - 输入输出格式对齐RapidTable
|
24 | 24 | - 支持模型自动下载
|
25 | 25 | - 增加来自paddle的新表格分类模型
|
26 | 26 | - 增加最新PaddleX表格识别模型测评值
|
| 27 | + - 支持 rapidocr 2.0 取消重复ocr检测 |
27 | 28 |
|
28 | 29 | ### 简介
|
29 | 30 | 💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。\
|
@@ -79,71 +80,89 @@ wired_table_rec_v2 对1500px内大小的图片效果最好,所以分辨率超
|
79 | 80 | SLANet-plus/unitable (综合精度最高): 文档场景表格(论文,杂志,期刊中的表格)
|
80 | 81 |
|
81 | 82 | ### 安装
|
82 |
| - |
| 83 | +rapidocr 2.0 以上版本支持 torch、onnx、paddle、openvino 等多引擎切换,详情参考[rapidocr文档](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr/usage/) |
83 | 84 | ``` python {linenos=table}
|
84 | 85 | pip install wired_table_rec lineless_table_rec table_cls
|
| 86 | +pip install rapidocr |
85 | 87 | ```
|
86 | 88 |
|
87 | 89 | ### 快速使用
|
88 | 90 | > ⚠️注意:在 `wired_table_rec/table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 后,采用同RapidTable完全一致格式的输入输出
|
89 | 91 | ``` python {linenos=table}
|
90 | 92 | from pathlib import Path
|
91 | 93 |
|
92 |
| -from wired_table_rec.utils.utils import VisTable |
| 94 | +from demo_wired import viser |
93 | 95 | from table_cls import TableCls
|
94 | 96 | from wired_table_rec.main import WiredTableInput, WiredTableRecognition
|
95 | 97 | from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
|
96 |
| -from rapidocr_onnxruntime import RapidOCR, VisRes |
97 |
| - |
98 |
| -# 初始化引擎 |
99 |
| -wired_input = WiredTableInput() |
100 |
| -lineless_input = LinelessTableInput() |
101 |
| -wired_engine = WiredTableRecognition(wired_input) |
102 |
| -lineless_engine = LinelessTableRecognition(lineless_input) |
103 |
| -# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s) |
104 |
| -table_cls = TableCls() |
105 |
| -img_path = f'tests/test_files/table.jpg' |
106 |
| - |
107 |
| -cls,elasp = table_cls(img_path) |
108 |
| -if cls == 'wired': |
109 |
| - table_engine = wired_engine |
110 |
| -else: |
111 |
| - table_engine = lineless_engine |
112 |
| - |
113 |
| -table_results = table_engine(img_path, enhance_box_line=False) |
114 |
| -# 使用RapidOCR输入 |
115 |
| -# ocr_engine = RapidOCR() |
116 |
| -# ocr_result, _ = ocr_engine(img_path) |
117 |
| -# table_results = table_engine(img_path, ocr_result=ocr_result) |
118 |
| - |
119 |
| -# 可视化并存储结果,包含识别框+行列坐标 |
120 |
| -# save_dir = Path("outputs") |
121 |
| -# save_dir.mkdir(parents=True, exist_ok=True) |
122 |
| -# |
123 |
| -# save_html_path = f"outputs/{Path(img_path).stem}.html" |
124 |
| -# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}" |
125 |
| -# save_logic_path = ( |
126 |
| -# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}" |
127 |
| -# ) |
128 |
| -# |
129 |
| -# vis_table = VisTable() |
130 |
| -# vis_imged = vis_table( |
131 |
| -# img_path, table_results, save_html_path, save_drawed_path, save_logic_path |
132 |
| -# ) |
| 98 | +from rapidocr import RapidOCR |
| 99 | + |
| 100 | + |
| 101 | +if __name__ == "__main__": |
| 102 | + # Init |
| 103 | + wired_input = WiredTableInput() |
| 104 | + lineless_input = LinelessTableInput() |
| 105 | + wired_engine = WiredTableRecognition(wired_input) |
| 106 | + lineless_engine = LinelessTableRecognition(lineless_input) |
| 107 | + # 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s) |
| 108 | + table_cls = TableCls() |
| 109 | + img_path = f"tests/test_files/table.jpg" |
| 110 | + |
| 111 | + cls, elasp = table_cls(img_path) |
| 112 | + if cls == "wired": |
| 113 | + table_engine = wired_engine |
| 114 | + else: |
| 115 | + table_engine = lineless_engine |
| 116 | + |
| 117 | + # 使用RapidOCR输入 |
| 118 | + ocr_engine = RapidOCR() |
| 119 | + rapid_ocr_output = ocr_engine(img_path, return_word_box=True) |
| 120 | + ocr_result = list(zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores)) |
| 121 | + table_results = table_engine( |
| 122 | + img_path, ocr_result=ocr_result, enhance_box_line=False |
| 123 | + ) |
| 124 | + |
| 125 | + |
| 126 | + # 使用单字识别 |
| 127 | + # word_results = rapid_ocr_output.word_results |
| 128 | + # ocr_result = [ |
| 129 | + # [word_result[2], word_result[0], word_result[1]] for word_result in word_results |
| 130 | + # ] |
| 131 | + # table_results = table_engine( |
| 132 | + # img_path, ocr_result=ocr_result, enhance_box_line=False |
| 133 | + # ) |
| 134 | + |
| 135 | + # Save |
| 136 | + # save_dir = Path("outputs") |
| 137 | + # save_dir.mkdir(parents=True, exist_ok=True) |
| 138 | + # |
| 139 | + # save_html_path = f"outputs/{Path(img_path).stem}.html" |
| 140 | + # save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}" |
| 141 | + # save_logic_path = ( |
| 142 | + # f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}" |
| 143 | + # ) |
| 144 | + |
| 145 | + # Visualize table rec result |
| 146 | + # vis_imged = viser( |
| 147 | + # img_path, table_results, save_html_path, save_drawed_path, save_logic_path |
| 148 | + # ) |
| 149 | + |
| 150 | + |
133 | 151 |
|
134 | 152 | ```
|
135 | 153 |
|
136 | 154 | #### 单字ocr匹配
|
137 | 155 |
|
138 | 156 | ```python
|
139 | 157 | # 将单字box转换为与行识别相同的结构
|
140 |
| -from rapidocr_onnxruntime import RapidOCR |
141 |
| -from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res |
142 |
| - |
| 158 | +from rapidocr import RapidOCR |
143 | 159 | img_path = "tests/test_files/wired/table4.jpg"
|
144 | 160 | ocr_engine = RapidOCR()
|
145 |
| -ocr_res, _ = ocr_engine(img_path, return_word_box=True) |
146 |
| -ocr_res = trans_char_ocr_res(ocr_res) |
| 161 | +rapid_ocr_output = ocr_engine(img_path, return_word_box=True) |
| 162 | +word_results = rapid_ocr_output.word_results |
| 163 | +ocr_result = [ |
| 164 | + [word_result[2], word_result[0], word_result[1]] for word_result in word_results |
| 165 | +] |
147 | 166 | ```
|
148 | 167 |
|
149 | 168 | #### 表格旋转及透视修正
|
@@ -230,14 +249,12 @@ table_results = wired_table_rec(
|
230 | 249 | row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行
|
231 | 250 | rotated_fix=True, # wiredV2支持,轻度旋转(-45°~45°)矫正,默认为True
|
232 | 251 | need_ocr=True, # 是否进行OCR识别, 默认为True
|
233 |
| - rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True |
234 | 252 | )
|
235 | 253 | lineless_table_rec = LinelessTableRecognition(LinelessTableInput())
|
236 | 254 | table_results = lineless_table_rec(
|
237 | 255 | img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
|
238 | 256 | ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
|
239 | 257 | need_ocr=True, # 是否进行OCR识别, 默认为True
|
240 |
| - rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True |
241 | 258 | )
|
242 | 259 | ```
|
243 | 260 |
|
@@ -268,7 +285,7 @@ table_results = lineless_table_rec(
|
268 | 285 | ```mermaid
|
269 | 286 | flowchart TD
|
270 | 287 | A[/表格图片/] --> B([表格分类 table_cls])
|
271 |
| - B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr_onnxruntime]) |
| 288 | + B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr]) |
272 | 289 | E --> F[/html结构化输出/]
|
273 | 290 | ```
|
274 | 291 |
|
|
0 commit comments