|
15 | 15 | </div>
|
16 | 16 |
|
17 | 17 | ### 最近更新
|
18 |
| -- **2024.11.22** |
19 |
| - - 支持单字符匹配方案,需要RapidOCR>=1.4.0 |
20 | 18 | - **2024.12.25**
|
21 | 19 | - 补充文档扭曲矫正/去模糊/去阴影/二值化方案,可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
|
22 | 20 | - **2025.1.9**
|
23 |
| - - RapidTable支持了 unitable 模型,精度更高支持torch推理,补充测评数据 |
| 21 | + - RapidTable支持了 unitable 模型,精度更高,支持torch推理,补充测评数据 |
| 22 | +- **2025.3.9** |
| 23 | + - 输入输出格式对齐RapidTable |
| 24 | + - 支持模型自动下载 |
| 25 | + - 增加来自paddle的新表格分类模型 |
24 | 26 |
|
25 | 27 | ### 简介
|
26 | 28 | 💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。\
|
@@ -81,55 +83,63 @@ pip install wired_table_rec lineless_table_rec table_cls
|
81 | 83 | ```
|
82 | 84 |
|
83 | 85 | ### 快速使用
|
84 |
| - |
| 86 | +> ⚠️注意:在 `wired_table_rec/table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 之后,输入输出采用与RapidTable完全一致的格式 |
85 | 87 | ``` python {linenos=table}
|
86 |
| -import os |
| 88 | +from pathlib import Path |
87 | 89 |
|
88 |
| -from lineless_table_rec import LinelessTableRecognition |
89 |
| -from lineless_table_rec.utils_table_recover import format_html, plot_rec_box_with_logic_info, plot_rec_box |
| 90 | +from wired_table_rec.utils.utils import VisTable |
90 | 91 | from table_cls import TableCls
|
91 |
| -from wired_table_rec import WiredTableRecognition |
92 |
| -from rapidocr_onnxruntime import RapidOCR |
93 |
| - |
94 |
| -lineless_engine = LinelessTableRecognition() |
95 |
| -wired_engine = WiredTableRecognition() |
96 |
| -# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型 |
97 |
| -table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q") |
98 |
| -img_path = f'images/img14.jpg' |
| 92 | +from wired_table_rec.main import WiredTableInput, WiredTableRecognition |
| 93 | +from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition |
| 94 | +from rapidocr_onnxruntime import RapidOCR, VisRes |
| 95 | + |
| 96 | +# 初始化引擎 |
| 97 | +wired_input = WiredTableInput() |
| 98 | +lineless_input = LinelessTableInput() |
| 99 | +wired_engine = WiredTableRecognition(wired_input) |
| 100 | +lineless_engine = LinelessTableRecognition(lineless_input) |
| 101 | +# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s) |
| 102 | +table_cls = TableCls() |
| 103 | +img_path = f'tests/test_files/table.jpg' |
99 | 104 |
|
100 | 105 | cls,elasp = table_cls(img_path)
|
101 | 106 | if cls == 'wired':
|
102 | 107 | table_engine = wired_engine
|
103 | 108 | else:
|
104 | 109 | table_engine = lineless_engine
|
105 |
| - |
106 |
| -html, elasp, polygons, logic_points, ocr_res = table_engine(img_path) |
107 |
| -print(f"elasp: {elasp}") |
108 |
| - |
109 |
| -# 使用其他ocr模型 |
110 |
| -#ocr_engine =RapidOCR(det_model_path="xxx/det_server_infer.onnx",rec_model_path="xxx/rec_server_infer.onnx") |
111 |
| -#ocr_res, _ = ocr_engine(img_path) |
112 |
| -#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res) |
113 |
| -# output_dir = f'outputs' |
114 |
| -# complete_html = format_html(html) |
115 |
| -# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True) |
116 |
| -# with open(f"{output_dir}/table.html", "w", encoding="utf-8") as file: |
117 |
| -# file.write(complete_html) |
118 |
| -# # 可视化表格识别框 + 逻辑行列信息 |
119 |
| -# plot_rec_box_with_logic_info( |
120 |
| -# img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons |
| 110 | + |
| 111 | +table_results = table_engine(img_path, enhance_box_line=False) |
| 112 | +# 使用RapidOCR输入 |
| 113 | +# ocr_engine = RapidOCR() |
| 114 | +# ocr_result, _ = ocr_engine(img_path) |
| 115 | +# table_results = table_engine(img_path, ocr_result=ocr_result) |
| 116 | + |
| 117 | +# 可视化并存储结果,包含识别框+行列坐标 |
| 118 | +# save_dir = Path("outputs") |
| 119 | +# save_dir.mkdir(parents=True, exist_ok=True) |
| 120 | +# |
| 121 | +# save_html_path = f"outputs/{Path(img_path).stem}.html" |
| 122 | +# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}" |
| 123 | +# save_logic_path = ( |
| 124 | +# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}" |
121 | 125 | # )
|
122 |
| -# # 可视化 ocr 识别框 |
123 |
| -# plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res) |
| 126 | +# |
| 127 | +# vis_table = VisTable() |
| 128 | +# vis_imged = vis_table( |
| 129 | +# img_path, table_results, save_html_path, save_drawed_path, save_logic_path |
| 130 | +# ) |
| 131 | + |
124 | 132 | ```
|
125 | 133 |
|
126 | 134 | #### 单字ocr匹配
|
| 135 | + |
127 | 136 | ```python
|
128 | 137 | # 将单字box转换为与行识别相同的结构
|
129 | 138 | from rapidocr_onnxruntime import RapidOCR
|
130 |
| -from wired_table_rec.utils_table_recover import trans_char_ocr_res |
| 139 | +from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res |
| 140 | + |
131 | 141 | img_path = "tests/test_files/wired/table4.jpg"
|
132 |
| -ocr_engine =RapidOCR() |
| 142 | +ocr_engine = RapidOCR() |
133 | 143 | ocr_res, _ = ocr_engine(img_path, return_word_box=True)
|
134 | 144 | ocr_res = trans_char_ocr_res(ocr_res)
|
135 | 145 | ```
|
@@ -177,20 +187,51 @@ for i, res in enumerate(result):
|
177 | 187 |
|
178 | 188 | ### 核心参数
|
179 | 189 | ```python
|
180 |
| -wired_table_rec = WiredTableRecognition() |
181 |
| -html, elasp, polygons, logic_points, ocr_res = wired_table_rec( |
| 190 | +# 输入(WiredTableInput/LinelessTableInput) |
| 191 | +@dataclass |
| 192 | +class WiredTableInput: |
| 193 | + model_type: Optional[str] = "unet" #unet/cycle_center_net |
| 194 | + model_path: Union[str, Path, None, Dict[str, str]] = None |
| 195 | + use_cuda: bool = False |
| 196 | + device: str = "cpu" |
| 197 | + |
| 198 | +@dataclass |
| 199 | +class LinelessTableInput: |
| 200 | + model_type: Optional[str] = "lore" #lore |
| 201 | + model_path: Union[str, Path, None, Dict[str, str]] = None |
| 202 | + use_cuda: bool = False |
| 203 | + device: str = "cpu" |
| 204 | + |
| 205 | +# 输出(WiredTableOutput/LinelessTableOutput) |
| 206 | +@dataclass |
| 207 | +class WiredTableOutput: |
| 208 | + pred_html: Optional[str] = None |
| 209 | + cell_bboxes: Optional[np.ndarray] = None |
| 210 | + logic_points: Optional[np.ndarray] = None |
| 211 | + elapse: Optional[float] = None |
| 212 | + |
| 213 | +@dataclass |
| 214 | +class LinelessTableOutput: |
| 215 | + pred_html: Optional[str] = None |
| 216 | + cell_bboxes: Optional[np.ndarray] = None |
| 217 | + logic_points: Optional[np.ndarray] = None |
| 218 | + elapse: Optional[float] = None |
| 219 | +``` |
| 220 | + |
| 221 | +```python |
| 222 | +wired_table_rec = WiredTableRecognition(WiredTableInput()) |
| 223 | +table_results = wired_table_rec( |
182 | 224 | img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
|
183 | 225 | ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
|
184 |
| - version="v2", #默认使用v2线框模型,切换阿里读光模型可改为v1 |
185 | 226 | enhance_box_line=True, # 识别框切割增强(关闭避免多余切割,开启减少漏切割),默认为True
|
186 | 227 | col_threshold=15, # 识别框左边界x坐标差值小于col_threshold的默认同列
|
187 | 228 | row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行
|
188 | 229 | rotated_fix=True, # wiredV2支持,轻度旋转(-45°~45°)矫正,默认为True
|
189 | 230 | need_ocr=True, # 是否进行OCR识别, 默认为True
|
190 | 231 | rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
|
191 | 232 | )
|
192 |
| -lineless_table_rec = LinelessTableRecognition() |
193 |
| -html, elasp, polygons, logic_points, ocr_res = lineless_table_rec( |
| 233 | +lineless_table_rec = LinelessTableRecognition(LinelessTableInput()) |
| 234 | +table_results = lineless_table_rec( |
194 | 235 | img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
|
195 | 236 | ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
|
196 | 237 | need_ocr=True, # 是否进行OCR识别, 默认为True
|
|
0 commit comments