Skip to content

Commit 05d10ee

Browse files
committed
feat: sup for rapidOCR 2.0
1 parent f8ed5f5 commit 05d10ee

12 files changed

+221
-181
lines changed

.github/workflows/lineless_table_rec.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
run: |
3030
pip install -r requirements.txt
3131
pip install pytest
32-
32+
pip install rapidocr
3333
pytest tests/test_lineless_table_rec.py
3434
3535
GenerateWHL_PushPyPi:
@@ -50,7 +50,7 @@ jobs:
5050
pip install -r requirements.txt
5151
python -m pip install --upgrade pip
5252
pip install wheel get_pypi_latest_version
53-
53+
pip install rapidocr
5454
python setup_lineless.py bdist_wheel "${{ github.ref_name }}"
5555
5656
# - name: Publish distribution 📦 to Test PyPI

.github/workflows/wired_table_rec.yml

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
run: |
2929
pip install -r requirements.txt
3030
pip install pytest beautifulsoup4
31+
pip install rapidocr
3132
pytest tests/test_wired_table_rec.py
3233
3334
GenerateWHL_PushPyPi:
@@ -48,6 +49,7 @@ jobs:
4849
pip install -r requirements.txt
4950
python -m pip install --upgrade pip
5051
pip install wheel get_pypi_latest_version
52+
pip install rapidocr
5153
python setup_wired.py bdist_wheel "${{ github.ref_name }}"
5254
5355
- name: Publish distribution 📦 to PyPI

README.md

+65-48
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919
- 补充文档扭曲矫正/去模糊/去阴影/二值化方案,可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
2020
- **2025.1.9**
2121
- RapidTable支持了 unitable 模型,精度更高支持torch推理,补充测评数据
22-
- **2025.3.9**
22+
- **2025.3.30**
2323
- 输入输出格式对齐RapidTable
2424
- 支持模型自动下载
2525
- 增加来自paddle的新表格分类模型
2626
- 增加最新PaddleX表格识别模型测评值
27+
- 支持 rapidocr 2.0,取消重复ocr检测
2728

2829
### 简介
2930
💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。\
@@ -79,71 +80,89 @@ wired_table_rec_v2 对1500px内大小的图片效果最好,所以分辨率超
7980
SLANet-plus/unitable (综合精度最高): 文档场景表格(论文,杂志,期刊中的表格)
8081

8182
### 安装
82-
83+
rapidocr2.0以上版本支持torch,onnx,paddle,openvino等多引擎切换,详情参考[rapidocr文档](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr/usage/)
8384
``` python {linenos=table}
8485
pip install wired_table_rec lineless_table_rec table_cls
86+
pip install rapidocr
8587
```
8688

8789
### 快速使用
8890
> ⚠️注意:在 `wired_table_rec`/`table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 后,采用同RapidTable完全一致格式的输入输出
8991
``` python {linenos=table}
9092
from pathlib import Path
9193

92-
from wired_table_rec.utils.utils import VisTable
94+
from demo_wired import viser
9395
from table_cls import TableCls
9496
from wired_table_rec.main import WiredTableInput, WiredTableRecognition
9597
from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
96-
from rapidocr_onnxruntime import RapidOCR, VisRes
97-
98-
# 初始化引擎
99-
wired_input = WiredTableInput()
100-
lineless_input = LinelessTableInput()
101-
wired_engine = WiredTableRecognition(wired_input)
102-
lineless_engine = LinelessTableRecognition(lineless_input)
103-
# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s)
104-
table_cls = TableCls()
105-
img_path = f'tests/test_files/table.jpg'
106-
107-
cls,elasp = table_cls(img_path)
108-
if cls == 'wired':
109-
table_engine = wired_engine
110-
else:
111-
table_engine = lineless_engine
112-
113-
table_results = table_engine(img_path, enhance_box_line=False)
114-
# 使用RapidOCR输入
115-
# ocr_engine = RapidOCR()
116-
# ocr_result, _ = ocr_engine(img_path)
117-
# table_results = table_engine(img_path, ocr_result=ocr_result)
118-
119-
# 可视化并存储结果,包含识别框+行列坐标
120-
# save_dir = Path("outputs")
121-
# save_dir.mkdir(parents=True, exist_ok=True)
122-
#
123-
# save_html_path = f"outputs/{Path(img_path).stem}.html"
124-
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
125-
# save_logic_path = (
126-
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
127-
# )
128-
#
129-
# vis_table = VisTable()
130-
# vis_imged = vis_table(
131-
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
132-
# )
98+
from rapidocr import RapidOCR
99+
100+
101+
if __name__ == "__main__":
102+
# Init
103+
wired_input = WiredTableInput()
104+
lineless_input = LinelessTableInput()
105+
wired_engine = WiredTableRecognition(wired_input)
106+
lineless_engine = LinelessTableRecognition(lineless_input)
107+
# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s)
108+
table_cls = TableCls()
109+
img_path = f"tests/test_files/table.jpg"
110+
111+
cls, elasp = table_cls(img_path)
112+
if cls == "wired":
113+
table_engine = wired_engine
114+
else:
115+
table_engine = lineless_engine
116+
117+
# 使用RapidOCR输入
118+
ocr_engine = RapidOCR()
119+
rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
120+
ocr_result = list(zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores))
121+
table_results = table_engine(
122+
img_path, ocr_result=ocr_result, enhance_box_line=False
123+
)
124+
125+
126+
# 使用单字识别
127+
# word_results = rapid_ocr_output.word_results
128+
# ocr_result = [
129+
# [word_result[2], word_result[0], word_result[1]] for word_result in word_results
130+
# ]
131+
# table_results = table_engine(
132+
# img_path, ocr_result=ocr_result, enhance_box_line=False
133+
# )
134+
135+
# Save
136+
# save_dir = Path("outputs")
137+
# save_dir.mkdir(parents=True, exist_ok=True)
138+
#
139+
# save_html_path = f"outputs/{Path(img_path).stem}.html"
140+
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
141+
# save_logic_path = (
142+
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
143+
# )
144+
145+
# Visualize table rec result
146+
# vis_imged = viser(
147+
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
148+
# )
149+
150+
133151

134152
```
135153

136154
#### 单字ocr匹配
137155

138156
```python
139157
# 将单字box转换为行识别同样的结构
140-
from rapidocr_onnxruntime import RapidOCR
141-
from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
142-
158+
from rapidocr import RapidOCR
143159
img_path = "tests/test_files/wired/table4.jpg"
144160
ocr_engine = RapidOCR()
145-
ocr_res, _ = ocr_engine(img_path, return_word_box=True)
146-
ocr_res = trans_char_ocr_res(ocr_res)
161+
rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
162+
word_results = rapid_ocr_output.word_results
163+
ocr_result = [
164+
[word_result[2], word_result[0], word_result[1]] for word_result in word_results
165+
]
147166
```
148167

149168
#### 表格旋转及透视修正
@@ -230,14 +249,12 @@ table_results = wired_table_rec(
230249
row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行
231250
rotated_fix=True, # wiredV2支持,轻度旋转(-45°~45°)矫正,默认为True
232251
need_ocr=True, # 是否进行OCR识别, 默认为True
233-
rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
234252
)
235253
lineless_table_rec = LinelessTableRecognition(LinelessTableInput())
236254
table_results = lineless_table_rec(
237255
img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
238256
ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
239257
need_ocr=True, # 是否进行OCR识别, 默认为True
240-
rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
241258
)
242259
```
243260

@@ -268,7 +285,7 @@ table_results = lineless_table_rec(
268285
```mermaid
269286
flowchart TD
270287
A[/表格图片/] --> B([表格分类 table_cls])
271-
B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr_onnxruntime])
288+
B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr])
272289
E --> F[/html结构化输出/]
273290
```
274291

README_en.md

+63-45
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
- Add document preprocessing solutions for distortion correction, deblurring, shadow removal, and binarization. [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
1818
- **2025.1.9**
1919
- RapidTable now supports the Unitable model; evaluation data has been added.
20-
- **2025.3.9**
20+
- **2025.3.30**
2121
- Align input and output formats with RapidTable
2222
- support automatic model downloading
2323
- introduce a new table classification model from [PaddleOCR](https://github.com/PaddlePaddle/PaddleX/blob/release/3.0-rc/docs/module_usage/tutorials/ocr_modules/table_classification.en.md).
24+
- Support rapidocr 2.0 and remove the duplicate OCR detection pass
2425
### Introduction
2526
💖 This repository serves as an inference library for structured recognition of tables within documents, including wired and lineless table recognition models from Alibaba DuGuang, a wired table model from llaipython (WeChat), and a built-in table classification model from NetEase Qanything.
2627

@@ -81,6 +82,7 @@ paddlex-SLANet-plus (highest overall precision): Document scene tables (tables i
8182

8283
```python
8384
pip install wired_table_rec lineless_table_rec table_cls
85+
pip install rapidocr
8486
```
8587

8688
### Quick start
@@ -89,59 +91,75 @@ pip install wired_table_rec lineless_table_rec table_cls
8991
``` python {linenos=table}
9092
from pathlib import Path
9193

92-
from wired_table_rec.utils.utils import VisTable
94+
from demo_wired import viser
9395
from table_cls import TableCls
9496
from wired_table_rec.main import WiredTableInput, WiredTableRecognition
9597
from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
96-
from rapidocr_onnxruntime import RapidOCR, VisRes
97-
98-
# init engine
99-
wired_input = WiredTableInput()
100-
lineless_input = LinelessTableInput()
101-
wired_engine = WiredTableRecognition(wired_input)
102-
lineless_engine = LinelessTableRecognition(lineless_input)
103-
#The default model is a small YOLO model (0.1s inference time), which can be switched to higher-precision YOLOX (0.25s), faster QAnything (0.07s), or PaddlePaddle models (0.03s).
104-
table_cls = TableCls()
105-
img_path = f'tests/test_files/table.jpg'
106-
107-
cls,elasp = table_cls(img_path)
108-
if cls == 'wired':
109-
table_engine = wired_engine
110-
else:
111-
table_engine = lineless_engine
112-
113-
table_results = table_engine(img_path, enhance_box_line=False)
114-
# use rapidOCR for as input
115-
# ocr_engine = RapidOCR()
116-
# ocr_result, _ = ocr_engine(img_path)
117-
# table_results = table_engine(img_path, ocr_result=ocr_result)
118-
119-
# Visualize and store the results, including detection bounding boxes and row/column coordinates.
120-
# save_dir = Path("outputs")
121-
# save_dir.mkdir(parents=True, exist_ok=True)
122-
#
123-
# save_html_path = f"outputs/{Path(img_path).stem}.html"
124-
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
125-
# save_logic_path = (
126-
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
127-
# )
128-
#
129-
# vis_table = VisTable()
130-
# vis_imged = vis_table(
131-
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
132-
# )
98+
from rapidocr import RapidOCR
99+
100+
101+
if __name__ == "__main__":
102+
# Init
103+
wired_input = WiredTableInput()
104+
lineless_input = LinelessTableInput()
105+
wired_engine = WiredTableRecognition(wired_input)
106+
lineless_engine = LinelessTableRecognition(lineless_input)
107+
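# The default is a small YOLO model (0.1s); it can be switched to the higher-precision YOLOX (0.25s), the faster QAnything (0.07s), or the Paddle model (0.03s)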
108+
table_cls = TableCls()
109+
img_path = f"tests/test_files/table.jpg"
110+
111+
cls, elasp = table_cls(img_path)
112+
if cls == "wired":
113+
table_engine = wired_engine
114+
else:
115+
table_engine = lineless_engine
116+
117+
# use rapid ocr as input
118+
ocr_engine = RapidOCR()
119+
rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
120+
ocr_result = list(zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores))
121+
table_results = table_engine(
122+
img_path, ocr_result=ocr_result, enhance_box_line=False
123+
)
124+
125+
126+
# use word rec ocr
127+
# word_results = rapid_ocr_output.word_results
128+
# ocr_result = [
129+
# [word_result[2], word_result[0], word_result[1]] for word_result in word_results
130+
# ]
131+
# table_results = table_engine(
132+
# img_path, ocr_result=ocr_result, enhance_box_line=False
133+
# )
134+
135+
# Save
136+
# save_dir = Path("outputs")
137+
# save_dir.mkdir(parents=True, exist_ok=True)
138+
#
139+
# save_html_path = f"outputs/{Path(img_path).stem}.html"
140+
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
141+
# save_logic_path = (
142+
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
143+
# )
144+
145+
# Visualize table rec result
146+
# vis_imged = viser(
147+
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
148+
# )
149+
133150
```
134151
#### Single Character OCR Matching
135152

136153
```python
137154
# Convert single character boxes to the same structure as line recognition
138-
from rapidocr_onnxruntime import RapidOCR
139-
from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
140-
155+
from rapidocr import RapidOCR
141156
img_path = "tests/test_files/wired/table4.jpg"
142157
ocr_engine = RapidOCR()
143-
ocr_res, _ = ocr_engine(img_path, return_word_box=True)
144-
ocr_res = trans_char_ocr_res(ocr_res)
158+
rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
159+
word_results = rapid_ocr_output.word_results
160+
ocr_result = [
161+
[word_result[2], word_result[0], word_result[1]] for word_result in word_results
162+
]
145163
```
146164

147165
#### Table Rotation and Perspective Correction
@@ -251,7 +269,7 @@ html, elasp, polygons, logic_points, ocr_res = lineless_table_rec(
251269
```mermaid
252270
flowchart TD
253271
A[/table image/] --> B([table cls table_cls])
254-
B --> C([wired_table_rec]) & D([lineless_table_rec]) --> E([rapidocr_onnxruntime])
272+
B --> C([wired_table_rec]) & D([lineless_table_rec]) --> E([rapidocr])
255273
E --> F[/html output/]
256274
```
257275

0 commit comments

Comments
 (0)