Skip to content

Commit 49ac513

Browse files
committed
chore: add readme & change workflow
1 parent b844584 commit 49ac513

8 files changed

+158
-109
lines changed

.github/workflows/lineless_table_rec.yml

-8
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ jobs:
3030
pip install -r requirements.txt
3131
pip install pytest
3232
33-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
34-
unzip lineless_table_rec_models.zip
35-
mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
36-
3733
pytest tests/test_lineless_table_rec.py
3834
3935
GenerateWHL_PushPyPi:
@@ -55,10 +51,6 @@ jobs:
5551
python -m pip install --upgrade pip
5652
pip install wheel get_pypi_latest_version
5753
58-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
59-
unzip lineless_table_rec_models.zip
60-
mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
61-
6254
python setup_lineless.py bdist_wheel "${{ github.ref_name }}"
6355
6456
# - name: Publish distribution 📦 to Test PyPI

.github/workflows/table_cls.yml

-8
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ jobs:
2929
pip install -r requirements.txt
3030
pip install pytest beautifulsoup4
3131
32-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/table_cls_models.zip
33-
unzip table_cls_models.zip
34-
mv table_cls_models/*.onnx table_cls/models/
35-
3632
pytest tests/test_table_cls.py
3733
3834
GenerateWHL_PushPyPi:
@@ -54,10 +50,6 @@ jobs:
5450
python -m pip install --upgrade pip
5551
pip install wheel get_pypi_latest_version
5652
57-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/table_cls_models.zip
58-
unzip table_cls_models.zip
59-
mv table_cls_models/*.onnx table_cls/models/
60-
6153
python setup_table_cls.py bdist_wheel "${{ github.ref_name }}"
6254
6355
- name: Publish distribution 📦 to PyPI

.github/workflows/wired_table_rec.yml

-10
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ jobs:
2828
run: |
2929
pip install -r requirements.txt
3030
pip install pytest beautifulsoup4
31-
32-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_models.zip
33-
unzip wired_table_rec_models.zip
34-
mv wired_table_rec_models/*.onnx wired_table_rec/models/
35-
3631
pytest tests/test_wired_table_rec.py
3732
3833
GenerateWHL_PushPyPi:
@@ -53,11 +48,6 @@ jobs:
5348
pip install -r requirements.txt
5449
python -m pip install --upgrade pip
5550
pip install wheel get_pypi_latest_version
56-
57-
wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_models.zip
58-
unzip wired_table_rec_models.zip
59-
mv wired_table_rec_models/*.onnx wired_table_rec/models/
60-
6151
python setup_wired.py bdist_wheel "${{ github.ref_name }}"
6252
6353
- name: Publish distribution 📦 to PyPI

README.md

+81-40
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
</div>
1616

1717
### 最近更新
18-
- **2024.11.22**
19-
- 支持单字符匹配方案,需要RapidOCR>=1.4.0
2018
- **2024.12.25**
2119
- 补充文档扭曲矫正/去模糊/去阴影/二值化方案,可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
2220
- **2025.1.9**
23-
- RapidTable支持了 unitable 模型,精度更高支持torch推理,补充测评数据
21+
- RapidTable支持了 unitable 模型,精度更高支持torch推理,补充测评数据
22+
- **2025.3.9**
23+
- 输入输出格式对齐RapidTable
24+
- 支持模型自动下载
25+
- 增加来自paddle的新表格分类模型
2426

2527
### 简介
2628
💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。\
@@ -81,55 +83,63 @@ pip install wired_table_rec lineless_table_rec table_cls
8183
```
8284

8385
### 快速使用
84-
86+
> ⚠️注意:在 `wired_table_rec`/`table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 后,采用同RapidTable完全一致格式的输入输出
8587
``` python {linenos=table}
86-
import os
88+
from pathlib import Path
8789

88-
from lineless_table_rec import LinelessTableRecognition
89-
from lineless_table_rec.utils_table_recover import format_html, plot_rec_box_with_logic_info, plot_rec_box
90+
from wired_table_rec.utils.utils import VisTable
9091
from table_cls import TableCls
91-
from wired_table_rec import WiredTableRecognition
92-
from rapidocr_onnxruntime import RapidOCR
93-
94-
lineless_engine = LinelessTableRecognition()
95-
wired_engine = WiredTableRecognition()
96-
# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型
97-
table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q")
98-
img_path = f'images/img14.jpg'
92+
from wired_table_rec.main import WiredTableInput, WiredTableRecognition
93+
from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
94+
from rapidocr_onnxruntime import RapidOCR, VisRes
95+
96+
# 初始化引擎
97+
wired_input = WiredTableInput()
98+
lineless_input = LinelessTableInput()
99+
wired_engine = WiredTableRecognition(wired_input)
100+
lineless_engine = LinelessTableRecognition(lineless_input)
101+
# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s)
102+
table_cls = TableCls()
103+
img_path = f'tests/test_files/table.jpg'
99104

100105
cls,elasp = table_cls(img_path)
101106
if cls == 'wired':
102107
table_engine = wired_engine
103108
else:
104109
table_engine = lineless_engine
105-
106-
html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
107-
print(f"elasp: {elasp}")
108-
109-
# 使用其他ocr模型
110-
#ocr_engine =RapidOCR(det_model_path="xxx/det_server_infer.onnx",rec_model_path="xxx/rec_server_infer.onnx")
111-
#ocr_res, _ = ocr_engine(img_path)
112-
#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res)
113-
# output_dir = f'outputs'
114-
# complete_html = format_html(html)
115-
# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True)
116-
# with open(f"{output_dir}/table.html", "w", encoding="utf-8") as file:
117-
# file.write(complete_html)
118-
# # 可视化表格识别框 + 逻辑行列信息
119-
# plot_rec_box_with_logic_info(
120-
# img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons
110+
111+
table_results = table_engine(img_path, enhance_box_line=False)
112+
# 使用RapidOCR输入
113+
# ocr_engine = RapidOCR()
114+
# ocr_result, _ = ocr_engine(img_path)
115+
# table_results = table_engine(img_path, ocr_result=ocr_result)
116+
117+
# 可视化并存储结果,包含识别框+行列坐标
118+
# save_dir = Path("outputs")
119+
# save_dir.mkdir(parents=True, exist_ok=True)
120+
#
121+
# save_html_path = f"outputs/{Path(img_path).stem}.html"
122+
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
123+
# save_logic_path = (
124+
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
121125
# )
122-
# # 可视化 ocr 识别框
123-
# plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res)
126+
#
127+
# vis_table = VisTable()
128+
# vis_imged = vis_table(
129+
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
130+
# )
131+
124132
```
125133

126134
#### 单字ocr匹配
135+
127136
```python
128137
# 将单字box转换为行识别同样的结构
129138
from rapidocr_onnxruntime import RapidOCR
130-
from wired_table_rec.utils_table_recover import trans_char_ocr_res
139+
from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
140+
131141
img_path = "tests/test_files/wired/table4.jpg"
132-
ocr_engine =RapidOCR()
142+
ocr_engine = RapidOCR()
133143
ocr_res, _ = ocr_engine(img_path, return_word_box=True)
134144
ocr_res = trans_char_ocr_res(ocr_res)
135145
```
@@ -177,20 +187,51 @@ for i, res in enumerate(result):
177187

178188
### 核心参数
179189
```python
180-
wired_table_rec = WiredTableRecognition()
181-
html, elasp, polygons, logic_points, ocr_res = wired_table_rec(
190+
# 输入(WiredTableInput/LinelessTableInput)
191+
@dataclass
192+
class WiredTableInput:
193+
model_type: Optional[str] = "unet" #unet/cycle_center_net
194+
model_path: Union[str, Path, None, Dict[str, str]] = None
195+
use_cuda: bool = False
196+
device: str = "cpu"
197+
198+
@dataclass
199+
class LinelessTableInput:
200+
model_type: Optional[str] = "lore" #lore
201+
model_path: Union[str, Path, None, Dict[str, str]] = None
202+
use_cuda: bool = False
203+
device: str = "cpu"
204+
205+
# 输出(WiredTableOutput/LinelessTableOutput)
206+
@dataclass
207+
class WiredTableOutput:
208+
pred_html: Optional[str] = None
209+
cell_bboxes: Optional[np.ndarray] = None
210+
logic_points: Optional[np.ndarray] = None
211+
elapse: Optional[float] = None
212+
213+
@dataclass
214+
class LinelessTableOutput:
215+
pred_html: Optional[str] = None
216+
cell_bboxes: Optional[np.ndarray] = None
217+
logic_points: Optional[np.ndarray] = None
218+
elapse: Optional[float] = None
219+
```
220+
221+
```python
222+
wired_table_rec = WiredTableRecognition(WiredTableInput())
223+
table_results = wired_table_rec(
182224
img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
183225
ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
184-
version="v2", #默认使用v2线框模型,切换阿里读光模型可改为v1
185226
enhance_box_line=True, # 识别框切割增强(关闭避免多余切割,开启减少漏切割),默认为True
186227
col_threshold=15, # 识别框左边界x坐标差值小于col_threshold的默认同列
187228
row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行
188229
rotated_fix=True, # wiredV2支持,轻度旋转(-45°~45°)矫正,默认为True
189230
need_ocr=True, # 是否进行OCR识别, 默认为True
190231
rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
191232
)
192-
lineless_table_rec = LinelessTableRecognition()
193-
html, elasp, polygons, logic_points, ocr_res = lineless_table_rec(
233+
lineless_table_rec = LinelessTableRecognition(LinelessTableInput())
234+
table_results = lineless_table_rec(
194235
img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
195236
ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型
196237
need_ocr=True, # 是否进行OCR识别, 默认为True

README_en.md

+74-37
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
</div>
1414

1515
### Recent Updates
16-
- **2024.11.16**
17-
- Added document distortion correction solution, which can be used as a pre-processing step [RapidUnWrap](https://github.com/Joker1212/RapidUnWrap)
18-
- **2024.11.22**
19-
- Support Char Rec, RapidOCR>=1.4.0
2016
- **2024.12.25**
2117
- Add document preprocessing solutions for distortion correction, deblurring, shadow removal, and binarization. [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
18+
- **2025.1.9**
19+
- RapidTable now supports the Unitable model; evaluation data has been added.
20+
- **2025.3.9**
21+
- Align input and output formats with RapidTable
22+
- Support automatic model downloading
23+
- Introduce a new table classification model from [PaddleOCR](https://github.com/PaddlePaddle/PaddleX/blob/release/3.0-rc/docs/module_usage/tutorials/ocr_modules/table_classification.en.md).
2224
### Introduction
2325
💖 This repository serves as an inference library for structured recognition of tables within documents, including models for wired and wireless table recognition from Alibaba DulaLight, a wired table model from llaipython (WeChat), and a built-in table classification model from NetEase Qanything.
2426

@@ -79,55 +81,62 @@ pip install wired_table_rec lineless_table_rec table_cls
7981
```
8082

8183
### Quick start
84+
> ⚠️ Note: since `wired_table_rec`/`table_cls` >= 1.2.0 and `lineless_table_rec` > 0.1.0, the input and output formats are identical to those of `RapidTable`.
8285
8386
``` python {linenos=table}
84-
import os
87+
from pathlib import Path
8588

86-
from lineless_table_rec import LinelessTableRecognition
87-
from lineless_table_rec.utils_table_recover import format_html, plot_rec_box_with_logic_info, plot_rec_box
89+
from wired_table_rec.utils.utils import VisTable
8890
from table_cls import TableCls
89-
from wired_table_rec import WiredTableRecognition
90-
from rapidocr_onnxruntime import RapidOCR
91-
92-
lineless_engine = LinelessTableRecognition()
93-
wired_engine = WiredTableRecognition()
94-
# Default small YOLO model (0.1s), can switch to higher precision YOLOX (0.25s), or faster QAnything (0.07s) model
95-
table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q")
96-
img_path = f'images/img14.jpg'
91+
from wired_table_rec.main import WiredTableInput, WiredTableRecognition
92+
from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
93+
from rapidocr_onnxruntime import RapidOCR, VisRes
94+
95+
# init engine
96+
wired_input = WiredTableInput()
97+
lineless_input = LinelessTableInput()
98+
wired_engine = WiredTableRecognition(wired_input)
99+
lineless_engine = LinelessTableRecognition(lineless_input)
100+
# The default model is a small YOLO model (0.1s inference time), which can be switched to the higher-precision YOLOX (0.25s), the faster QAnything (0.07s), or the PaddlePaddle model (0.03s).
101+
table_cls = TableCls()
102+
img_path = f'tests/test_files/table.jpg'
97103

98104
cls,elasp = table_cls(img_path)
99105
if cls == 'wired':
100106
table_engine = wired_engine
101107
else:
102108
table_engine = lineless_engine
103-
104-
html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
105-
print(f"elasp: {elasp}")
106-
107-
# Use other OCR models
108-
#ocr_engine =RapidOCR(det_model_path="xxx/det_server_infer.onnx",rec_model_path="xxx/rec_server_infer.onnx")
109-
#ocr_res, _ = ocr_engine(img_path)
110-
#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res)
111-
112-
# output_dir = f'outputs'
113-
# complete_html = format_html(html)
114-
# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True)
115-
# with open(f"{output_dir}/table.html", "w", encoding="utf-8") as file:
116-
# file.write(complete_html)
117-
# Visualize table recognition boxes + logical row and column information
118-
# plot_rec_box_with_logic_info(
119-
# img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons
109+
110+
table_results = table_engine(img_path, enhance_box_line=False)
111+
# Use RapidOCR results as input
112+
# ocr_engine = RapidOCR()
113+
# ocr_result, _ = ocr_engine(img_path)
114+
# table_results = table_engine(img_path, ocr_result=ocr_result)
115+
116+
# Visualize and store the results, including detection bounding boxes and row/column coordinates.
117+
# save_dir = Path("outputs")
118+
# save_dir.mkdir(parents=True, exist_ok=True)
119+
#
120+
# save_html_path = f"outputs/{Path(img_path).stem}.html"
121+
# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
122+
# save_logic_path = (
123+
# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
124+
# )
125+
#
126+
# vis_table = VisTable()
127+
# vis_imged = vis_table(
128+
# img_path, table_results, save_html_path, save_drawed_path, save_logic_path
120129
# )
121-
# Visualize OCR recognition boxes
122-
# plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res)
123130
```
124131
#### Single Character OCR Matching
132+
125133
```python
126134
# Convert single character boxes to the same structure as line recognition
127135
from rapidocr_onnxruntime import RapidOCR
128-
from wired_table_rec.utils_table_recover import trans_char_ocr_res
136+
from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
137+
129138
img_path = "tests/test_files/wired/table4.jpg"
130-
ocr_engine =RapidOCR()
139+
ocr_engine = RapidOCR()
131140
ocr_res, _ = ocr_engine(img_path, return_word_box=True)
132141
ocr_res = trans_char_ocr_res(ocr_res)
133142
```
@@ -174,11 +183,39 @@ for i, res in enumerate(result):
174183

175184
### Core Parameters
176185
```python
186+
@dataclass
187+
class WiredTableInput:
188+
model_type: Optional[str] = "unet" #unet/cycle_center_net
189+
model_path: Union[str, Path, None, Dict[str, str]] = None
190+
use_cuda: bool = False
191+
device: str = "cpu"
192+
193+
@dataclass
194+
class LinelessTableInput:
195+
model_type: Optional[str] = "lore" #lore
196+
model_path: Union[str, Path, None, Dict[str, str]] = None
197+
use_cuda: bool = False
198+
device: str = "cpu"
199+
200+
@dataclass
201+
class WiredTableOutput:
202+
pred_html: Optional[str] = None
203+
cell_bboxes: Optional[np.ndarray] = None
204+
logic_points: Optional[np.ndarray] = None
205+
elapse: Optional[float] = None
206+
207+
@dataclass
208+
class LinelessTableOutput:
209+
pred_html: Optional[str] = None
210+
cell_bboxes: Optional[np.ndarray] = None
211+
logic_points: Optional[np.ndarray] = None
212+
elapse: Optional[float] = None
213+
```
214+
```python
177215
wired_table_rec = WiredTableRecognition(WiredTableInput())
178216
table_results = wired_table_rec(
179217
img, # Image Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
180218
ocr_result, # Input rapidOCR recognition result, use internal rapidocr model by default if not provided
181-
version="v2", # Default to using v2 line model, switch to AliDamo model by changing to v1
182219
enhance_box_line=True, # Enhance box line find (turn off to avoid excessive cutting, turn on to reduce missed cuts), default is True
183220
need_ocr=True, # Whether to perform OCR recognition, default is True
184221
rec_again=True, # Whether to re-recognize table boxes without detected text by cropping them separately, default is True

0 commit comments

Comments
 (0)