Skip to content

Commit 517825c

Browse files
authored
Merge pull request #61 from RapidAI/correct_cls_preprocess
support diff size cls model
2 parents 37fa544 + cd2ea53 commit 517825c

File tree

3 files changed

+17
-26
lines changed

3 files changed

+17
-26
lines changed

README.md

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
### 最近更新
1616
- **2024.10.13**
1717
- 补充最新paddlex-SLANet-plus 测评结果(已集成模型到[RapidTable](https://github.com/RapidAI/RapidTable)仓库)
18-
- **2024.10.17**
19-
- 补充最新surya 表格识别测评结果
2018
- **2024.10.22**
21-
- 补充复杂背景多表格检测提取方案[RapidTableDet](https://github.com/RapidAI/RapidTableDetection)
19+
- 补充复杂背景多表格检测提取方案[RapidTableDet](https://github.com/RapidAI/RapidTableDetection)
20+
- **2024.10.29**
21+
- 使用yolo11重新训练表格分类器,修正wired_table_rec v2逻辑坐标还原错误,并更新测评
22+
2223
### 简介
2324
💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。
2425

@@ -57,10 +58,10 @@
5758
| [deepdoctection(rag-flow)](https://github.com/deepdoctection/deepdoctection?tab=readme-ov-file) | 0.59975 | 0.69918 |
5859
| [ppstructure_table_master](https://github.com/PaddlePaddle/PaddleOCR/tree/main/ppstructure) | 0.61606 | 0.73892 |
5960
| [ppstructure_table_engine](https://github.com/PaddlePaddle/PaddleOCR/tree/main/ppstructure) | 0.67924 | 0.78653 |
60-
| table_cls + wired_table_rec v1 + lineless_table_rec | 0.68507 | 0.75140 |
6161
| [StructEqTable](https://github.com/UniModal4Reasoning/StructEqTable-Deploy) | 0.67310 | 0.81210 |
6262
| [RapidTable(SLANet)](https://github.com/RapidAI/RapidTable) | 0.71654 | 0.81067 |
63-
| table_cls + wired_table_rec v2 + lineless_table_rec | 0.73702 | 0.80210 |
63+
| table_cls + wired_table_rec v1 + lineless_table_rec | 0.75288 | 0.82574 |
64+
| table_cls + wired_table_rec v2 + lineless_table_rec | 0.77676 | 0.84580 |
6465
| [RapidTable(SLANet-plus)](https://github.com/RapidAI/RapidTable) | **0.84481** | **0.91369** |
6566

6667
### 使用建议
@@ -86,7 +87,8 @@ from wired_table_rec import WiredTableRecognition
8687

8788
lineless_engine = LinelessTableRecognition()
8889
wired_engine = WiredTableRecognition()
89-
table_cls = TableCls()
90+
# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型
91+
table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q")
9092
img_path = f'images/img14.jpg'
9193

9294
cls,elasp = table_cls(img_path)
@@ -158,7 +160,8 @@ for i, res in enumerate(result):
158160
- [x] 图片小角度偏移修正方法补充
159161
- [x] 增加数据集数量,增加更多评测对比
160162
- [x] 补充复杂场景表格检测和提取,解决旋转和透视导致的低识别率
161-
- [ ] 优化表格分类器,优化无线表格模型
163+
- [x] 优化表格分类器
164+
- [ ] 优化无线表格模型
162165

163166
### 处理流程
164167

table_cls/main.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,20 @@
55
import numpy as np
66
from PIL import Image
77

8-
from .utils import InputType, LoadImage, OrtInferSession, ResizePad
8+
from .utils import InputType, LoadImage, OrtInferSession
99

1010
cur_dir = Path(__file__).resolve().parent
1111
q_cls_model_path = cur_dir / "models" / "table_cls.onnx"
1212
yolo_cls_model_path = cur_dir / "models" / "yolo_cls.onnx"
13+
yolo_cls_x_model_path = cur_dir / "models" / "yolo_cls_x.onnx"
1314

1415

1516
class TableCls:
1617
def __init__(self, model_type="yolo", model_path=yolo_cls_model_path):
1718
if model_type == "yolo":
1819
self.table_engine = YoloCls(model_path)
20+
elif model_type == "yolox":
21+
self.table_engine = YoloCls(yolo_cls_x_model_path)
1922
else:
2023
model_path = q_cls_model_path
2124
self.table_engine = QanythingCls(model_path)
@@ -66,8 +69,9 @@ def __init__(self, model_path):
6669
self.cls = {0: "wireless", 1: "wired"}
6770

6871
def preprocess(self, img):
69-
img, *_ = ResizePad(img, 640)
70-
img = np.array(img, dtype=np.float32) / 255.0
72+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
73+
img = cv2.resize(img, (640, 640))
74+
img = np.array(img, dtype=np.float32) / 255
7175
img = img.transpose(2, 0, 1) # HWC to CHW
7276
img = np.expand_dims(img, axis=0) # Add batch dimension, only one image
7377
return img

table_cls/utils.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -178,19 +178,3 @@ def cvt_four_to_three(img: np.ndarray) -> np.ndarray:
178178
def verify_exist(file_path: Union[str, Path]):
179179
if not Path(file_path).exists():
180180
raise LoadImageError(f"{file_path} does not exist.")
181-
182-
183-
def ResizePad(img, target_size):
184-
h, w = img.shape[:2]
185-
m = max(h, w)
186-
ratio = target_size / m
187-
new_w, new_h = int(ratio * w), int(ratio * h)
188-
img = cv2.resize(img, (new_w, new_h), cv2.INTER_LINEAR)
189-
top = (target_size - new_h) // 2
190-
bottom = (target_size - new_h) - top
191-
left = (target_size - new_w) // 2
192-
right = (target_size - new_w) - left
193-
img1 = cv2.copyMakeBorder(
194-
img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
195-
)
196-
return img1, new_w, new_h, left, top

0 commit comments

Comments
 (0)