RapidAI
diff --git a/‎.github/workflows/lineless_table_rec.yml
Lines changed: 0 additions & 8 deletions b/‎.github/workflows/lineless_table_rec.yml
Lines changed: 0 additions & 8 deletions
diff --git a/‎.github/workflows/table_cls.yml
Lines changed: 0 additions & 8 deletions b/‎.github/workflows/table_cls.yml
Lines changed: 0 additions & 8 deletions
diff --git a/‎.github/workflows/wired_table_rec.yml
Lines changed: 0 additions & 10 deletions b/‎.github/workflows/wired_table_rec.yml
Lines changed: 0 additions & 10 deletions
diff --git a/‎README.md
Lines changed: 81 additions & 40 deletions b/‎README.md
Lines changed: 81 additions & 40 deletions
diff --git a/‎README_en.md
Lines changed: 74 additions & 37 deletions b/‎README_en.md
Lines changed: 74 additions & 37 deletions
@@ -30,10 +30,6 @@ jobs:
           pip install -r requirements.txt
           pip install pytest
 
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
-          unzip lineless_table_rec_models.zip
-          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
-
           pytest tests/test_lineless_table_rec.py
 
   GenerateWHL_PushPyPi:
@@ -55,10 +51,6 @@ jobs:
           python -m pip install --upgrade pip
           pip install wheel get_pypi_latest_version
 
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
-          unzip lineless_table_rec_models.zip
-          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
-
           python setup_lineless.py bdist_wheel "${{ github.ref_name }}"
 
       # - name: Publish distribution 📦 to Test PyPI
 
@@ -29,10 +29,6 @@ jobs:
           pip install -r requirements.txt
           pip install pytest beautifulsoup4
 
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/table_cls_models.zip
-          unzip table_cls_models.zip
-          mv table_cls_models/*.onnx table_cls/models/
-
           pytest tests/test_table_cls.py
 
   GenerateWHL_PushPyPi:
@@ -54,10 +50,6 @@ jobs:
           python -m pip install --upgrade pip
           pip install wheel get_pypi_latest_version
 
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/table_cls_models.zip
-          unzip table_cls_models.zip
-          mv table_cls_models/*.onnx table_cls/models/
-
           python setup_table_cls.py bdist_wheel "${{ github.ref_name }}"
 
       - name: Publish distribution 📦 to PyPI
 
@@ -28,11 +28,6 @@ jobs:
         run: |
           pip install -r requirements.txt
           pip install pytest beautifulsoup4
-
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_models.zip
-          unzip wired_table_rec_models.zip
-          mv wired_table_rec_models/*.onnx wired_table_rec/models/
-
           pytest tests/test_wired_table_rec.py
 
   GenerateWHL_PushPyPi:
@@ -53,11 +48,6 @@ jobs:
           pip install -r requirements.txt
           python -m pip install --upgrade pip
           pip install wheel get_pypi_latest_version
-
-          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/wired_table_rec_models.zip
-          unzip wired_table_rec_models.zip
-          mv wired_table_rec_models/*.onnx wired_table_rec/models/
-
           python setup_wired.py bdist_wheel "${{ github.ref_name }}"
 
       - name: Publish distribution 📦 to PyPI
 
@@ -15,12 +15,14 @@
 </div>
 
 ### 最近更新
-- **2024.11.22**
-  - 支持单字符匹配方案，需要RapidOCR>=1.4.0
 - **2024.12.25**
     - 补充文档扭曲矫正/去模糊/去阴影/二值化方案，可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
 - **2025.1.9**
-  - RapidTable支持了 unitable 模型，精度更高支持torch推理，补充测评数据   
+  - RapidTable支持了 unitable 模型，精度更高支持torch推理，补充测评数据
+- **2025.3.9**
+    - 输入输出格式对齐RapidTable
+    - 支持模型自动下载
+    - 增加来自paddle的新表格分类模型
 
 ### 简介
 💖该仓库是用来对文档中表格做结构化识别的推理库，包括来自阿里读光有线和无线表格识别模型，llaipython(微信)贡献的有线表格模型，网易Qanything内置表格分类模型等。\
@@ -81,55 +83,63 @@ pip install wired_table_rec lineless_table_rec table_cls
 ```
 
 ### 快速使用
-
+> ⚠️注意：在 `wired_table_rec`/`table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 后，采用同RapidTable完全一致格式的输入输出
 ``` python {linenos=table}
-import os
+from pathlib import Path
 
-from lineless_table_rec import LinelessTableRecognition
-from lineless_table_rec.utils_table_recover import format_html, plot_rec_box_with_logic_info, plot_rec_box
+from wired_table_rec.utils.utils import VisTable
 from table_cls import TableCls
-from wired_table_rec import WiredTableRecognition
-from rapidocr_onnxruntime import RapidOCR 
-
-lineless_engine = LinelessTableRecognition()
-wired_engine = WiredTableRecognition()
-# 默认小yolo模型(0.1s)，可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型
-table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q")
-img_path = f'images/img14.jpg'
+from wired_table_rec.main import WiredTableInput, WiredTableRecognition
+from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
+from rapidocr_onnxruntime import RapidOCR, VisRes
+
+# 初始化引擎
+wired_input = WiredTableInput()
+lineless_input = LinelessTableInput()
+wired_engine = WiredTableRecognition(wired_input)
+lineless_engine = LinelessTableRecognition(lineless_input)
+# 默认小yolo模型(0.1s)，可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s)
+table_cls = TableCls()
+img_path = f'tests/test_files/table.jpg'
 
 cls,elasp = table_cls(img_path)
 if cls == 'wired':
     table_engine = wired_engine
 else:
     table_engine = lineless_engine
-  
-html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
-print(f"elasp: {elasp}")
-
-# 使用其他ocr模型
-#ocr_engine =RapidOCR(det_model_path="xxx/det_server_infer.onnx",rec_model_path="xxx/rec_server_infer.onnx")
-#ocr_res, _ = ocr_engine(img_path)
-#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res)
-# output_dir = f'outputs'
-# complete_html = format_html(html)
-# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True)
-# with open(f"{output_dir}/table.html", "w", encoding="utf-8") as file:
-#     file.write(complete_html)
-# # 可视化表格识别框 + 逻辑行列信息
-# plot_rec_box_with_logic_info(
-#     img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons
+
+table_results = table_engine(img_path, enhance_box_line=False)
+# 使用RapidOCR输入
+# ocr_engine = RapidOCR()
+# ocr_result, _ = ocr_engine(img_path)
+# table_results = table_engine(img_path, ocr_result=ocr_result)
+
+# 可视化并存储结果，包含识别框+行列坐标
+# save_dir = Path("outputs")
+# save_dir.mkdir(parents=True, exist_ok=True)
+#
+# save_html_path = f"outputs/{Path(img_path).stem}.html"
+# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
+# save_logic_path = (
+#     f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
 # )
-# # 可视化 ocr 识别框
-# plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res)
+# 
+# vis_table = VisTable()
+# vis_imged = vis_table(
+#     img_path, table_results, save_html_path, save_drawed_path, save_logic_path
+# )
+
 ```
 
 #### 单字ocr匹配
+
 ```python
 # 将单字box转换为行识别同样的结构)
 from rapidocr_onnxruntime import RapidOCR
-from wired_table_rec.utils_table_recover import trans_char_ocr_res
+from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
+
 img_path = "tests/test_files/wired/table4.jpg"
-ocr_engine =RapidOCR()
+ocr_engine = RapidOCR()
 ocr_res, _ = ocr_engine(img_path, return_word_box=True)
 ocr_res = trans_char_ocr_res(ocr_res)
 ```
@@ -177,20 +187,51 @@ for i, res in enumerate(result):
 
 ### 核心参数
 ```python
-wired_table_rec = WiredTableRecognition()
-html, elasp, polygons, logic_points, ocr_res = wired_table_rec(
+# 输入(WiredTableInput/LinelessTableInput)
+@dataclass
+class WiredTableInput:
+    model_type: Optional[str] = "unet" #unet/cycle_center_net
+    model_path: Union[str, Path, None, Dict[str, str]] = None
+    use_cuda: bool = False
+    device: str = "cpu"
+    
+@dataclass
+class LinelessTableInput:
+    model_type: Optional[str] = "lore" #lore
+    model_path: Union[str, Path, None, Dict[str, str]] = None
+    use_cuda: bool = False
+    device: str = "cpu"
+    
+# 输出(WiredTableOutput/LinelessTableOutput)
+@dataclass
+class WiredTableOutput:
+    pred_html: Optional[str] = None
+    cell_bboxes: Optional[np.ndarray] = None
+    logic_points: Optional[np.ndarray] = None
+    elapse: Optional[float] = None
+    
+@dataclass
+class LinelessTableOutput:
+    pred_html: Optional[str] = None
+    cell_bboxes: Optional[np.ndarray] = None
+    logic_points: Optional[np.ndarray] = None
+    elapse: Optional[float] = None
+```
+
+```python
+wired_table_rec = WiredTableRecognition(WiredTableInput())
+table_results = wired_table_rec(
     img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
     ocr_result, # 输入rapidOCR识别结果，不传默认使用内部rapidocr模型
-    version="v2", #默认使用v2线框模型，切换阿里读光模型可改为v1
     enhance_box_line=True, # 识别框切割增强(关闭避免多余切割，开启减少漏切割)，默认为True
     col_threshold=15, # 识别框左边界x坐标差值小于col_threshold的默认同列
     row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行
     rotated_fix=True, # wiredV2支持，轻度旋转(-45°~45°)矫正，默认为True
     need_ocr=True, # 是否进行OCR识别, 默认为True
     rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
 )
-lineless_table_rec = LinelessTableRecognition()
-html, elasp, polygons, logic_points, ocr_res = lineless_table_rec(
+lineless_table_rec = LinelessTableRecognition(LinelessTableInput())
+table_results = lineless_table_rec(
     img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
     ocr_result, # 输入rapidOCR识别结果，不传默认使用内部rapidocr模型
     need_ocr=True, # 是否进行OCR识别, 默认为True
 
@@ -13,12 +13,14 @@
 </div>
 
 ### Recent Updates
-- **2024.11.16**
-    - Added document distortion correction solution, which can be used as a pre-processing step [RapidUnWrap](https://github.com/Joker1212/RapidUnWrap)
-- **2024.11.22**
-    - Support Char Rec, RapidOCR>=1.4.0
 - **2024.12.25**
     - Add document preprocessing solutions for distortion correction, deblurring, shadow removal, and binarization. [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap)
+- **2025.1.9**
+  - RapidTable now supports the Unitable model; evaluation data has been added.
+- **2025.3.9**
+  - Align input and output formats with RapidTable
+  - Support automatic model downloading
+  - Introduce a new table classification model from [PaddleOCR](https://github.com/PaddlePaddle/PaddleX/blob/release/3.0-rc/docs/module_usage/tutorials/ocr_modules/table_classification.en.md).
 ### Introduction
 💖 This repository serves as an inference library for structured recognition of tables within documents, including models for wired and wireless table recognition from Alibaba DulaLight, a wired table model from llaipython (WeChat), and a built-in table classification model from NetEase Qanything.
 
@@ -79,55 +81,62 @@ pip install wired_table_rec lineless_table_rec table_cls
 ```
 
 ### Quick start
+> ⚠️ Note: for `wired_table_rec`/`table_cls` >= 1.2.0 and `lineless_table_rec` > 0.1.0, the input and output formats are the same as `RapidTable`.
 
 ``` python {linenos=table}
-import os
+from pathlib import Path
 
-from lineless_table_rec import LinelessTableRecognition
-from lineless_table_rec.utils_table_recover import format_html, plot_rec_box_with_logic_info, plot_rec_box
+from wired_table_rec.utils.utils import VisTable
 from table_cls import TableCls
-from wired_table_rec import WiredTableRecognition
-from rapidocr_onnxruntime import RapidOCR 
-
-lineless_engine = LinelessTableRecognition()
-wired_engine = WiredTableRecognition()
-# Default small YOLO model (0.1s), can switch to higher precision YOLOX (0.25s), or faster QAnything (0.07s) model
-table_cls = TableCls() # TableCls(model_type="yolox"),TableCls(model_type="q")
-img_path = f'images/img14.jpg'
+from wired_table_rec.main import WiredTableInput, WiredTableRecognition
+from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition
+from rapidocr_onnxruntime import RapidOCR, VisRes
+
+# init engine
+wired_input = WiredTableInput()
+lineless_input = LinelessTableInput()
+wired_engine = WiredTableRecognition(wired_input)
+lineless_engine = LinelessTableRecognition(lineless_input)
+# The default model is a small YOLO model (0.1s inference time); it can be switched to the higher-precision YOLOX (0.25s), the faster QAnything (0.07s), or the PaddlePaddle model (0.03s).
+table_cls = TableCls()
+img_path = f'tests/test_files/table.jpg'
 
 cls,elasp = table_cls(img_path)
 if cls == 'wired':
     table_engine = wired_engine
 else:
     table_engine = lineless_engine
-  
-html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
-print(f"elasp: {elasp}")
-
-# Use other OCR models
-#ocr_engine =RapidOCR(det_model_path="xxx/det_server_infer.onnx",rec_model_path="xxx/rec_server_infer.onnx")
-#ocr_res, _ = ocr_engine(img_path)
-#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res)  
-
-# output_dir = f'outputs'
-# complete_html = format_html(html)
-# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True)
-# with open(f"{output_dir}/table.html", "w", encoding="utf-8") as file:
-#     file.write(complete_html)
-# Visualize table recognition boxes + logical row and column information
-# plot_rec_box_with_logic_info(
-#     img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons
+
+table_results = table_engine(img_path, enhance_box_line=False)
+# Use RapidOCR results as input
+# ocr_engine = RapidOCR()
+# ocr_result, _ = ocr_engine(img_path)
+# table_results = table_engine(img_path, ocr_result=ocr_result)
+
+# Visualize and store the results, including detection bounding boxes and row/column coordinates.
+# save_dir = Path("outputs")
+# save_dir.mkdir(parents=True, exist_ok=True)
+#
+# save_html_path = f"outputs/{Path(img_path).stem}.html"
+# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
+# save_logic_path = (
+#     f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}"
+# )
+# 
+# vis_table = VisTable()
+# vis_imged = vis_table(
+#     img_path, table_results, save_html_path, save_drawed_path, save_logic_path
 # )
-# Visualize OCR recognition boxes
-# plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res)
 ```
 #### Single Character OCR Matching
+
 ```python
 # Convert single character boxes to the same structure as line recognition
 from rapidocr_onnxruntime import RapidOCR
-from wired_table_rec.utils_table_recover import trans_char_ocr_res
+from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res
+
 img_path = "tests/test_files/wired/table4.jpg"
-ocr_engine =RapidOCR()
+ocr_engine = RapidOCR()
 ocr_res, _ = ocr_engine(img_path, return_word_box=True)
 ocr_res = trans_char_ocr_res(ocr_res)
 ```
@@ -174,11 +183,39 @@ for i, res in enumerate(result):
 
 ### Core Parameters
 ```python
+@dataclass
+class WiredTableInput:
+    model_type: Optional[str] = "unet" #unet/cycle_center_net
+    model_path: Union[str, Path, None, Dict[str, str]] = None
+    use_cuda: bool = False
+    device: str = "cpu"
+    
+@dataclass
+class LinelessTableInput:
+    model_type: Optional[str] = "lore" #lore
+    model_path: Union[str, Path, None, Dict[str, str]] = None
+    use_cuda: bool = False
+    device: str = "cpu"
+    
+@dataclass
+class WiredTableOutput:
+    pred_html: Optional[str] = None
+    cell_bboxes: Optional[np.ndarray] = None
+    logic_points: Optional[np.ndarray] = None
+    elapse: Optional[float] = None
+    
+@dataclass
+class LinelessTableOutput:
+    pred_html: Optional[str] = None
+    cell_bboxes: Optional[np.ndarray] = None
+    logic_points: Optional[np.ndarray] = None
+    elapse: Optional[float] = None
+```
+```python
 wired_table_rec = WiredTableRecognition()
 html, elasp, polygons, logic_points, ocr_res = wired_table_rec(
     img,  # Image Union[str, np.ndarray, bytes, Path, PIL.Image.Image]
     ocr_result,  # Input rapidOCR recognition result, use internal rapidocr model by default if not provided
-    version="v2",  # Default to using v2 line model, switch to AliDamo model by changing to v1
     enhance_box_line=True,  # Enhance box line find (turn off to avoid excessive cutting, turn on to reduce missed cuts), default is True
     need_ocr=True,  # Whether to perform OCR recognition, default is True
     rec_again=True,  # Whether to re-recognize table boxes without detected text by cropping them separately, default is True