feat: add Paddle classifier for table classification

Joker1212 · Joker1212 · commit 6cda006c15db · 2025-03-19T22:05:40.000+08:00
diff --git a/demo_table_cls.py b/demo_table_cls.py
@@ -1,8 +1,9 @@
 # -*- encoding: utf-8 -*-
 from table_cls import TableCls
 
-table_cls = TableCls()
-img_path = "tests/test_files/table_cls/lineless_table.png"
-cls_str, elapse = table_cls(img_path)
-print(cls_str)
-print(elapse)
+# Demo entry point: runs only when this file is executed as a script.
+if __name__ == "__main__":
+    # "yolox" is the ModelType.YOLO_CLS_X key of the classifier.
+    table_cls = TableCls(model_type="yolox")
+    img_path = "tests/test_files/table_cls/lineless_table_2.png"
+    # Calling the classifier returns the predicted label and the elapsed
+    # time measured with time.perf_counter().
+    cls_str, elapse = table_cls(img_path)
+    print(cls_str)
+    print(elapse)
diff --git a/lineless_table_rec/main.py b/lineless_table_rec/main.py
@@ -84,6 +84,7 @@ def __call__(
         need_ocr = True
         if kwargs:
             rec_again = kwargs.get("rec_again", True)
+            need_ocr = kwargs.get("need_ocr", True)
         img = self.load_img(content)
         try:
             polygons, logi_points = self.table_structure(img)
diff --git a/table_cls/main.py b/table_cls/main.py
@@ -1,26 +1,42 @@
 import time
+from enum import Enum
 from pathlib import Path
+from typing import Union, Dict
 
 import cv2
 import numpy as np
 from PIL import Image
 
-from .utils import InputType, LoadImage, OrtInferSession, resize_and_center_crop
+from .utils.download_model import DownloadModel
+from .utils.utils import InputType, LoadImage, OrtInferSession, resize_and_center_crop
 
-cur_dir = Path(__file__).resolve().parent
-q_cls_model_path = cur_dir / "models" / "table_cls.onnx"
-yolo_cls_model_path = cur_dir / "models" / "yolo_cls.onnx"
-yolo_cls_x_model_path = cur_dir / "models" / "yolo_cls_x.onnx"
+
+class ModelType(Enum):
+    """Identifiers of the table-classification models selectable by ``model_type``.
+
+    Each member's value is the string used as a key in ``KEY_TO_MODEL_URL``
+    and compared against in ``TableCls.__init__``.
+    """
+
+    YOLO_CLS_X = "yolox"
+    YOLO_CLS = "yolo"
+    PADDLE_CLS = "paddle"
+    Q_CLS = "q"
+
+
+# Base URL of the model repository; note that it already ends with "/".
+ROOT_URL = "https://www.modelscope.cn/models/RapidAI/RapidTable/resolve/master/"
+# Download URL for every supported ``ModelType`` value.  ROOT_URL carries the
+# trailing slash, so the relative part must not start with another one
+# (otherwise the URL contains an empty "//" path segment).
+KEY_TO_MODEL_URL = {
+    ModelType.YOLO_CLS_X.value: f"{ROOT_URL}table_cls/yolo_cls_x.onnx",
+    ModelType.YOLO_CLS.value: f"{ROOT_URL}table_cls/yolo_cls.onnx",
+    ModelType.PADDLE_CLS.value: f"{ROOT_URL}table_cls/paddle_cls.onnx",
+    ModelType.Q_CLS.value: f"{ROOT_URL}table_cls/q_cls.onnx",
+}
 
 
 class TableCls:
-    def __init__(self, model_type="yolo", model_path=yolo_cls_model_path):
-        if model_type == "yolo":
+    def __init__(self, model_type=ModelType.YOLO_CLS.value, model_path=None):
+        """Create the classification engine selected by ``model_type``.
+
+        Args:
+            model_type: String key of the engine. "yolo" and "yolox" use
+                ``YoloCls``, "paddle" uses ``PaddleCls``; every other value
+                falls through to ``QanythingCls``.
+            model_path: Explicit model location. When ``None`` the path is
+                resolved by ``get_model_path``.
+
+        NOTE(review): the comparisons are against the ``.value`` strings, so
+        passing a ``ModelType`` member itself matches none of the branches.
+        """
+        model_path = self.get_model_path(model_type, model_path)
+        if model_type == ModelType.YOLO_CLS.value:
+            self.table_engine = YoloCls(model_path)
+        elif model_type == ModelType.YOLO_CLS_X.value:
             self.table_engine = YoloCls(model_path)
-        elif model_type == "yolox":
-            self.table_engine = YoloCls(yolo_cls_x_model_path)
+        elif model_type == ModelType.PADDLE_CLS.value:
+            self.table_engine = PaddleCls(model_path)
         else:
-            model_path = q_cls_model_path
             self.table_engine = QanythingCls(model_path)
         self.load_img = LoadImage()
 
@@ -32,6 +48,69 @@ def __call__(self, content: InputType):
         table_elapse = time.perf_counter() - ss
         return predict_cla, table_elapse
 
+    @staticmethod
+    def get_model_path(
+        model_type: str, model_path: Union[str, Path, None]
+    ) -> Union[str, Path, Dict[str, str]]:
+        """Resolve the model file to load for ``model_type``.
+
+        Args:
+            model_type: Key into ``KEY_TO_MODEL_URL`` (a ``ModelType`` value).
+            model_path: Explicit model location. When given it is returned
+                unchanged and nothing is downloaded.
+
+        Returns:
+            ``model_path`` itself when provided; otherwise the local path
+            returned by ``DownloadModel.download`` (or a dict of such paths
+            when the URL entry is a dict).
+
+        Raises:
+            ValueError: If ``model_type`` has no entry in ``KEY_TO_MODEL_URL``
+                or the entry is neither a str nor a dict.
+        """
+        if model_path is not None:
+            return model_path
+
+        model_url = KEY_TO_MODEL_URL.get(model_type)
+        if model_url is None:
+            # Name the real problem instead of reporting "NoneType" as a URL type.
+            raise ValueError(
+                f"Unsupported model_type: {model_type!r}. "
+                f"Supported values: {sorted(KEY_TO_MODEL_URL)}"
+            )
+
+        if isinstance(model_url, str):
+            return DownloadModel.download(model_url)
+
+        if isinstance(model_url, dict):
+            # Prefix each file with the model type so names cannot collide.
+            return {
+                k: DownloadModel.download(
+                    url, save_model_name=f"{model_type}_{Path(url).name}"
+                )
+                for k, url in model_url.items()
+            }
+
+        raise ValueError(f"Model URL: {type(model_url)} is not between str and dict.")
+
+
+class PaddleCls:
+    """Classifier that labels a table image as "wired" or "wireless".
+
+    Inference is delegated to an ``OrtInferSession`` built from ``model_path``.
+    """
+
+    def __init__(self, model_path):
+        self.table_cls = OrtInferSession(model_path)
+        # Predicted class index -> label returned by ``__call__``.
+        self.cls = {0: "wired", 1: "wireless"}
+        # Geometry: shorter side is resized to ``resize_short``, then a
+        # centered ``inp_h`` x ``inp_w`` window is cropped.
+        self.resize_short = 256
+        self.inp_h = 224
+        self.inp_w = 224
+        # Per-channel normalization constants.
+        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+
+    def preprocess(self, img):
+        """Convert an H x W x C image array into a 1 x C x H x W float32 tensor."""
+        # Scale so that the shorter side becomes ``resize_short``.
+        src_h, src_w = img.shape[:2]
+        scale = float(self.resize_short) / min(src_w, src_h)
+        target_size = (int(round(src_w * scale)), int(round(src_h * scale)))
+        resized = cv2.resize(
+            img, dsize=target_size, interpolation=cv2.INTER_LANCZOS4
+        )
+
+        # Cut the centered window out of the resized image.
+        res_h, res_w = resized.shape[:2]
+        left = (res_w - self.inp_w) // 2
+        top = (res_h - self.inp_h) // 2
+        patch = resized[top : top + self.inp_h, left : left + self.inp_w, :]
+
+        # Divide by 255, then normalize each channel with mean / std.
+        normalized = (patch.astype(np.float32) / 255.0 - self.mean) / self.std
+
+        # HWC -> CHW, plus a leading batch axis for the single image.
+        return np.expand_dims(normalized.transpose(2, 0, 1), axis=0)
+
+    def __call__(self, img):
+        """Run the session on ``img`` and return the most frequent predicted label."""
+        scores = self.table_cls(img)[0]
+        votes = list(np.argmax(scores, axis=1))
+        winner = max(set(votes), key=votes.count)
+        return self.cls[winner]
+
 
 class QanythingCls:
     def __init__(self, model_path):
diff --git a/table_cls/utils/__init__.py b/table_cls/utils/__init__.py
diff --git a/table_cls/utils/download_model.py b/table_cls/utils/download_model.py
@@ -0,0 +1,67 @@
+import io
+from pathlib import Path
+from typing import Optional, Union
+
+import requests
+from tqdm import tqdm
+
+from .logger import get_logger
+
+logger = get_logger("DownloadModel")
+
+PROJECT_DIR = Path(__file__).resolve().parent.parent
+DEFAULT_MODEL_DIR = PROJECT_DIR / "models"
+
+
+class DownloadModel:
+    """Fetch model files over HTTP and cache them in a local directory."""
+
+    @classmethod
+    def download(
+        cls,
+        model_full_url: Union[str, Path],
+        save_dir: Union[str, Path, None] = None,
+        save_model_name: Optional[str] = None,
+    ) -> str:
+        """Download ``model_full_url`` unless it is already cached.
+
+        Args:
+            model_full_url: URL of the model file.
+            save_dir: Target directory (str or Path); defaults to
+                ``DEFAULT_MODEL_DIR``. Created if missing.
+            save_model_name: File name to save under; defaults to the last
+                path component of ``model_full_url``.
+
+        Returns:
+            The local file path as a string.
+
+        Raises:
+            DownloadModelError: If downloading or writing the file fails.
+        """
+        # ``save_dir`` may be a plain str; normalize it so the Path API works.
+        save_dir = Path(DEFAULT_MODEL_DIR if save_dir is None else save_dir)
+        save_dir.mkdir(parents=True, exist_ok=True)
+
+        if save_model_name is None:
+            save_model_name = Path(model_full_url).name
+
+        save_file_path = save_dir / save_model_name
+        if save_file_path.exists():
+            logger.debug("%s already exists", save_file_path)
+            return str(save_file_path)
+
+        try:
+            logger.info("Download %s to %s", model_full_url, save_dir)
+            file = cls.download_as_bytes_with_progress(model_full_url, save_model_name)
+            cls.save_file(save_file_path, file)
+        except Exception as exc:
+            raise DownloadModelError(
+                f"Failed to download {model_full_url} to {save_file_path}"
+            ) from exc
+        return str(save_file_path)
+
+    @staticmethod
+    def download_as_bytes_with_progress(
+        url: Union[str, Path], name: Optional[str] = None
+    ) -> bytes:
+        """Stream ``url`` into memory while showing a progress bar named ``name``.
+
+        Raises:
+            requests.HTTPError: If the server answers with a 4xx/5xx status.
+        """
+        # The context manager releases the streamed connection on every exit.
+        with requests.get(
+            str(url), stream=True, allow_redirects=True, timeout=180
+        ) as resp:
+            # Fail loudly instead of storing an HTTP error page as a model.
+            resp.raise_for_status()
+            total = int(resp.headers.get("content-length", 0))
+            bio = io.BytesIO()
+            with tqdm(
+                desc=name, total=total, unit="b", unit_scale=True, unit_divisor=1024
+            ) as pbar:
+                for chunk in resp.iter_content(chunk_size=65536):
+                    pbar.update(len(chunk))
+                    bio.write(chunk)
+        return bio.getvalue()
+
+    @staticmethod
+    def save_file(save_path: Union[str, Path], file: bytes):
+        """Write ``file`` to ``save_path``.
+
+        The bytes go to a sibling ``.part`` file that is then moved into
+        place, so an interrupted write does not leave a truncated file at
+        ``save_path`` that ``download`` would later treat as cached.
+        """
+        save_path = Path(save_path)
+        tmp_path = save_path.with_name(save_path.name + ".part")
+        with open(tmp_path, "wb") as f:
+            f.write(file)
+        tmp_path.replace(save_path)
+
+
+class DownloadModelError(Exception):
+    """Raised when a model file cannot be downloaded or saved."""
diff --git a/table_cls/utils/logger.py b/table_cls/utils/logger.py
@@ -0,0 +1,21 @@
+# -*- encoding: utf-8 -*-
+# @Author: Joker1212
+# @Contact: xinyijianggo@gmail.com
+import logging
+from functools import lru_cache
+
+
+@lru_cache(maxsize=32)
+def get_logger(name: str) -> logging.Logger:
+    """Return the logger called ``name`` with a DEBUG-level stream handler attached.
+
+    Results are memoized by ``lru_cache``: while an entry stays in the cache,
+    repeated calls with the same name return the already configured logger
+    without attaching another handler.
+    """
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(logging.DEBUG)
+    stream_handler.setFormatter(
+        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: %(message)s")
+    )
+
+    named_logger = logging.getLogger(name)
+    named_logger.setLevel(logging.DEBUG)
+    named_logger.addHandler(stream_handler)
+    return named_logger
diff --git a/table_cls/utils/utils.py b/table_cls/utils/utils.py