Update LoadImage: accept PIL.Image input; skip RGB-to-BGR conversion for 3-channel ndarray input

SWHL · SWHL · commit d328ffcc4f86 · 2024-03-10T14:19:11.000+08:00
diff --git a/lineless_table_rec/main.py b/lineless_table_rec/main.py
@@ -6,14 +6,14 @@
 import time
 import traceback
 from pathlib import Path
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
 
 import cv2
 import numpy as np
 from rapidocr_onnxruntime import RapidOCR
 
 from .lineless_table_process import DetProcess, get_affine_transform_upper_left
-from .utils import LoadImage, OrtInferSession
+from .utils import InputType, LoadImage, OrtInferSession
 from .utils_table_recover import (
     get_rotate_crop_image,
     match_ocr_cell,
@@ -29,6 +29,8 @@
 class LinelessTableRecognition:
     def __init__(
         self,
+        detect_model_path: Union[str, Path] = detect_model_path,
+        process_model_path: Union[str, Path] = process_model_path,
     ):
         self.mean = np.array([0.408, 0.447, 0.470], dtype=np.float32).reshape(1, 1, 3)
         self.std = np.array([0.289, 0.274, 0.278], dtype=np.float32).reshape(1, 1, 3)
@@ -43,7 +45,7 @@ def __init__(
         self.det_process = DetProcess()
         self.ocr = RapidOCR()
 
-    def __call__(self, content: Dict[str, Any]) -> str:
+    def __call__(self, content: InputType) -> str:
         ss = time.perf_counter()
         img = self.load_img(content)
 
@@ -92,8 +94,8 @@ def preprocess(self, img: np.ndarray) -> Dict[str, Any]:
         }
         return {"img": images, "meta": meta}
 
-    def infer(self, input: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
-        hm, st, wh, ax, cr, reg = self.det_session([input["img"]])
+    def infer(self, input_content: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
+        hm, st, wh, ax, cr, reg = self.det_session([input_content["img"]])
         output = {
             "hm": hm,
             "st": st,
@@ -103,7 +105,7 @@ def infer(self, input: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
             "reg": reg,
         }
         slct_logi_feat, slct_dets_feat, slct_output_dets = self.det_process(
-            output, input["meta"]
+            output, input_content["meta"]
         )
 
         slct_output_dets = slct_output_dets.reshape(-1, 4, 2)
diff --git a/lineless_table_rec/utils.py b/lineless_table_rec/utils.py
@@ -10,7 +10,7 @@
 from PIL import Image, UnidentifiedImageError
 
 root_dir = Path(__file__).resolve().parent
-InputType = Union[str, np.ndarray, bytes, Path]
+InputType = Union[str, np.ndarray, bytes, Path, Image.Image]
 
 
 class OrtInferSession:
@@ -91,8 +91,9 @@ def __call__(self, img: InputType) -> np.ndarray:
                 f"The img type {type(img)} does not in {InputType.__args__}"
             )
 
+        origin_img_type = type(img)
         img = self.load_img(img)
-        img = self.convert_img(img)
+        img = self.convert_img(img, origin_img_type)
         return img
 
     def load_img(self, img: InputType) -> np.ndarray:
@@ -111,9 +112,12 @@ def load_img(self, img: InputType) -> np.ndarray:
         if isinstance(img, np.ndarray):
             return img
 
+        if isinstance(img, Image.Image):
+            return np.array(img)
+
         raise LoadImageError(f"{type(img)} is not supported!")
 
-    def convert_img(self, img: np.ndarray):
+    def convert_img(self, img: np.ndarray, origin_img_type):
         if img.ndim == 2:
             return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
 
@@ -125,31 +129,20 @@ def convert_img(self, img: np.ndarray):
             if channel == 2:
                 return self.cvt_two_to_three(img)
 
+            if channel == 3:
+                if issubclass(origin_img_type, (str, Path, bytes, Image.Image)):
+                    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+                return img
+
             if channel == 4:
                 return self.cvt_four_to_three(img)
 
-            if channel == 3:
-                return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-
             raise LoadImageError(
                 f"The channel({channel}) of the img is not in [1, 2, 3, 4]"
             )
 
         raise LoadImageError(f"The ndim({img.ndim}) of the img is not in [2, 3]")
 
-    @staticmethod
-    def cvt_four_to_three(img: np.ndarray) -> np.ndarray:
-        """RGBA → BGR"""
-        r, g, b, a = cv2.split(img)
-        new_img = cv2.merge((b, g, r))
-
-        not_a = cv2.bitwise_not(a)
-        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
-
-        new_img = cv2.bitwise_and(new_img, new_img, mask=a)
-        new_img = cv2.add(new_img, not_a)
-        return new_img
-
     @staticmethod
     def cvt_two_to_three(img: np.ndarray) -> np.ndarray:
         """gray + alpha → BGR"""
@@ -164,6 +157,19 @@ def cvt_two_to_three(img: np.ndarray) -> np.ndarray:
         new_img = cv2.add(new_img, not_a)
         return new_img
 
+    @staticmethod
+    def cvt_four_to_three(img: np.ndarray) -> np.ndarray:
+        """RGBA → BGR"""
+        r, g, b, a = cv2.split(img)
+        new_img = cv2.merge((b, g, r))
+
+        not_a = cv2.bitwise_not(a)
+        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
+
+        new_img = cv2.bitwise_and(new_img, new_img, mask=a)
+        new_img = cv2.add(new_img, not_a)
+        return new_img
+
     @staticmethod
     def verify_exist(file_path: Union[str, Path]):
         if not Path(file_path).exists():