Merge pull request #121 from VikParuchuri/dev

VikParuchuri · web-flow · commit c5f5e779435e · 2024-05-28T14:43:55.000-07:00
Fix rotate and copy bugs
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "surya-ocr"
-version = "0.4.11"
+version = "0.4.12"
 description = "OCR, layout, reading order, and line detection in 90+ languages"
 authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
 readme = "README.md"
diff --git a/surya/detection.py b/surya/detection.py
@@ -30,6 +30,8 @@ def batch_detection(images: List, model: SegformerForRegressionMask, processor,
         batch_size = get_batch_size()
     heatmap_count = model.config.num_labels
 
+    images = [image.convert("RGB") for image in images]  # convert() returns a new image, so the inputs are also copied
+
     orig_sizes = [image.size for image in images]
     splits_per_image = [get_total_splits(size, processor) for size in orig_sizes]
 
diff --git a/surya/model/recognition/processor.py b/surya/model/recognition/processor.py
@@ -35,6 +35,7 @@ def numpy_resize(cls, image: np.ndarray, size, interpolation=cv2.INTER_LANCZOS4)
         max_width, max_height = size["width"], size["height"]
 
         if (height == max_height and width <= max_width) or (width == max_width and height <= max_height):
+            image = image.transpose(2, 0, 1)
             return image
 
         scale = min(max_width / width, max_height / height)
diff --git a/surya/ocr.py b/surya/ocr.py
@@ -9,10 +9,12 @@
 
 
 def run_recognition(images: List[Image.Image], langs: List[List[str]], rec_model, rec_processor, bboxes: List[List[List[int]]] = None, polygons: List[List[List[List[int]]]] = None, batch_size=None) -> List[OCRResult]:
-    images = convert_if_not_rgb(images)
     # Polygons need to be in corner format - [[x1, y1], [x2, y2], [x3, y3], [x4, y4]], bboxes in [x1, y1, x2, y2] format
     assert bboxes is not None or polygons is not None
     assert len(images) == len(langs), "You need to pass in one list of languages for each image"
+
+    images = convert_if_not_rgb(images)
+
     slice_map = []
     all_slices = []
     all_langs = []
diff --git a/surya/ordering.py b/surya/ordering.py
@@ -38,7 +38,7 @@ def batch_ordering(images: List, bboxes: List[List[List[float]]], model: OrderVi
     if batch_size is None:
         batch_size = get_batch_size()
 
-    images = convert_if_not_rgb(images)
+    images = [image.convert("RGB") for image in images]  # convert() returns a new image, so the inputs are also copied
 
     output_order = []
     for i in tqdm(range(0, len(images), batch_size), desc="Finding reading order"):
diff --git a/surya/recognition.py b/surya/recognition.py
@@ -29,7 +29,7 @@ def batch_recognition(images: List, languages: List[List[str]], model, processor
     for l in languages:
         assert len(l) <= settings.RECOGNITION_MAX_LANGS, f"OCR only supports up to {settings.RECOGNITION_MAX_LANGS} languages per image, you passed {l}."
 
-    images = convert_if_not_rgb(images)
+    images = [image.convert("RGB") for image in images]  # convert() returns a new image, so the inputs are also copied
     if batch_size is None:
         batch_size = get_batch_size()