Skip to content

Commit c5f5e77

Browse files
authored
Merge pull request #121 from VikParuchuri/dev
Fix rotate and copy bugs
2 parents 53135d0 + 4485273 commit c5f5e77

File tree

7 files changed, with 272 additions and 261 deletions.

7 files changed, with 272 additions and 261 deletions.

Diff for: poetry.lock

+263-257
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "surya-ocr"
-version = "0.4.11"
+version = "0.4.12"
 description = "OCR, layout, reading order, and line detection in 90+ languages"
 authors = ["Vik Paruchuri <[email protected]>"]
 readme = "README.md"

Diff for: surya/detection.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def batch_detection(images: List, model: SegformerForRegressionMask, processor,
     batch_size = get_batch_size()
     heatmap_count = model.config.num_labels
 
+    images = [image.convert("RGB") for image in images] # also copies the images
+
     orig_sizes = [image.size for image in images]
     splits_per_image = [get_total_splits(size, processor) for size in orig_sizes]
 

Diff for: surya/model/recognition/processor.py

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def numpy_resize(cls, image: np.ndarray, size, interpolation=cv2.INTER_LANCZOS4)
         max_width, max_height = size["width"], size["height"]
 
         if (height == max_height and width <= max_width) or (width == max_width and height <= max_height):
+            image = image.transpose(2, 0, 1)
             return image
 
         scale = min(max_width / width, max_height / height)

Diff for: surya/ocr.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
 
 
 def run_recognition(images: List[Image.Image], langs: List[List[str]], rec_model, rec_processor, bboxes: List[List[List[int]]] = None, polygons: List[List[List[List[int]]]] = None, batch_size=None) -> List[OCRResult]:
-    images = convert_if_not_rgb(images)
     # Polygons need to be in corner format - [[x1, y1], [x2, y2], [x3, y3], [x4, y4]], bboxes in [x1, y1, x2, y2] format
     assert bboxes is not None or polygons is not None
     assert len(images) == len(langs), "You need to pass in one list of languages for each image"
+
+    images = convert_if_not_rgb(images)
+
     slice_map = []
     all_slices = []
     all_langs = []

Diff for: surya/ordering.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def batch_ordering(images: List, bboxes: List[List[List[float]]], model: OrderVi
     if batch_size is None:
         batch_size = get_batch_size()
 
-    images = convert_if_not_rgb(images)
+    images = [image.convert("RGB") for image in images] # also copies the images
 
     output_order = []
     for i in tqdm(range(0, len(images), batch_size), desc="Finding reading order"):

Diff for: surya/recognition.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def batch_recognition(images: List, languages: List[List[str]], model, processor
     for l in languages:
         assert len(l) <= settings.RECOGNITION_MAX_LANGS, f"OCR only supports up to {settings.RECOGNITION_MAX_LANGS} languages per image, you passed {l}."
 
-    images = convert_if_not_rgb(images)
+    images = [image.convert("RGB") for image in images] # also copies the images
     if batch_size is None:
         batch_size = get_batch_size()
 

0 commit comments

Comments
 (0)