Skip to content

Commit

Permalink
A ~working Modal.com OCR implementation. Uses modal.py; the .env field is added to the config
Browse files Browse the repository at this point in the history
  • Loading branch information
zakhar-kogan committed Nov 30, 2024
1 parent 270f982 commit 9722c4d
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class Config(BaseSettings):
VK_TOKEN: str | None = None

MYSTIC_TOKEN: str | None = None
MODAL_ENDPOINT: str | None = None

# @model_validator(mode="after")
# def validate_sentry_non_local(self) -> "Config":
Expand Down
63 changes: 63 additions & 0 deletions src/storage/ocr/modal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import uuid
from typing import Any

import httpx

from src.config import settings
from src.storage.schemas import OcrResult

# Request headers passed to the OCR endpoint: the image is uploaded as a raw
# byte stream and a JSON reply is requested.
HEADERS: dict[str, str] = {
    "accept": "application/json",
    "Content-Type": "application/octet-stream",
}

async def ocr_modal(
    file_content: bytes,
    language: str = "en",
    endpoint: str | None = None,
) -> dict[str, Any]:
    """POST raw image bytes to the OCR endpoint and return the decoded JSON.

    Args:
        file_content: Raw bytes of the image to recognise.
        language: Value sent as the ``lang`` query parameter.
        endpoint: URL to post to. When omitted (or ``None``),
            ``settings.MODAL_ENDPOINT`` is read at call time.

    Returns:
        The JSON-decoded response body.
        NOTE(review): the caller in this module also handles a list payload,
        so the body may not always be a dict - confirm against the service.

    Raises:
        ValueError: If no endpoint was passed and none is configured.
        httpx.HTTPStatusError: If the service answers with a 4xx/5xx status.
        httpx.HTTPError: On transport-level failures.
    """
    # Resolve the endpoint per call instead of in the signature: a default
    # argument is evaluated once at import time, and the setting is optional.
    url = endpoint if endpoint is not None else settings.MODAL_ENDPOINT
    if not url:
        raise ValueError("MODAL_ENDPOINT is not configured")

    async with httpx.AsyncClient() as client:
        response = await client.post(
            url,
            params={"lang": language},
            headers=HEADERS,
            # httpx expects raw bytes via `content=`; `data=` is meant for
            # form fields and is deprecated for byte payloads.
            content=file_content,
        )
        response.raise_for_status()
        return response.json()


async def ocr_content(content: bytes, language: str = "ru") -> OcrResult | None:
    """Run OCR on image bytes and wrap the outcome in an ``OcrResult``.

    Best-effort: any failure while calling the service or while parsing its
    reply is printed and reported to the caller as ``None``.

    Args:
        content: Raw bytes of the image to recognise.
        language: Language code forwarded to ``ocr_modal``.

    Returns:
        An ``OcrResult`` whose ``text`` is the space-joined second element of
        every row that has more than one element, or ``None`` on failure.
    """
    try:
        payload = await ocr_modal(content, language)
    except Exception as e:
        print(f"Modal OCR error: {e}")
        return None

    if payload is None:
        print(f"Modal OCR returned no result: {payload}.")
        return None

    try:
        if isinstance(payload, list):
            # A bare list of rows is wrapped so raw_result is always a dict.
            raw_result = {"outputs": [{"value": payload}]}
            rows = payload
        else:
            raw_result = payload
            rows = payload.get("outputs", [{}])[0].get("value", [])

        fragments = [row[1] for row in rows if len(row) > 1]

        return OcrResult(
            model="easyocr",
            text=" ".join(fragments),
            raw_result=raw_result,
        )
    except Exception as e:
        print(f"Error parsing OCR result: {e}")
        return None
18 changes: 18 additions & 0 deletions src/storage/ocr/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from pathlib import Path

# Sample image bytes, read once at import time from the current working directory.
img = Path("test.jpg").read_bytes()

from modal import ocr_content


async def test_ocr_content():
    """Run ``ocr_content`` on the module-level image with language "en" and return its result."""
    return await ocr_content(img, "en")


if __name__ == "__main__":
    from asyncio import run as run_coroutine

    # Manual entry point: drive the coroutine with the event loop in debug mode.
    run_coroutine(test_ocr_content(), debug=True)
Binary file added test.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 9722c4d

Please sign in to comment.