generated from zhanymkanov/fastapi_production_template
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A ~working Modal.com OCR implementation. Uses modal.py; the .env field is added to the config
- Loading branch information
1 parent
270f982
commit 9722c4d
Showing
4 changed files
with
82 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import uuid | ||
from typing import Any | ||
|
||
import httpx | ||
|
||
from src.config import settings | ||
from src.storage.schemas import OcrResult | ||
|
||
# Request headers for the OCR call: the body is sent as an opaque byte stream
# and a JSON reply is requested.
HEADERS = {
    "accept": "application/json",
    "Content-Type": "application/octet-stream",
}


async def ocr_modal(
    file_content: bytes,
    language: str = "en",
    endpoint: str = settings.MODAL_ENDPOINT,
) -> dict[str, Any]:
    """POST raw file bytes to the Modal OCR endpoint and return its JSON reply.

    Args:
        file_content: Raw bytes sent unchanged as the request body.
        language: Value passed as the ``lang`` query parameter.
        endpoint: URL to post to. The default is ``settings.MODAL_ENDPOINT``
            as it was when this module was imported.

    Returns:
        The decoded JSON body of the response.
        NOTE(review): annotated as a dict, but ``ocr_content`` also handles a
        list payload -- confirm the real shape against the deployed endpoint.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
        httpx.HTTPError: On transport-level failures (connect, timeout, ...).
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            endpoint,
            params={"lang": language},
            headers=HEADERS,
            # Raw bytes belong in `content=`; httpx reserves `data=` for form
            # fields and deprecates passing bytes through it.
            content=file_content,
        )
        response.raise_for_status()
        return response.json()
|
||
|
||
def _ocr_rows(ocr_result: Any) -> Any:
    """Return the detection rows contained in a raw OCR payload.

    A list payload is already the rows. A dict payload is expected to hold
    them under ``outputs[0]["value"]``; an absent, empty or null ``outputs``
    means "no detections" and yields an empty list instead of raising.
    """
    if isinstance(ocr_result, list):
        return ocr_result

    outputs = ocr_result.get("outputs", [{}])
    if not outputs:
        return []
    return outputs[0].get("value", [])


async def ocr_content(content: bytes, language: str = "ru") -> OcrResult | None:
    """Run OCR on ``content`` through Modal and wrap the reply in an ``OcrResult``.

    Args:
        content: Raw file bytes forwarded to ``ocr_modal``.
        language: Language code forwarded to ``ocr_modal``. Note that the
            default here ("ru") differs from ``ocr_modal``'s own default.

    Returns:
        An ``OcrResult`` whose ``text`` is the space-joined second element of
        every row and whose ``raw_result`` is always a dict, or ``None`` when
        the request fails, the endpoint returns nothing, or the payload cannot
        be parsed. Failures are printed, never raised.
    """
    # Deliberate best-effort boundary: any request failure becomes ``None``.
    try:
        ocr_result = await ocr_modal(content, language)
    except Exception as e:
        print(f"Modal OCR error: {e}")
        return None

    if ocr_result is None:
        print(f"Modal OCR returned no result: {ocr_result}.")
        return None

    try:
        rows = _ocr_rows(ocr_result)
        # Text sits at index 1 of each row -- presumably EasyOCR's
        # (bbox, text, confidence) layout; shorter rows are skipped.
        full_text = " ".join(r[1] for r in rows if len(r) > 1)

        # raw_result must be a dict, so a bare list is wrapped in the same
        # envelope the dict-shaped payload uses.
        if isinstance(ocr_result, list):
            raw_result = {"outputs": [{"value": ocr_result}]}
        else:
            raw_result = ocr_result

        return OcrResult(
            model="easyocr",
            text=full_text,
            raw_result=raw_result,
        )
    except Exception as e:
        # Any other unexpected payload shape is reported and treated as
        # "no result" rather than crashing the caller.
        print(f"Error parsing OCR result: {e}")
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from pathlib import Path | ||
|
||
from modal import ocr_content

# Sample image sent to the OCR endpoint; resolved relative to the working directory.
TEST_IMAGE = Path("test.jpg")


async def test_ocr_content():
    """Send the sample image through ``ocr_content`` and return its result.

    The image is read here rather than at import time, so importing this
    module does not fail when ``test.jpg`` is missing.
    """
    img = TEST_IMAGE.read_bytes()
    return await ocr_content(img, "en")


if __name__ == "__main__":
    import asyncio

    # Print the outcome so a manual run shows what the endpoint returned.
    print(asyncio.run(test_ocr_content(), debug=True))