Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modal OCR beta #91

Merged
merged 3 commits into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ HIKERAPI_TOKEN=asfdg # instagram parsing

# ocr
MYSTIC_TOKEN=444
MODAL_ENDPOINT=http://your-endpoint.modal.run

# optional config
REDIS_MAX_CONNECTIONS=128
Expand Down
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Config(BaseSettings):
HIKERAPI_TOKEN: str | None = None

MYSTIC_TOKEN: str | None = None
MODAL_ENDPOINT: str | None = None

REDIS_MAX_CONNECTIONS: int = 128

Expand Down
2 changes: 1 addition & 1 deletion src/flows/storage/memes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
etl_memes_from_raw_telegram_posts,
etl_memes_from_raw_vk_posts,
)
from src.storage.ocr.mystic import ocr_content
from src.storage.ocr.modal import ocr_content
from src.storage.schemas import OcrResult
from src.storage.service import (
find_meme_duplicate,
Expand Down
59 changes: 59 additions & 0 deletions src/storage/ocr/modal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import Any

import httpx

from src.config import settings
from src.storage.schemas import OcrResult

HEADERS = {"accept": "application/json", "Content-Type": "application/octet-stream"}


async def ocr_modal(
    file_content: bytes,
    language: str = "en",
    endpoint: str | None = settings.MODAL_ENDPOINT,
) -> dict[str, Any]:
    """POST raw file bytes to the Modal OCR endpoint and return the decoded JSON.

    Args:
        file_content: Raw bytes sent as the request body
            (``Content-Type: application/octet-stream`` via ``HEADERS``).
        language: Value sent as the ``lang`` query parameter.
        endpoint: URL to post to. Defaults to ``settings.MODAL_ENDPOINT`` as it
            was when this module was imported; that setting is optional, so
            the default may be ``None``.

    Returns:
        The JSON-decoded response body.
        NOTE(review): the caller in this module also handles a top-level list,
        so the payload is presumably not always a dict -- confirm against the
        deployed endpoint.

    Raises:
        ValueError: If no endpoint is configured or passed.
        httpx.HTTPStatusError: If the endpoint answers with an error status.
        httpx.HTTPError: On transport-level failures.
    """
    # Fail with an explicit message instead of an opaque error from inside
    # httpx when MODAL_ENDPOINT was never set.
    if not endpoint:
        raise ValueError("Modal OCR endpoint is not configured (MODAL_ENDPOINT)")

    async with httpx.AsyncClient() as client:
        response = await client.post(
            endpoint,
            params={"lang": language},
            headers=HEADERS,
            # Raw bytes belong in `content=`; httpx reserves `data=` for form
            # fields and deprecates passing bytes through it.
            content=file_content,
        )
        response.raise_for_status()
        return response.json()


async def ocr_content(content: bytes, language: str = "ru") -> OcrResult | None:
    """Run Modal OCR on ``content`` and wrap the outcome in an ``OcrResult``.

    Best-effort: any failure (request error, empty response, unexpected
    payload shape) is printed and reported to the caller as ``None``.

    Args:
        content: Raw bytes forwarded to ``ocr_modal``.
        language: Language code forwarded to ``ocr_modal``.

    Returns:
        An ``OcrResult`` whose ``text`` is the space-joined second element of
        every detection row that has one, or ``None`` on any failure.
    """
    try:
        ocr_result = await ocr_modal(content, language)
    except Exception as e:
        print(f"Modal OCR error: {e}")
        return None

    if ocr_result is None:
        print(f"Modal OCR returned no result: {ocr_result}.")
        return None

    try:
        if isinstance(ocr_result, list):
            # A bare list of rows is wrapped so raw_result is always a dict
            # with the same "outputs" -> "value" nesting as the dict payload.
            detections = ocr_result
            raw_result = {"outputs": [{"value": ocr_result}]}
        else:
            raw_result = ocr_result
            first_output = ocr_result.get("outputs", [{}])[0]
            detections = first_output.get("value", [])

        # Index 1 of each row is taken as the text; shorter rows are skipped.
        fragments = [row[1] for row in detections if len(row) > 1]

        return OcrResult(
            model="easyocr",
            text=" ".join(fragments),
            raw_result=raw_result,
        )
    except Exception as e:
        print(f"Error parsing OCR result: {e}")
        return None
5 changes: 4 additions & 1 deletion src/tgbot/handlers/upload/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ async def create_meme_from_meme_raw_upload(
else meme_upload["date"]
)

if meme_upload["media"].get("duration") and meme_upload["media"].get("duration") > 0:
if (
meme_upload["media"].get("duration")
and meme_upload["media"].get("duration") > 0
):
meme_type = MemeType.VIDEO
else:
meme_type = MemeType.IMAGE
Expand Down
Loading