ffmemes · ohld · Dec 1, 2024 · Nov 30, 2024 · Nov 30, 2024 · Dec 1, 2024
diff --git a/.env.example b/.env.example
@@ -37,6 +37,7 @@ HIKERAPI_TOKEN=asfdg  # instagram parsing
 
 # ocr
 MYSTIC_TOKEN=444
+MODAL_ENDPOINT=http://your-endpoint.modal.run
 
 # optional config
 REDIS_MAX_CONNECTIONS=128

diff --git a/src/config.py b/src/config.py
@@ -37,6 +37,7 @@ class Config(BaseSettings):
     HIKERAPI_TOKEN: str | None = None
 
     MYSTIC_TOKEN: str | None = None
+    MODAL_ENDPOINT: str | None = None
 
     REDIS_MAX_CONNECTIONS: int = 128
 

diff --git a/src/flows/storage/memes.py b/src/flows/storage/memes.py
@@ -12,7 +12,7 @@
     etl_memes_from_raw_telegram_posts,
     etl_memes_from_raw_vk_posts,
 )
-from src.storage.ocr.mystic import ocr_content
+from src.storage.ocr.modal import ocr_content
 from src.storage.schemas import OcrResult
 from src.storage.service import (
     find_meme_duplicate,

diff --git a/src/storage/ocr/modal.py b/src/storage/ocr/modal.py
@@ -0,0 +1,59 @@
+from typing import Any
+
+import httpx
+
+from src.config import settings
+from src.storage.schemas import OcrResult
+
+# Headers sent with every OCR request: ask for a JSON reply and declare the
+# request body as raw bytes.
+HEADERS = {"accept": "application/json", "Content-Type": "application/octet-stream"}
+
+
+async def ocr_modal(
+    file_content: bytes,
+    language: str = "en",
+    endpoint: str | None = None,
+) -> dict[str, Any] | list[Any]:
+    """Send raw image bytes to the Modal OCR endpoint and return its JSON reply.
+
+    Args:
+        file_content: Bytes posted as the ``application/octet-stream`` body.
+        language: Value sent as the ``lang`` query parameter.
+        endpoint: URL to post to. When omitted, ``settings.MODAL_ENDPOINT`` is
+            used.
+
+    Returns:
+        The decoded JSON body of the response (a JSON object or array).
+
+    Raises:
+        ValueError: If no endpoint is passed and ``MODAL_ENDPOINT`` is unset.
+        httpx.HTTPStatusError: If the endpoint answers with a non-success
+            status code.
+    """
+    # Resolve the URL at call time: a signature default of
+    # ``settings.MODAL_ENDPOINT`` would be frozen at import and may be ``None``.
+    url = endpoint if endpoint is not None else settings.MODAL_ENDPOINT
+    if not url:
+        raise ValueError("Modal OCR endpoint is not configured (set MODAL_ENDPOINT)")
+
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            url,
+            params={"lang": language},
+            headers=HEADERS,
+            # ``content=`` is httpx's parameter for a raw bytes body; ``data=``
+            # is meant for form fields.
+            content=file_content,
+        )
+        response.raise_for_status()
+        return response.json()
+
+
+async def ocr_content(content: bytes, language: str = "ru") -> OcrResult | None:
+    """Run OCR on ``content`` via ``ocr_modal`` and wrap the reply in an ``OcrResult``.
+
+    The reply may be either a bare list of rows or a dict holding the rows
+    under ``outputs[0]["value"]``; a bare list is wrapped into that dict shape
+    before being stored as ``raw_result``. The text is the space-joined second
+    element of every row that has more than one element.
+    NOTE(review): rows look like EasyOCR ``(bbox, text, confidence)`` entries —
+    confirm against the endpoint.
+
+    Returns:
+        An ``OcrResult`` with ``model="easyocr"``, or ``None`` when the request
+        raises, the reply is ``None``, or the reply cannot be parsed. Each
+        failure is reported with ``print``.
+    """
+    try:
+        ocr_result = await ocr_modal(content, language)
+    except Exception as e:
+        print(f"Modal OCR error: {e}")
+        return None
+
+    if ocr_result is None:
+        print(f"Modal OCR returned no result: {ocr_result}.")
+        return None
+
+    try:
+        # Pick the rows and the dict-shaped raw payload in a single branch.
+        if isinstance(ocr_result, list):
+            rows = ocr_result
+            wrapped = {"outputs": [{"value": ocr_result}]}
+        else:
+            rows = ocr_result.get("outputs", [{}])[0].get("value", [])
+            wrapped = ocr_result
+
+        joined = " ".join(row[1] for row in rows if len(row) > 1)
+        return OcrResult(model="easyocr", text=joined, raw_result=wrapped)
+    except Exception as e:
+        print(f"Error parsing OCR result: {e}")
+        return None
diff --git a/src/tgbot/handlers/upload/service.py b/src/tgbot/handlers/upload/service.py
@@ -78,7 +78,10 @@ async def create_meme_from_meme_raw_upload(
         else meme_upload["date"]
     )
 
-    if meme_upload["media"].get("duration") and meme_upload["media"].get("duration") > 0:
+    if (
+        meme_upload["media"].get("duration")
+        and meme_upload["media"].get("duration") > 0
+    ):
         meme_type = MemeType.VIDEO
     else:
         meme_type = MemeType.IMAGE