From 0df9b9260a1683206c053b49e51db7fc2a05b517 Mon Sep 17 00:00:00 2001 From: lollanboll Date: Sun, 10 Mar 2024 11:31:52 +0100 Subject: [PATCH] Fix bug #21 --- src/dbas/firebase_migrations/translate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dbas/firebase_migrations/translate.py b/src/dbas/firebase_migrations/translate.py index b458ac54d..75b932353 100644 --- a/src/dbas/firebase_migrations/translate.py +++ b/src/dbas/firebase_migrations/translate.py @@ -6,6 +6,7 @@ # Download the Punkt tokenizer models to split scentances nltk.download('punkt') from nltk.tokenize import sent_tokenize +import re # Max characters that can be translated at a time MAX_CHARS = 4900 @@ -42,6 +43,8 @@ def translate_text(text, dest_language='en'): if temp_text: try: translated_text += translator.translate(temp_text, dest=dest_language).text + translated_text = re.sub(r"\.([A-Z])", r". \1", translated_text) # Insert a space after a period that is directly followed by an uppercase letter + translated_text = re.sub(r'(?<=\d)\s*\.\s*(?=\d)', '.', translated_text) # Remove whitespace around a period between two digits, e.g. "4. 0 gram" -> "4.0 gram" except Exception as e: print(f"Error translating text: {str(e)}")