From b4843c758bcf2fbe9ac777efdbb0b2af0e96a2c0 Mon Sep 17 00:00:00 2001 From: lollanboll Date: Wed, 28 Feb 2024 10:34:20 +0100 Subject: [PATCH 1/3] Suggestion on solution for bug #21 --- src/dbas/translate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dbas/translate.py b/src/dbas/translate.py index ca5cc6799..dce394f19 100644 --- a/src/dbas/translate.py +++ b/src/dbas/translate.py @@ -28,6 +28,8 @@ def translate_text(text, dest_language='en'): else: # Translate the accumulated text translated_text += translator.translate(temp_text, dest=dest_language).text + " " + translated_text = translated_text.replace('. ', '.').replace('.', '. ') + # Start accumulating sentences again temp_text = sentence From 84e7d98f2f98cddc158446697102efc22372dc53 Mon Sep 17 00:00:00 2001 From: lollanboll Date: Wed, 28 Feb 2024 10:36:33 +0100 Subject: [PATCH 2/3] Suggestion on solution for bug #21 --- src/dbas/translate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dbas/translate.py b/src/dbas/translate.py index dce394f19..04b64ccd4 100644 --- a/src/dbas/translate.py +++ b/src/dbas/translate.py @@ -1,4 +1,3 @@ - # pip install googletrans==4.0.0-rc1 nltk from googletrans import Translator import nltk From 0df9b9260a1683206c053b49e51db7fc2a05b517 Mon Sep 17 00:00:00 2001 From: lollanboll Date: Sun, 10 Mar 2024 11:31:52 +0100 Subject: [PATCH 3/3] Fix bug #21 --- src/dbas/firebase_migrations/translate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dbas/firebase_migrations/translate.py b/src/dbas/firebase_migrations/translate.py index b458ac54d..75b932353 100644 --- a/src/dbas/firebase_migrations/translate.py +++ b/src/dbas/firebase_migrations/translate.py @@ -6,6 +6,7 @@ # Download the Punkt tokenizer models to split scentances nltk.download('punkt') from nltk.tokenize import sent_tokenize +import re # Max characters that can be translated at a time MAX_CHARS = 4900 @@ -42,6 +43,8 @@ def translate_text(text, dest_language='en'): if temp_text: try: 
translated_text += translator.translate(temp_text, dest=dest_language).text + translated_text = re.sub(r"\.([A-Z])", r". \1", translated_text) # Ensure single space after period before uppercase letter + translated_text = re.sub(r'(?<=\d)\s*\.\s*(?=\d)', '.', translated_text) # Remove whitespace around a decimal point between digits, e.g. "4. 0 gram" -> "4.0 gram" except Exception as e: print(f"Error translating text: {str(e)}")