Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
lollanboll committed Mar 10, 2024
1 parent 84e7d98 commit 0df9b92
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/dbas/firebase_migrations/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Download the Punkt tokenizer models to split sentences
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import re

# Max characters that can be translated at a time
MAX_CHARS = 4900
Expand Down Expand Up @@ -42,6 +43,8 @@ def translate_text(text, dest_language='en'):
if temp_text:
try:
translated_text += translator.translate(temp_text, dest=dest_language).text
translated_text = re.sub(r"\.([A-Z])", r". \1", translated_text) # Ensure single space after period before uppercase letter
translated_text = re.sub(r'(?<=\d)\s*\.\s*(?=\d)', '.', translated_text) # remove spaces inbetween " 4. 0 gram" for example
except Exception as e:
print(f"Error translating text: {str(e)}")

Expand Down

0 comments on commit 0df9b92

Please sign in to comment.