From 04a67e20ba8ca3d6ef5603c3c4f02513c5547b6b Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 2 Aug 2024 13:54:33 +0000 Subject: [PATCH] Replace locale Python package by babel --- .github/workflows/Tests.yaml | 3 -- CHANGELOG.md | 5 ++ README.md | 3 -- src/zimscraperlib/i18n.py | 95 +++++++++++++++++++++++------------- tests/i18n/test_i18n.py | 35 ++++++++++--- 5 files changed, 94 insertions(+), 47 deletions(-) diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index 3e4de467..0fd2de44 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -20,9 +20,6 @@ jobs: - name: install ffmpeg and gifsicle run: sudo apt update && sudo apt install ffmpeg gifsicle - - name: add required locales for tests - run: sudo locale-gen fr_FR.UTF-8 pt_BR.UTF-8 && sudo update-locale - - name: Set up Python ${{ matrix.python }} uses: actions/setup-python@v4 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c833e6b..179abcea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add svg2png image conversion function #113 - Add `conversion.convert_svg2png` image conversion function + support for SVG in `probing.format_for` #113 - Add `i18n.Lang` class used as typed result of i18n operations #151 +- Add `i18n.UnknownLocaleError` exception, raised when the locale passed to `i18n.setlocale` is unknown #134 ## Changed @@ -34,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - When a type issue arises in metadata checks, wrong value type is displayed in exception - **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro` now process / return a new typed `Lang` class #151 - **BREAKING** Rename `i18.NotFound` to `i18n.NotFoundError` +- Replace `locale` Python package by `babel` to read translation files #134 +- **BREAKING** Return value of 
`i18n.setlocale` is now `language[_territory]` (it was `language_territory.codeset`) #134 +- **BREAKING** Replace `i18n.Locale` by `i18n.Translator` (and use instance methods instead of class methods, to allow support of multiple translators at once) #134 +- Replace `i18n.Locale` by `i18n.Translator` class #134 ### Fixed diff --git a/README.md b/README.md index 903829c8..835832d2 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,6 @@ zimscraperlib>=1.1,<1.2 * Pillow * FFmpeg * gifsicle (>=1.92) -* locale (with at least `fr_FR.UTF-8` and `pt_BR.utf8` locales installed for tests to pass) ## macOS @@ -52,8 +51,6 @@ sudo apt install libmagic1 wget ffmpeg \ apk add ffmpeg gifsicle libmagic wget libjpeg ``` -**Nota:** Alpine does not have `locale` support, so i18n features do not work on Alpine, see https://github.com/openzim/python-scraperlib/issues/134 ; there is one corresponding test which is failing. - # Contribution This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing). diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py index 333739d4..09d60f77 100644 --- a/src/zimscraperlib/i18n.py +++ b/src/zimscraperlib/i18n.py @@ -3,12 +3,12 @@ from __future__ import annotations -import gettext -import locale import pathlib import re import babel +import babel.core +import babel.support import iso639 import iso639.exceptions @@ -19,50 +19,77 @@ class NotFoundError(ValueError): pass -class Locale: - short = "en" - name = "en_US.UTF-8" - locale_dir = None - domain = "messages" - translation = gettext.translation("messages", fallback=True) +class UnknownLocaleError(ValueError): + """Exception raised when the locale to use is not known""" - @classmethod - def setup(cls, locale_dir: pathlib.Path, locale_name: str): - cls.name = locale_name - cls.locale_dir = str(locale_dir) + pass + + +class Translator: + """Translate messages to a given locale""" - if "." 
in locale_name: - cls.lang, cls.encoding = locale_name.split(".") + def setlocale( + self, root_dir: pathlib.Path, locale_name: str, locale_subdir: str = "locale" + ) -> str: + """Load translation files for a given locale. Call this early. + + root_dir: path where a `locale_subdir` folder exists + locale_name: name of the locale to load + locale_subdir: subfolder of `root_dir` containing translations ("locale" by + default) + """ + locale_dir = root_dir / locale_subdir + try: + locale = babel.Locale.parse(locale_name) + except babel.core.UnknownLocaleError as exc: + raise UnknownLocaleError("Unknown locale") from exc + self.translations = babel.support.Translations.load(locale_dir, locale) + if locale.language != "en" and not (self.translations._catalog): + raise RuntimeError( + f"Failed to find language files for {locale_name} " + f"({locale.language}) in {locale_dir}" + ) + if locale.territory: + return f"{locale.language}_{locale.territory}" else: - cls.lang, cls.encoding = locale_name, "UTF-8" + return locale.language + + def _(self, message: str) -> str: + """Translate a message - computed = locale.setlocale(locale.LC_ALL, (cls.lang, cls.encoding)) + Nota: setlocale must have been called prior to using this function + """ + if not hasattr(self, "translations"): + raise RuntimeError( + "Translation not initialized, you must call setlocale first" + ) + return self.translations.gettext(message) - gettext.bindtextdomain(cls.domain, cls.locale_dir) - gettext.textdomain(cls.domain) - cls.translation = gettext.translation( - cls.domain, cls.locale_dir, languages=[cls.lang], fallback=True - ) - return computed +DEFAULT_TRANSLATOR = Translator() def _(text: str) -> str: - """translates text according to setup'd locale""" - return Locale.translation.gettext(text) + """Translate a message + Nota: setlocale must have been called prior to using this function + """ + return DEFAULT_TRANSLATOR._(text) -def setlocale(root_dir: pathlib.Path, locale_name: str): - """set the 
desired locale for gettext. - call this early""" - try: - return Locale.setup(root_dir / "locale", locale_name) - except locale.Error as exc: - raise locale.Error( - f"Failed to setup '{locale_name}' locale. If this locale is not installed " - "on this system, please install it first." - ) from exc +def setlocale( + root_dir: pathlib.Path, locale_name: str, locale_subdir: str = "locale" +) -> str: + """Load translation files for a given locale. Call this early. + + root_dir: path where a `locale_subdir` folder exists + locale_name: name of the locale to load + locale_subdir: subfolder of `root_dir` containing translations ("locale" by default) + """ + + return DEFAULT_TRANSLATOR.setlocale( + root_dir=root_dir, locale_name=locale_name, locale_subdir=locale_subdir + ) class Lang(dict): diff --git a/tests/i18n/test_i18n.py b/tests/i18n/test_i18n.py index bbff772a..303b750f 100644 --- a/tests/i18n/test_i18n.py +++ b/tests/i18n/test_i18n.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # vim: ai ts=4 sts=4 et sw=4 nu -import locale import pathlib from unittest.mock import Mock @@ -10,6 +9,8 @@ from zimscraperlib.i18n import ( Lang, NotFoundError, + Translator, + UnknownLocaleError, _, find_language_names, get_language_details, @@ -17,17 +18,37 @@ ) +@pytest.fixture() +def translator() -> Translator: + """Fixture to not reuse default translator across tests and test edge cases""" + return Translator() + + @pytest.mark.parametrize( "code,expected", - [("en", "en_US.UTF-8"), ("en_us", "en_US.UTF-8"), ("en.utf8", "en_US.UTF-8")], + [("en", "en"), ("en_us", "en_US"), ("en.utf8", "en")], ) -def test_setlocale(tmp_path, code, expected): - assert setlocale(tmp_path, code) == expected + + +def test_setlocale(tmp_path, translator, code, expected): + assert translator.setlocale(tmp_path, code) == expected + + +def test_setlocale_wrong_path(tmp_path, translator): + with pytest.raises(RuntimeError, match="Failed to find language files for"): + translator.setlocale(tmp_path, "fra") # wrong 
path supplied + + +def test_setlocale_wrong_lang(translator): + with pytest.raises(UnknownLocaleError, match="Unknown locale"): + translator.setlocale( + pathlib.Path(__file__).parent, "qqq" # wrong lang supplied + ) -def test_selocale_unsupported(tmp_path): - with pytest.raises(locale.Error): - setlocale(tmp_path, "bam") +def test_setlocale_not_called(translator): + with pytest.raises( + RuntimeError, match="Translation not initialized, you must call setlocale first" + ): + translator._("Hello World!") @pytest.mark.parametrize(