Skip to content

Commit b1a432a

Browse files
authored
Merge pull request #183 from openzim/i18_typing
Process / return a new typed Lang class in i18n methods
2 parents 0573638 + ac9249c commit b1a432a

File tree

3 files changed

+139
-30
lines changed

3 files changed

+139
-30
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1919
- New `creator.Creator.convert_and_check_metadata` to convert metadata to bytes or str for known use cases and check proper type is passed to libzim
2020
- Add svg2png image conversion function #113
2121
- Add `conversion.convert_svg2png` image conversion function + support for SVG in `probing.format_for` #113
22+
- Add `i18n.Lang` class used as typed result of i18n operations #151
2223

2324
## Changed
2425

@@ -31,6 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3132
- **BREAKING** `creator.Creator.add_metadata` and `creator.Creator.validate_metadata` now only accept `bytes | str` as value (it must have been converted before the call)
3233
- **BREAKING** second argument of `creator.Creator.add_metadata` has been renamed to `value` instead of `content` to align with other methods
3334
- When a type issue arises in metadata checks, wrong value type is displayed in exception
35+
- **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro()` now process / return a new typed `Lang` class #151
36+
- **BREAKING** Rename `i18n.NotFound` to `i18n.NotFoundError`
3437

3538
### Fixed
3639

src/zimscraperlib/i18n.py

+77-29
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,63 @@ def setlocale(root_dir: pathlib.Path, locale_name: str):
6565
) from exc
6666

6767

68-
def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
69-
"""ISO-639-x languages details for lang. Raises NotFound
68+
class Lang(dict):
7069

71-
Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
72-
english, iso_types
70+
@property
71+
def iso_639_1(self) -> str | None:
72+
"""ISO-639-1 language code"""
73+
return self["iso-639-1"]
7374

74-
See get_language_details() for details"""
75+
@property
76+
def iso_639_2b(self) -> str | None:
77+
"""ISO-639-2b language code"""
78+
return self["iso-639-2b"]
79+
80+
@property
81+
def iso_639_2t(self) -> str | None:
82+
"""ISO-639-2t language code"""
83+
return self["iso-639-2t"]
84+
85+
@property
86+
def iso_639_3(self) -> str | None:
87+
"""ISO-639-3 language code"""
88+
return self["iso-639-3"]
89+
90+
@property
91+
def iso_639_5(self) -> str | None:
92+
"""ISO-639-5 language code"""
93+
return self["iso-639-5"]
94+
95+
@property
96+
def english(self) -> str:
97+
"""language name in English"""
98+
return self["english"]
99+
100+
@property
101+
def native(self) -> str:
102+
"""language name in native language"""
103+
return self["native"]
104+
105+
@property
106+
def iso_types(self) -> list[str]:
107+
"""list of supported iso types"""
108+
return self["iso_types"]
109+
110+
@property
111+
def query(self) -> list[str]:
112+
"""Query issued for these language details"""
113+
return self["query"]
114+
115+
@property
116+
def querytype(self) -> list[str]:
117+
"""Type of query issued to retrieve language details"""
118+
return self["querytype"]
119+
120+
121+
def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
122+
"""ISO-639-x languages details for lang. Raises NotFoundError
123+
124+
Returns a tuple (main_language, macro_language | None)"""
75125

76126
iso_types = []
77127

@@ -105,9 +155,9 @@ def replace_types(new_type: str) -> str:
105155
if str(getattr(isolang, code_type)).lower() == lang.lower():
106156
iso_types.append(replace_types(code_type))
107157

108-
lang_data = {
109-
f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS
110-
}
158+
lang_data = Lang(
159+
**{f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS}
160+
)
111161
lang_data.update({"english": isolang.name, "iso_types": iso_types})
112162

113163
if isolang.macro():
@@ -118,53 +168,51 @@ def replace_types(new_type: str) -> str:
118168
return lang_data, None
119169

120170

121-
def find_language_names(
122-
query: str, lang_data: dict | None = None
123-
) -> tuple[str | None, str | None]:
124-
"""(native, english) language names for lang with help from language_details dict
171+
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
172+
"""(native, english) language names for lang with help from lang_data
125173
126174
Falls back to English name if available or query if not"""
127175
if lang_data is None:
128-
lang_data = get_language_details(query, failsafe=True) or {}
176+
lang_data = get_language_details(query, failsafe=True)
177+
if not lang_data:
178+
return query, query
179+
129180
try:
130181
query_locale = babel.Locale.parse(query)
131-
return query_locale.get_display_name(), query_locale.get_display_name("en")
182+
if native_display_name := query_locale.get_display_name():
183+
if english_display_name := query_locale.get_display_name("en"):
184+
return native_display_name, english_display_name
132185
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
133186
pass
134187

135188
# ISO code lookup order matters (most qualified first)!
136189
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
137190
try:
138191
query_locale = babel.Locale.parse(lang_data.get(iso_level))
139-
return query_locale.get_display_name(), query_locale.get_display_name("en")
192+
if native_display_name := query_locale.get_display_name():
193+
if english_display_name := query_locale.get_display_name("en"):
194+
return native_display_name, english_display_name
140195
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
141196
pass
142-
default = lang_data.get("english", query)
197+
default = lang_data.get("english") or query
143198
return default, default
144199

145200

146-
def update_with_macro(lang_data: dict, macro_data: dict | None):
201+
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
147202
"""update empty keys from lang_data with ones of macro_data"""
148203
if macro_data:
149204
for key, value in macro_data.items():
150-
if key in lang_data and not lang_data[key]:
205+
if key in lang_data and not lang_data.get(key):
151206
lang_data[key] = value
152207
return lang_data
153208

154209

155-
def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
210+
def get_language_details(
211+
query: str, failsafe: bool | None = False # noqa: FBT002
212+
) -> Lang | None:
156213
"""language details dict from query.
157214
158-
Raises NotFound or return `und` language details if failsafe
159-
160-
iso-639-1: str ISO-639-1 language code
161-
iso-639-2b: str ISO-639-2b language code
162-
iso-639-2t: str ISO-639-2t language code
163-
iso-639-3: str ISO-639-3 language code
164-
iso-639-5: str ISO-639-5 language code
165-
english: str language name in English
166-
native: str language name in is native language
167-
iso_types: [str] list of supported iso types
215+
When query fails, either raises NotFoundError or returns None, based on failsafe
168216
169217
"""
170218

tests/i18n/test_i18n.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
import locale
55
import pathlib
6+
from unittest.mock import Mock
67

78
import pytest
89

910
from zimscraperlib.i18n import (
11+
Lang,
1012
NotFoundError,
1113
_,
1214
find_language_names,
@@ -190,7 +192,19 @@ def test_lang_details(query, expected):
190192
with pytest.raises(NotFoundError):
191193
get_language_details(query)
192194
else:
193-
assert get_language_details(query) == expected
195+
result = get_language_details(query)
196+
assert result == expected
197+
if result:
198+
assert result.iso_639_1 == expected.get("iso-639-1")
199+
assert result.iso_639_2b == expected.get("iso-639-2b")
200+
assert result.iso_639_2t == expected.get("iso-639-2t")
201+
assert result.iso_639_3 == expected.get("iso-639-3")
202+
assert result.iso_639_5 == expected.get("iso-639-5")
203+
assert result.english == expected.get("english")
204+
assert result.native == expected.get("native")
205+
assert result.iso_types == expected.get("iso_types")
206+
assert result.query == expected.get("query")
207+
assert result.querytype == expected.get("querytype")
194208

195209

196210
@pytest.mark.parametrize(
@@ -201,6 +215,7 @@ def test_lang_details(query, expected):
201215
("bm", ("bamanakan", "Bambara")),
202216
("zh", ("中文", "Chinese")),
203217
("ar", ("العربية", "Arabic")),
218+
("qq", ("qq", "qq")),
204219
],
205220
)
206221
def test_lang_name(query, expected):
@@ -214,3 +229,46 @@ def test_lang_name(query, expected):
214229
def test_translation(lang, expected):
215230
setlocale(pathlib.Path(__file__).parent, lang)
216231
assert _("Hello World!") == expected
232+
233+
234+
@pytest.mark.parametrize(
235+
"dict_data",
236+
[{}, {"iso-639-1": "ar"}],
237+
)
238+
def test_lang_equals(dict_data):
239+
assert Lang(dict_data) == Lang(dict_data)
240+
assert Lang(dict_data) == Lang({**dict_data})
241+
242+
243+
@pytest.mark.parametrize(
244+
"dict_data_left, dict_data_right",
245+
[
246+
({}, {"iso-639-1": "ar"}),
247+
({"iso-639-1": "ar"}, {"iso-639-1": "ab"}),
248+
({"iso-639-1": "ar"}, {"iso-639-2": "ar"}),
249+
],
250+
)
251+
def test_lang_not_equals(dict_data_left, dict_data_right):
252+
assert Lang(dict_data_left) != Lang(dict_data_right)
253+
assert Lang(dict_data_left) != "foo"
254+
255+
256+
@pytest.mark.parametrize(
257+
"babel_native_return, babel_english_return, expected_native, expected_english",
258+
[
259+
("Native value", "English value", "Native value", "English value"),
260+
(None, "English value", "German", "German"),
261+
("Native value", None, "German", "German"),
262+
],
263+
)
264+
def test_find_language_names(
265+
mocker, babel_native_return, babel_english_return, expected_native, expected_english
266+
):
267+
mock_locale = Mock()
268+
mock_locale.get_display_name.side_effect = lambda lang=None: (
269+
babel_native_return if lang is None else babel_english_return
270+
)
271+
272+
mocker.patch("babel.Locale.parse", return_value=mock_locale)
273+
274+
assert find_language_names("de") == (expected_native, expected_english)

0 commit comments

Comments
 (0)