Skip to content

Commit cb01a0f

Browse files
authored
Merge pull request #111 from openzim/handle_descriptions
Add utility function to compute/check ZIM descriptions
2 parents 4f8c3cc + 591c6dc commit cb01a0f

File tree

6 files changed

+242
-6
lines changed

6 files changed

+242
-6
lines changed

Diff for: CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ All notable changes to this project are documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).
77

8-
## [3.1.1]
8+
## [Unreleased]
9+
10+
### Added
11+
- Add utility function to compute/check ZIM descriptions #110
12+
13+
## [3.1.1] - 2023-07-18
914

1015
### Changed
1116

Diff for: src/zimscraperlib/constants.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
),
4949
}
5050

51+
RECOMMENDED_MAX_TITLE_LENGTH = 30
5152
MAXIMUM_DESCRIPTION_METADATA_LENGTH = 80
5253
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH = 4000
5354

Diff for: src/zimscraperlib/inputs.py

+57-2
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,18 @@
55
import pathlib
66
import shutil
77
import tempfile
8-
from typing import Optional, Union
8+
from typing import Optional, Tuple, Union
99

1010
from . import logger
11+
from .constants import MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH
12+
from .constants import MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH
1113
from .download import stream_file
1214

1315

1416
def handle_user_provided_file(
1517
source: Optional[Union[pathlib.Path, str]] = None,
1618
dest: Optional[pathlib.Path] = None,
17-
in_dir: pathlib.Path = None,
19+
in_dir: Optional[pathlib.Path] = None,
1820
nocopy: bool = False,
1921
) -> Union[pathlib.Path, None]:
2022
"""path to downloaded or copied a user provided file (URL or path)
@@ -49,3 +51,56 @@ def handle_user_provided_file(
4951
shutil.copy(source, dest)
5052

5153
return dest
54+
55+
56+
def compute_descriptions(
    default_description: str,
    user_description: Optional[str],
    user_long_description: Optional[str],
) -> Tuple[str, Optional[str]]:
    """Pick the short and long descriptions, enforcing ZIM length limits.

    User-provided values take precedence and are never altered: when one of them
    is longer than its limit, a ValueError is raised instead of truncating it.

    Values derived from the default description are truncated to fit, the last
    kept character being replaced by an ellipsis ("…") when truncation happened.

    - description: user_description when set (non-empty), otherwise the default
      description cut down to MAX_DESC_LENGTH characters.
    - long description: user_long_description when set (non-empty) ; otherwise,
      only when the default description is longer than MAX_DESC_LENGTH, the
      default description cut down to MAX_LONG_DESC_LENGTH characters ; otherwise
      user_long_description is returned unchanged.

    args:
        default_description: fallback text used when user values are not set
        user_description: short description supplied by the user, if any
        user_long_description: long description supplied by the user, if any

    Raises ValueError when a user-supplied value exceeds its maximum length.

    Returns a tuple of (description, long_description)
    """

    def clip(text: str, max_length: int) -> str:
        # keep at most max_length characters ; when something had to be dropped,
        # the last kept character is swapped for an ellipsis so that the result
        # still fits in max_length
        kept = text[0:max_length]
        if len(text) > max_length:
            kept = kept[:-1] + "…"
        return kept

    # user-supplied values are validated first, before the default is looked at
    if user_description and len(user_description) > MAX_DESC_LENGTH:
        raise ValueError(
            f"Description too long ({len(user_description)}>{MAX_DESC_LENGTH})"
        )
    if user_long_description and len(user_long_description) > MAX_LONG_DESC_LENGTH:
        raise ValueError(
            f"LongDescription too long ({len(user_long_description)}"
            f">{MAX_LONG_DESC_LENGTH})"
        )

    long_description = user_long_description
    # a long description is derived only if the default text does not entirely
    # fit in the short description field
    if not long_description and len(default_description) > MAX_DESC_LENGTH:
        long_description = clip(default_description, MAX_LONG_DESC_LENGTH)

    description = user_description
    if not description:
        description = clip(default_description, MAX_DESC_LENGTH)

    return (description, long_description)

Diff for: src/zimscraperlib/zim/metadata.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
MANDATORY_ZIM_METADATA_KEYS,
99
MAXIMUM_DESCRIPTION_METADATA_LENGTH,
1010
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
11+
RECOMMENDED_MAX_TITLE_LENGTH,
1112
)
1213
from ..i18n import is_valid_iso_639_3
1314
from ..image.probing import is_valid_image
@@ -40,7 +41,7 @@ def validate_standard_str_types(name: str, value: str):
4041

4142
def validate_title(name: str, value: str):
    """ensures Title metadata is within recommended length

    Only the "Title" metadata is checked ; any other name is accepted as-is.

    Raises ValueError when the title has more than RECOMMENDED_MAX_TITLE_LENGTH
    characters."""
    if name != "Title":
        return
    if len(value) > RECOMMENDED_MAX_TITLE_LENGTH:
        raise ValueError(f"{name} is too long.")
4546

4647

Diff for: tests/inputs/test_inputs.py

+174-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,17 @@
33
# vim: ai ts=4 sts=4 et sw=4 nu
44

55
import pathlib
6+
from typing import Optional
67

78
import pytest
89

9-
from zimscraperlib.inputs import handle_user_provided_file
10+
from zimscraperlib.constants import (
11+
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
12+
)
13+
from zimscraperlib.constants import (
14+
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
15+
)
16+
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file
1017

1118

1219
def test_with_none():
@@ -72,3 +79,169 @@ def test_remote_indir(tmp_path, valid_http_url):
7279
assert fpath is not None
7380
assert fpath.exists()
7481
assert fpath.parent == tmp_path
82+
83+
84+
TEXT_NOT_USED = "text not used"
85+
86+
LONG_TEXT = (
87+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
88+
"incididunt ut labore et dolore magna aliqua. At erat pellentesque adipiscing "
89+
"commodo elit at imperdiet. Rutrum tellus pellentesque eu tincidunt tortor aliquam"
90+
" nulla facilisi. Eget lorem dolor sed viverra ipsum nunc. Ipsum nunc aliquet "
91+
"bibendum enim facilisis gravida neque convallis. Aliquam malesuada bibendum arcu "
92+
"vitae elementum curabitur. Platea dictumst quisque sagittis purus sit amet "
93+
"volutpat. Blandit libero volutpat sed cras ornare. In eu mi bibendum neque "
94+
"egestas. Egestas dui id ornare arcu odio. Pulvinar neque laoreet suspendisse "
95+
"interdum. Fames ac turpis egestas integer eget aliquet nibh praesent tristique. Et"
96+
" egestas quis ipsum suspendisse ultrices gravida dictum fusce. Malesuada fames ac "
97+
"turpis egestas. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada "
98+
"proin libero. In arcu cursus euismod quis viverra. Faucibus in ornare quam viverra"
99+
". Curabitur vitae nunc sed velit dignissim sodales ut eu sem. Velit scelerisque in"
100+
" dictum non consectetur a erat nam. Proin fermentum leo vel orci porta non. Fames"
101+
" ac turpis egestas sed tempus. Vitae justo eget magna fermentum iaculis eu non. "
102+
"Imperdiet massa tincidunt nunc pulvinar sapien et ligula. Laoreet sit amet cursus "
103+
"sit amet dictum sit amet. Quis hendrerit dolor magna eget. Orci ac auctor augue "
104+
"mauris augue. Consequat interdum varius sit amet mattis. At ultrices mi tempus "
105+
"imperdiet nulla malesuada pellentesque elit. Volutpat est velit egestas dui. "
106+
"Potenti nullam ac tortor vitae. At tempor commodo ullamcorper a lacus vestibulum "
107+
"sed arcu non. Duis ut diam quam nulla. Vestibulum mattis ullamcorper velit sed "
108+
"ullamcorper. Sit amet commodo nulla facilisi nullam vehicula. Faucibus purus in "
109+
"massa tempor nec feugiat. Sem fringilla ut morbi tincidunt augue interdum velit. "
110+
"Etiam dignissim diam quis enim lobortis scelerisque fermentum dui. Nunc vel risus "
111+
"commodo viverra maecenas accumsan. Aenean sed adipiscing diam donec adipiscing "
112+
"tristique. Maecenas accumsan lacus vel facilisis volutpat est velit egestas. Nulla"
113+
" aliquet porttitor lacus luctus accumsan tortor posuere ac. Habitant morbi "
114+
"tristique senectus et netus et. Eget mi proin sed libero enim sed faucibus turpis "
115+
"in. Vulputate enim nulla aliquet porttitor lacus. Dui ut ornare lectus sit amet "
116+
"est. Quam lacus suspendisse faucibus interdum posuere. Sagittis orci a scelerisque"
117+
" purus semper eget duis at tellus. Tellus molestie nunc non blandit massa. Feugiat"
118+
" vivamus at augue eget arcu dictum varius duis at. Varius morbi enim nunc faucibus"
119+
" a pellentesque sit. Id aliquet lectus proin nibh nisl condimentum id venenatis a."
120+
" Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. Aliquam "
121+
"malesuada bibendum arcu vitae elementum curabitur vitae nunc sed. Habitasse platea"
122+
" dictumst quisque sagittis purus sit amet volutpat. Vitae auctor eu augue ut "
123+
"lectus. At varius vel pharetra vel turpis nunc eget. Dictum at tempor commodo "
124+
"ullamcorper a lacus vestibulum sed arcu. Pellentesque massa placerat duis "
125+
"ultricies. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. "
126+
"Volutpat blandit aliquam etiam erat velit scelerisque in. Amet mattis vulputate "
127+
"enim nulla aliquet porttitor. Egestas maecenas pharetra convallis posuere morbi "
128+
"leo urna molestie. Duis ut diam quam nulla porttitor massa id. In fermentum "
129+
"posuere urna nec tincidunt praesent. Turpis egestas sed tempus urna et pharetra "
130+
"pharetra massa. Tellus molestie nunc non blandit massa. Diam phasellus vestibulum "
131+
"lorem sed risus ultricies. Egestas erat imperdiet sed euismod nisi porta lorem. "
132+
"Quam viverra orci sagittis eu volutpat odio facilisis mauris sit. Ornare aenean "
133+
"euismod elementum nisi quis. Laoreet non curabitur gravida arcu ac tortor "
134+
"dignissim convallis aenean. Sagittis aliquam malesuada bibendum arcu vitae "
135+
"elementum. Sed blandit libero volutpat sed cras ornare. Sagittis eu volutpat odio "
136+
"facilisis mauris. Facilisis volutpat est velit egestas dui id ornare arcu odio. "
137+
"Eu feugiat pretium nibh."
138+
)
139+
140+
141+
@pytest.mark.parametrize(
    "user_description, user_long_description, default_description, raises, "
    "expected_description, expected_long_description",
    [
        # user description set and is short, user long description not set, default
        # description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
            TEXT_NOT_USED,
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
        ),
        # user description set and is too long, default description does not matter
        (LONG_TEXT[0 : MAX_DESC_LENGTH + 1], None, TEXT_NOT_USED, True, None, None),
        # user description not set and default description is short enough
        (
            None,
            None,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
        ),
        # user description not set and default description is too long for description
        # but ok for long description
        (
            None,
            None,
            LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
        ),
        # user description not set and default description has at most the maximum
        # long description length: long description is the untouched default
        (
            None,
            None,
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
        # user description not set and default description is too long for description
        # and long description
        (
            None,
            None,
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH - 1] + "…",
        ),
        # user description set and is short, user long description set and is short,
        # default description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            TEXT_NOT_USED,
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
        # user description set and is short, user long description set and is too
        # long, default description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
            TEXT_NOT_USED,
            True,
            None,
            None,
        ),
        # user description not set, user long description set and is short,
        # default description set to something different than long desc
        (
            None,
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            LONG_TEXT[10:MAX_LONG_DESC_LENGTH],
            False,
            LONG_TEXT[10 : MAX_DESC_LENGTH + 9] + "…",
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
    ],
)
def test_description(
    user_description: Optional[str],
    user_long_description: Optional[str],
    default_description: str,
    *,
    raises: bool,
    expected_description: Optional[str],
    expected_long_description: Optional[str],
):
    """checks compute_descriptions output (or ValueError) for each scenario

    When `raises` is set, the call must fail with a ValueError and the expected_*
    values are ignored (they are None in the parameters above). Otherwise the
    returned (description, long_description) tuple must match the expected values.
    """
    if raises:
        with pytest.raises(ValueError):
            compute_descriptions(
                default_description, user_description, user_long_description
            )
        return

    description, long_description = compute_descriptions(
        default_description, user_description, user_long_description
    )

    assert description == expected_description
    assert long_description == expected_long_description

Diff for: tox.ini

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ deps =
1111
pytest-cov
1212
-r{toxinidir}/requirements.txt
1313
commands =
14-
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing
14+
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing {posargs}
1515

1616
[testenv:black]
1717
deps =
1818
black>=23.1.0,<24
1919
commands = black --check .
2020

2121
[testenv:isort]
22+
base_python=py38
2223
deps =
2324
isort>=5.12.0,<5.13
2425
commands = isort --profile black --check src tests

0 commit comments

Comments
 (0)