Skip to content

Commit 75eed4e

Browse files
committed
Revisit typing around add_metadata and validate_metadata + create new convert_and_check_metadata
1 parent dc57e64 commit 75eed4e

File tree

3 files changed

+42
-27
lines changed

3 files changed

+42
-27
lines changed

Diff for: CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
- Add utility function to compute ZIM Tags #164, including deduplication #156
1313
- Expose new `optimization.get_optimization_method` to get the proper optimization method to call for a given image format
14+
- New `creator.Creator.convert_and_check_metadata` to convert metadata to bytes or str for known use cases and check that the proper type is passed to libzim
1415

1516
### Changed
1617
- **BREAKING** Renamed `zimscraperlib.image.convertion` to `zimscraperlib.image.conversion` to fix typo
1718
- **BREAKING** Many changes in type hints to match the real underlying code
1819
- **BREAKING** Force all boolean arguments (and some other non-obvious parameters) to be keyword-only in function calls for clarity / disambiguation (see ruff rule FBT002)
1920
- Prefer `IO[bytes]` over `io.BytesIO` when possible since it is more generic
2021
- **BREAKING** `i18n.NotFound` renamed `i18n.NotFoundError`
22+
- **BREAKING** `types.get_mime_for_name` now returns `str | None`
23+
- **BREAKING** `creator.Creator.add_metadata` and `creator.Creator.validate_metadata` now only accept `bytes | str` as value (the value must have been converted before the call)
24+
- **BREAKING** second argument of `creator.Creator.add_metadata` has been renamed from `content` to `value` to align with other methods
25+
- When a type issue arises in metadata checks, the type of the offending value is now displayed in the exception
2126

2227
### Fixed
2328

2429
- Metadata length validation is buggy for unicode strings #158
2530
- Pillow 10.4.0 reveals improper type hints for image probing functions #177
31+
- Automated conversion of `datetime.date` or `datetime.datetime` Date metadata now returns a `str` instead of a `bytes`
2632

2733
## [3.4.0] - 2024-06-21
2834

Diff for: src/zimscraperlib/zim/creator.py

+33-22
Original file line numberDiff line numberDiff line change
@@ -221,22 +221,14 @@ def start(self):
221221
del self._metadata["Illustration_48x48@1"]
222222
for name, value in self._metadata.items():
223223
if value:
224-
self.add_metadata(name, value)
224+
self.add_metadata(name, self.convert_and_check_metadata(name, value))
225225

226226
return self
227227

228228
def validate_metadata(
229229
self,
230230
name: str,
231-
value: (
232-
int
233-
| float
234-
| bytes
235-
| str
236-
| datetime.datetime
237-
| datetime.date
238-
| Iterable[str]
239-
),
231+
value: bytes | str,
240232
):
241233
"""Ensures metadata value for name is conform with the openZIM spec on Metadata
242234
@@ -255,24 +247,43 @@ def validate_metadata(
255247
validate_tags(name, value) # pyright: ignore
256248
validate_illustrations(name, value) # pyright: ignore
257249

250+
def convert_and_check_metadata(
251+
self,
252+
name: str,
253+
value: str | bytes | datetime.date | datetime.datetime | Iterable[str],
254+
) -> str | bytes:
255+
"""Convert metadata to appropriate type for few known usecase and check type
256+
257+
Date: converts date and datetime to string YYYY-MM-DD
258+
Tags: converts iterable to string with semi-colon separator
259+
260+
Also checks that final type is appropriate for libzim (str or bytes)
261+
"""
262+
if name == "Date" and isinstance(value, (datetime.date, datetime.datetime)):
263+
value = value.strftime("%Y-%m-%d")
264+
if (
265+
name == "Tags"
266+
and not isinstance(value, str)
267+
and not isinstance(value, bytes)
268+
and isinstance(value, Iterable)
269+
):
270+
value = ";".join(value)
271+
272+
if not isinstance(value, str) and not isinstance(value, bytes):
273+
raise ValueError(f"Invalid type for {name}: {type(value)}")
274+
275+
return value
276+
258277
def add_metadata(
259278
self,
260279
name: str,
261-
content: str | bytes | datetime.date | datetime.datetime | Iterable[str],
280+
value: str | bytes,
262281
mimetype: str = "text/plain;charset=UTF-8",
263282
):
264283
if not self.disable_metadata_checks:
265-
self.validate_metadata(name, content)
266-
if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)):
267-
content = content.strftime("%Y-%m-%d").encode("UTF-8")
268-
if (
269-
name == "Tags"
270-
and not isinstance(content, str)
271-
and not isinstance(content, bytes)
272-
and isinstance(content, Iterable)
273-
):
274-
content = ";".join(content)
275-
super().add_metadata(name, content, mimetype)
284+
self.validate_metadata(name, value)
285+
286+
super().add_metadata(name, value, mimetype)
276287

277288
# there are many N803 problems, but they are intentional to match real tag name
278289
def config_metadata(

Diff for: src/zimscraperlib/zim/metadata.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ def validate_required_values(name: str, value: Any):
3131

3232
def validate_standard_str_types(
3333
name: str,
34-
value: (
35-
int | float | bytes | str | datetime.datetime | datetime.date | Iterable[str]
36-
),
34+
value: str | bytes,
3735
):
3836
"""ensures standard string metadata are indeed str"""
3937
if name in (
@@ -50,7 +48,7 @@ def validate_standard_str_types(
5048
"Source",
5149
"Scraper",
5250
) and not isinstance(value, str):
53-
raise ValueError(f"Invalid type for {name}")
51+
raise ValueError(f"Invalid type for {name}: {type(value)}")
5452

5553

5654
def validate_title(name: str, value: str):
@@ -63,7 +61,7 @@ def validate_date(name: str, value: datetime.datetime | datetime.date | str):
6361
"""ensures Date metadata can be casted to an ISO 8601 string"""
6462
if name == "Date":
6563
if not isinstance(value, (datetime.datetime, datetime.date, str)):
66-
raise ValueError(f"Invalid type for {name}.")
64+
raise ValueError(f"Invalid type for {name}: {type(value)}")
6765
elif isinstance(value, str):
6866
match = re.match(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})", value)
6967
if not match:

0 commit comments

Comments
 (0)