Skip to content

Commit a25ec60

Browse files
authored
Merge pull request #135 from openzim/relax_metadata_check
Relax metadata check
2 parents 5558962 + 8d6795e commit a25ec60

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- New `disable_metadata_checks` parameter in the `zimscraperlib.zim.creator.Creator` initializer, allowing metadata checks to be disabled at startup (assuming the user will validate the metadata on their own) #119
13+
1014
### Changed
1115

1216
- Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120
@@ -16,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1620
- Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2)
1721
- When reencoding a video, ffmpeg now uses only 1 CPU thread by default (a new argument to `reencode` allows overriding this default value)
1822

23+
### Fixed
24+
25+
- Fixed type hints of `zimscraperlib.zim.Item` and subclasses, and `zimscraperlib.image.optimization:convert_image`
26+
1927
## [3.2.0] - 2023-12-16
2028

2129
### Added

src/zimscraperlib/zim/creator.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,12 @@ class Creator(libzim.writer.Creator):
9797
a segmentation fault at garbage collection (on exit mostly).
9898
9999
Meaning you should exit right after an exception in your code (during zim creation)
100-
Use workaround_nocancel=False to disable the workaround."""
100+
Use workaround_nocancel=False to disable the workaround.
101+
102+
By default, all metadata are validated for compliance with openZIM guidelines and
103+
conventions. Set disable_metadata_checks=False to disable this validation (you can
104+
still do checks manually with the validation methods or your own logic).
105+
"""
101106

102107
def __init__(
103108
self,
@@ -106,6 +111,7 @@ def __init__(
106111
compression: Optional[str] = None,
107112
workaround_nocancel: Optional[bool] = True, # noqa: FBT002
108113
ignore_duplicates: Optional[bool] = False, # noqa: FBT002
114+
disable_metadata_checks: bool = True, # noqa: FBT001, FBT002
109115
):
110116
super().__init__(filename=filename)
111117
self._metadata = {}
@@ -123,6 +129,7 @@ def __init__(
123129

124130
self.workaround_nocancel = workaround_nocancel
125131
self.ignore_duplicates = ignore_duplicates
132+
self.disable_metadata_checks = disable_metadata_checks
126133

127134
def config_indexing(
128135
self, indexing: bool, language: Optional[str] = None # noqa: FBT001
@@ -141,9 +148,10 @@ def start(self):
141148
if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS):
142149
raise ValueError("Mandatory metadata are not all set.")
143150

144-
for name, value in self._metadata.items():
145-
if value:
146-
self.validate_metadata(name, value)
151+
if self.disable_metadata_checks:
152+
for name, value in self._metadata.items():
153+
if value:
154+
self.validate_metadata(name, value)
147155

148156
language = self._metadata.get("Language", "").split(",")
149157
if language[0] and not self.__indexing_configured:
@@ -187,7 +195,8 @@ def add_metadata(
187195
content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]],
188196
mimetype: str = "text/plain;charset=UTF-8",
189197
):
190-
self.validate_metadata(name, content)
198+
if self.disable_metadata_checks:
199+
self.validate_metadata(name, content)
191200
if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)):
192201
content = content.strftime("%Y-%m-%d").encode("UTF-8")
193202
if (

tests/zim/test_zim_creator.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,12 @@ def test_check_metadata(tmp_path):
507507
Creator(tmp_path, "").config_dev_metadata(LongDescription="T" * 5000).start()
508508

509509

510+
def test_relax_metadata(tmp_path):
511+
Creator(tmp_path, "", disable_metadata_checks=False).config_dev_metadata(
512+
Description="T" * 90
513+
).start()
514+
515+
510516
@pytest.mark.parametrize(
511517
"tags",
512518
[

0 commit comments

Comments
 (0)