Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- New `auto_metadata_check` parameter in the `zimscraperlib.zim.creator.Creator` initializer, which allows disabling the metadata check at startup (assuming the user will validate the metadata on their own) #119

### Changed

- Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120
Expand All @@ -16,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2)
- When reencoding a video, ffmpeg now uses only 1 CPU thread by default (a new argument to `reencode` allows overriding this default value)

### Fixed

- Fixed type hints of `zimscraperlib.zim.Item` and subclasses, and `zimscraperlib.image.optimization:convert_image`

## [3.2.0] - 2023-12-16

### Added
Expand Down
19 changes: 14 additions & 5 deletions src/zimscraperlib/zim/creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,12 @@ class Creator(libzim.writer.Creator):
a segmentation fault at garbage collection (on exit mostly).

Meaning you should exit right after an exception in your code (during zim creation)
Use workaround_nocancel=False to disable the workaround."""
Use workaround_nocancel=False to disable the workaround.

By default, all metadata are validated for compliance with openZIM guidelines and
conventions. Set auto_metadata_check=False to disable this validation (you can still
do checks manually with the validation methods or your own logic).
"""

def __init__(
self,
Expand All @@ -106,6 +111,7 @@ def __init__(
compression: Optional[str] = None,
workaround_nocancel: Optional[bool] = True, # noqa: FBT002
ignore_duplicates: Optional[bool] = False, # noqa: FBT002
auto_metadata_check: bool = True, # noqa: FBT001, FBT002
):
super().__init__(filename=filename)
self._metadata = {}
Expand All @@ -123,6 +129,7 @@ def __init__(

self.workaround_nocancel = workaround_nocancel
self.ignore_duplicates = ignore_duplicates
self.auto_metadata_check = auto_metadata_check

def config_indexing(
self, indexing: bool, language: Optional[str] = None # noqa: FBT001
Expand All @@ -141,9 +148,10 @@ def start(self):
if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS):
raise ValueError("Mandatory metadata are not all set.")

for name, value in self._metadata.items():
if value:
self.validate_metadata(name, value)
if self.auto_metadata_check:
for name, value in self._metadata.items():
if value:
self.validate_metadata(name, value)

language = self._metadata.get("Language", "").split(",")
if language[0] and not self.__indexing_configured:
Expand Down Expand Up @@ -187,7 +195,8 @@ def add_metadata(
content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]],
mimetype: str = "text/plain;charset=UTF-8",
):
self.validate_metadata(name, content)
if self.auto_metadata_check:
self.validate_metadata(name, content)
if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)):
content = content.strftime("%Y-%m-%d").encode("UTF-8")
if (
Expand Down
6 changes: 6 additions & 0 deletions tests/zim/test_zim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,12 @@ def test_check_metadata(tmp_path):
Creator(tmp_path, "").config_dev_metadata(LongDescription="T" * 5000).start()


def test_relax_metadata(tmp_path):
    """Check that a Creator built with auto_metadata_check=False can start.

    A 90-character Description is configured before calling start(); the test
    passes as long as no exception is raised.
    NOTE(review): presumably this value would be rejected by the default
    metadata validation -- confirm against validate_metadata.
    """
    overlong_description = "T" * 90
    relaxed_creator = Creator(tmp_path, "", auto_metadata_check=False)
    relaxed_creator.config_dev_metadata(Description=overlong_description).start()


@pytest.mark.parametrize(
"tags",
[
Expand Down