diff --git a/CHANGELOG.md b/CHANGELOG.md index fa3bbf95..ba6b749e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- New `disable_metadata_checks` parameter in `zimscraperlib.zim.creator.Creator` initializer, allowing metadata checks to be disabled at startup (assuming users will validate metadata on their own) #119 + ### Changed - Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120 @@ -16,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2) - When reencoding a video, ffmpeg now uses only 1 CPU thread by default (new arg to `reencode` allows to override this default value) +### Fixed + +- Fixed type hints of `zimscraperlib.zim.Item` and subclasses, and `zimscraperlib.image.optimization:convert_image` + ## [3.2.0] - 2023-12-16 ### Added diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index 4e25d1de..37dc1714 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -97,7 +97,12 @@ class Creator(libzim.writer.Creator): a segmentation fault at garbage collection (on exit mostly). Meaning you should exit right after an exception in your code (during zim creation) - Use workaround_nocancel=False to disable the workaround.""" + Use workaround_nocancel=False to disable the workaround. + + By default, all metadata are validated for compliance with openZIM guidelines and + conventions. Set disable_metadata_checks=True to disable this validation (you can + still do checks manually with the validation methods or your own logic). 
+ """ def __init__( self, @@ -106,6 +111,7 @@ def __init__( compression: Optional[str] = None, workaround_nocancel: Optional[bool] = True, # noqa: FBT002 ignore_duplicates: Optional[bool] = False, # noqa: FBT002 + disable_metadata_checks: bool = False, # noqa: FBT001, FBT002 ): super().__init__(filename=filename) self._metadata = {} @@ -123,6 +129,7 @@ def __init__( self.workaround_nocancel = workaround_nocancel self.ignore_duplicates = ignore_duplicates + self.disable_metadata_checks = disable_metadata_checks def config_indexing( self, indexing: bool, language: Optional[str] = None # noqa: FBT001 @@ -141,9 +148,10 @@ def start(self): if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") - for name, value in self._metadata.items(): - if value: - self.validate_metadata(name, value) + if not self.disable_metadata_checks: + for name, value in self._metadata.items(): + if value: + self.validate_metadata(name, value) language = self._metadata.get("Language", "").split(",") if language[0] and not self.__indexing_configured: @@ -187,7 +195,8 @@ def add_metadata( content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]], mimetype: str = "text/plain;charset=UTF-8", ): - self.validate_metadata(name, content) + if not self.disable_metadata_checks: + self.validate_metadata(name, content) if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)): content = content.strftime("%Y-%m-%d").encode("UTF-8") if ( diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index a8770c4b..dc4d4741 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -507,6 +507,12 @@ def test_check_metadata(tmp_path): Creator(tmp_path, "").config_dev_metadata(LongDescription="T" * 5000).start() +def test_relax_metadata(tmp_path): + Creator(tmp_path, "", disable_metadata_checks=True).config_dev_metadata( + Description="T" * 90 + ).start() + + 
@pytest.mark.parametrize( "tags", [