From 12841a5ce209adaad515e2fff04e8738964e96e9 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 17:17:36 +0100 Subject: [PATCH 1/3] Update CHANGELOG for last PR changes --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa3bbf95..719c3a77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2) - When reencoding a video, ffmpeg now uses only 1 CPU thread by default (new arg to `reencode` allows to override this default value) +### Fixed + +- Fixed type hints of `zimscraperlib.zim.Item` and subclasses, and `zimscraperlib.image.optimization:convert_image` + ## [3.2.0] - 2023-12-16 ### Added From 1a9b8f7f3d3490cebd3ec09a2c4fd40f6232253f Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 17:24:30 +0100 Subject: [PATCH 2/3] New `auto_metadata_check` parameter in `zimscraperlib.zim.creator.Creator` initializer --- CHANGELOG.md | 4 ++++ src/zimscraperlib/zim/creator.py | 19 ++++++++++++++----- tests/zim/test_zim_creator.py | 6 ++++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 719c3a77..d4f8e00f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- New `auto_metadata_check` parameter in `zimscraperlib.zim.creator.Creator` initializer, allowing to disable metadata check at startup (assuming the user will validate them on its own) #119 + ### Changed - Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120 diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index 4e25d1de..a8880752 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -97,7 
+97,12 @@ class Creator(libzim.writer.Creator): a segmentation fault at garbage collection (on exit mostly). Meaning you should exit right after an exception in your code (during zim creation) - Use workaround_nocancel=False to disable the workaround.""" + Use workaround_nocancel=False to disable the workaround. + + By default, all metadata are validated for compliance with openZIM guidelines and + conventions. Set auto_metadata_check=False to disable this validation (you can still + do checks manually with the validation methods or your own logic). + """ def __init__( self, @@ -106,6 +111,7 @@ def __init__( compression: Optional[str] = None, workaround_nocancel: Optional[bool] = True, # noqa: FBT002 ignore_duplicates: Optional[bool] = False, # noqa: FBT002 + auto_metadata_check: bool = True, # noqa: FBT001, FBT002 ): super().__init__(filename=filename) self._metadata = {} @@ -123,6 +129,7 @@ def __init__( self.workaround_nocancel = workaround_nocancel self.ignore_duplicates = ignore_duplicates + self.auto_metadata_check = auto_metadata_check def config_indexing( self, indexing: bool, language: Optional[str] = None # noqa: FBT001 @@ -141,9 +148,10 @@ def start(self): if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") - for name, value in self._metadata.items(): - if value: - self.validate_metadata(name, value) + if self.auto_metadata_check: + for name, value in self._metadata.items(): + if value: + self.validate_metadata(name, value) language = self._metadata.get("Language", "").split(",") if language[0] and not self.__indexing_configured: @@ -187,7 +195,8 @@ def add_metadata( content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]], mimetype: str = "text/plain;charset=UTF-8", ): - self.validate_metadata(name, content) + if self.auto_metadata_check: + self.validate_metadata(name, content) if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)): 
content = content.strftime("%Y-%m-%d").encode("UTF-8") if ( diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index a8770c4b..0dd79871 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -507,6 +507,12 @@ def test_check_metadata(tmp_path): Creator(tmp_path, "").config_dev_metadata(LongDescription="T" * 5000).start() +def test_relax_metadata(tmp_path): + Creator(tmp_path, "", auto_metadata_check=False).config_dev_metadata( + Description="T" * 90 + ).start() + + @pytest.mark.parametrize( "tags", [ From 8d6795e1d36bec0687cb214393f0f1a9e7f90b50 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 18:35:50 +0100 Subject: [PATCH 3/3] Rename auto_metadata_checks to disable_metadata_checks --- CHANGELOG.md | 2 +- src/zimscraperlib/zim/creator.py | 12 ++++++------ tests/zim/test_zim_creator.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4f8e00f..ba6b749e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- New `auto_metadata_check` parameter in `zimscraperlib.zim.creator.Creator` initializer, allowing to disable metadata check at startup (assuming the user will validate them on its own) #119 +- New `disable_metadata_checks` parameter in `zimscraperlib.zim.creator.Creator` initializer, allowing metadata checks to be disabled at startup (assuming users will validate the metadata on their own) #119 ### Changed diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index a8880752..37dc1714 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -100,8 +100,8 @@ class Creator(libzim.writer.Creator): Use workaround_nocancel=False to disable the workaround. By default, all metadata are validated for compliance with openZIM guidelines and - conventions. 
Set auto_metadata_check=False to disable this validation (you can still - do checks manually with the validation methods or your own logic). + conventions. Set disable_metadata_checks=False to disable this validation (you can + still do checks manually with the validation methods or your own logic). """ def __init__( @@ -111,7 +111,7 @@ def __init__( compression: Optional[str] = None, workaround_nocancel: Optional[bool] = True, # noqa: FBT002 ignore_duplicates: Optional[bool] = False, # noqa: FBT002 - auto_metadata_check: bool = True, # noqa: FBT001, FBT002 + disable_metadata_checks: bool = True, # noqa: FBT001, FBT002 ): super().__init__(filename=filename) self._metadata = {} @@ -129,7 +129,7 @@ def __init__( self.workaround_nocancel = workaround_nocancel self.ignore_duplicates = ignore_duplicates - self.auto_metadata_check = auto_metadata_check + self.disable_metadata_checks = disable_metadata_checks def config_indexing( self, indexing: bool, language: Optional[str] = None # noqa: FBT001 @@ -148,7 +148,7 @@ def start(self): if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") - if self.auto_metadata_check: + if self.disable_metadata_checks: for name, value in self._metadata.items(): if value: self.validate_metadata(name, value) @@ -195,7 +195,7 @@ def add_metadata( content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]], mimetype: str = "text/plain;charset=UTF-8", ): - if self.auto_metadata_check: + if self.disable_metadata_checks: self.validate_metadata(name, content) if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)): content = content.strftime("%Y-%m-%d").encode("UTF-8") diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index 0dd79871..dc4d4741 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -508,7 +508,7 @@ def test_check_metadata(tmp_path): def test_relax_metadata(tmp_path): - 
Creator(tmp_path, "", auto_metadata_check=False).config_dev_metadata( + Creator(tmp_path, "", disable_metadata_checks=False).config_dev_metadata( Description="T" * 90 ).start()