From 7e19b92cf3bfb1cd3c66ee92cbb9485c9471bebe Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 14 Feb 2024 08:08:38 +0100 Subject: [PATCH 1/4] Fix disable_metadata_checks behavior --- src/zimscraperlib/zim/creator.py | 8 ++++---- tests/zim/test_zim_creator.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index 37dc1714..6d03ee3b 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -100,7 +100,7 @@ class Creator(libzim.writer.Creator): Use workaround_nocancel=False to disable the workaround. By default, all metadata are validated for compliance with openZIM guidelines and - conventions. Set disable_metadata_checks=False to disable this validation (you can + conventions. Set disable_metadata_checks=True to disable this validation (you can still do checks manually with the validation methods or your own logic). """ @@ -111,7 +111,7 @@ def __init__( compression: Optional[str] = None, workaround_nocancel: Optional[bool] = True, # noqa: FBT002 ignore_duplicates: Optional[bool] = False, # noqa: FBT002 - disable_metadata_checks: bool = True, # noqa: FBT001, FBT002 + disable_metadata_checks: bool = False, # noqa: FBT001, FBT002 ): super().__init__(filename=filename) self._metadata = {} @@ -148,7 +148,7 @@ def start(self): if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") - if self.disable_metadata_checks: + if not self.disable_metadata_checks: for name, value in self._metadata.items(): if value: self.validate_metadata(name, value) @@ -195,7 +195,7 @@ def add_metadata( content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]], mimetype: str = "text/plain;charset=UTF-8", ): - if self.disable_metadata_checks: + if not self.disable_metadata_checks: self.validate_metadata(name, content) if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)): content = content.strftime("%Y-%m-%d").encode("UTF-8") diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index dc4d4741..63e644dc 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -508,7 +508,7 @@ def test_check_metadata(tmp_path): def test_relax_metadata(tmp_path): - Creator(tmp_path, "", disable_metadata_checks=False).config_dev_metadata( + Creator(tmp_path, "", disable_metadata_checks=True).config_dev_metadata( Description="T" * 90 ).start() From d2895ee25e2bad0800c96e9071ca861b12a6392b Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 14 Feb 2024 08:37:33 +0100 Subject: [PATCH 2/4] libzim StringProvider support both bytes and str for content --- src/zimscraperlib/zim/items.py | 6 ++++-- src/zimscraperlib/zim/providers.py | 2 +- tests/zim/conftest.py | 20 ++++++++++++++++++++ tests/zim/test_zim_creator.py | 24 +++++++++++++++++++++++- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py index 5f4c6ed0..0ecd654c 100644 --- a/src/zimscraperlib/zim/items.py +++ b/src/zimscraperlib/zim/items.py @@ -9,7 +9,7 @@ import re import tempfile import urllib.parse -from typing import Any, Optional +from typing import Any, Optional, Union import libzim.writer # pyright: ignore @@ -72,7 +72,7 @@ class StaticItem(Item): def __init__( self, - content: Optional[str] = None, + content: Optional[Union[str, bytes]] = None, fileobj: Optional[io.IOBase] = None, filepath: Optional[pathlib.Path] = None, path: Optional[str] = None, @@ -95,6 +95,8 @@ def get_contentprovider(self) -> libzim.writer.ContentProvider: # content was set manually content = getattr(self, "content", None) if content is not None: + if not isinstance(content, (str, bytes)): + raise AttributeError(f"Unexpected type for content: {type(content)}") return StringProvider(content=content, ref=self) # using a file-like object diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py index a698b4f5..3180b247 100644 --- a/src/zimscraperlib/zim/providers.py +++ b/src/zimscraperlib/zim/providers.py @@ -31,7 +31,7 @@ def __init__( class StringProvider(libzim.writer.StringProvider): - def __init__(self, content: str, ref: Optional[object] = None): + def __init__(self, content: Union[str, bytes], ref: Optional[object] = None): super().__init__(content) self.ref = ref diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py index f5a7f59b..1efb5129 100644 --- a/tests/zim/conftest.py +++ b/tests/zim/conftest.py @@ -24,6 +24,26 @@ def html_str(): """ +@pytest.fixture(scope="function") +def html_str_cn(): + """sample HTML content with chinese characters""" + return """ + +