diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index 37dc1714..70ae11ed 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -100,7 +100,7 @@ class Creator(libzim.writer.Creator): Use workaround_nocancel=False to disable the workaround. By default, all metadata are validated for compliance with openZIM guidelines and - conventions. Set disable_metadata_checks=False to disable this validation (you can + conventions. Set disable_metadata_checks=True to disable this validation (you can still do checks manually with the validation methods or your own logic). """ @@ -111,7 +111,7 @@ def __init__( compression: Optional[str] = None, workaround_nocancel: Optional[bool] = True, # noqa: FBT002 ignore_duplicates: Optional[bool] = False, # noqa: FBT002 - disable_metadata_checks: bool = True, # noqa: FBT001, FBT002 + disable_metadata_checks: bool = False, # noqa: FBT001, FBT002 ): super().__init__(filename=filename) self._metadata = {} @@ -148,7 +148,7 @@ def start(self): if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") - if self.disable_metadata_checks: + if not self.disable_metadata_checks: for name, value in self._metadata.items(): if value: self.validate_metadata(name, value) @@ -195,7 +195,7 @@ def add_metadata( content: Union[str, bytes, datetime.date, datetime.datetime, Iterable[str]], mimetype: str = "text/plain;charset=UTF-8", ): - if self.disable_metadata_checks: + if not self.disable_metadata_checks: self.validate_metadata(name, content) if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)): content = content.strftime("%Y-%m-%d").encode("UTF-8") @@ -303,14 +303,6 @@ def add_item_for( if should_compress is not None: hints[libzim.writer.Hint.COMPRESS] = should_compress - kwargs = { - "path": path, - "title": title or "", - "mimetype": mimetype, - "filepath": fpath if fpath is not None else "", - "hints": hints, - "content": content, - } if delete_fpath and fpath: cb = [delete_callback, fpath] if callback and callable(callback): @@ -320,7 +312,16 @@ def add_item_for( callback = tuple(cb) self.add_item( - StaticItem(**kwargs), callback=callback, duplicate_ok=duplicate_ok + StaticItem( + path=path, + title=title, + mimetype=mimetype, + filepath=fpath, + hints=hints, + content=content, + ), + callback=callback, + duplicate_ok=duplicate_ok, ) return path diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py index 5f4c6ed0..3176a194 100644 --- a/src/zimscraperlib/zim/items.py +++ b/src/zimscraperlib/zim/items.py @@ -9,7 +9,7 @@ import re import tempfile import urllib.parse -from typing import Any, Optional +from typing import Any, Optional, Union import libzim.writer # pyright: ignore @@ -34,13 +34,13 @@ def __init__( **kwargs: Any, ): super().__init__() - if path: + if path is not None: kwargs["path"] = path - if title: + if title is not None: kwargs["title"] = title - if mimetype: + if mimetype is not None: kwargs["mimetype"] = mimetype - if hints: + if hints is not None: kwargs["hints"] = hints for k, v in kwargs.items(): setattr(self, k, v) @@ -72,7 +72,7 @@ class StaticItem(Item): def __init__( self, - content: Optional[str] = None, + content: Optional[Union[str, bytes]] = None, fileobj: Optional[io.IOBase] = None, filepath: Optional[pathlib.Path] = None, path: Optional[str] = None, @@ -81,11 +81,11 @@ def __init__( hints: Optional[dict] = None, **kwargs: Any, ): - if content: + if content is not None: kwargs["content"] = content - if fileobj: + if fileobj is not None: kwargs["fileobj"] = fileobj - if filepath: + if filepath is not None: kwargs["filepath"] = filepath super().__init__( path=path, title=title, mimetype=mimetype, hints=hints, **kwargs @@ -95,6 +95,8 @@ def get_contentprovider(self) -> libzim.writer.ContentProvider: # content was set manually content = getattr(self, "content", None) if content is not None: + if not isinstance(content, (str, bytes)): + raise AttributeError(f"Unexpected type for content: {type(content)}") return StringProvider(content=content, ref=self) # using a file-like object @@ -153,7 +155,7 @@ def __init__( use_disk: Optional[bool] = None, **kwargs: Any, ): - if use_disk: + if use_disk is not None: kwargs["use_disk"] = use_disk super().__init__( path=path, title=title, mimetype=mimetype, hints=hints, **kwargs diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py index a698b4f5..3180b247 100644 --- a/src/zimscraperlib/zim/providers.py +++ b/src/zimscraperlib/zim/providers.py @@ -31,7 +31,7 @@ def __init__( class StringProvider(libzim.writer.StringProvider): - def __init__(self, content: str, ref: Optional[object] = None): + def __init__(self, content: Union[str, bytes], ref: Optional[object] = None): super().__init__(content) self.ref = ref diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py index f5a7f59b..1efb5129 100644 --- a/tests/zim/conftest.py +++ b/tests/zim/conftest.py @@ -24,6 +24,26 @@ def html_str(): """ +@pytest.fixture(scope="function") +def html_str_cn(): + """sample HTML content with chinese characters""" + return """ + +