openzim
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 1 addition & 1 deletion
diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py
Lines changed: 18 additions & 13 deletions
diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/zimscraperlib/html.py b/src/zimscraperlib/html.py
Lines changed: 10 additions & 10 deletions
diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py
Lines changed: 12 additions & 16 deletions
diff --git a/src/zimscraperlib/image/conversion.py b/src/zimscraperlib/image/conversion.py
Lines changed: 2 additions & 0 deletions
@@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-## Changed
+### Changed
 - **BREAKING** Renamed `zimscraperlib.image.convertion` to `zimscraperlib.image.conversion` to fix typo
 - **BREAKING** Many changes in type hints to match the real underlying code
-- **BREAKING** Force all boolean arguments to be keyword-only in function calls for clarity / disambiguation (see ruff rule FBT002)
+- **BREAKING** Force all boolean arguments (and some other non-obvious parameters) to be keyword-only in function calls for clarity / disambiguation (see ruff rule FBT002)
 - Prefer to use `IO[bytes]` to `io.BytesIO` when possible since it is more generic
 
 ### Fixed
 
@@ -9,6 +9,9 @@
 from typing import IO, ClassVar
 
 import requests
+import requests.adapters
+import requests.structures
+import urllib3.util
 import yt_dlp as youtube_dl
 
 from zimscraperlib import logger
@@ -41,7 +44,8 @@ def download(
         self,
         url: str,
         options: dict | None,
-        wait: bool | None = True,  # noqa: FBT002
+        *,
+        wait: bool | None = True,
     ) -> bool | Future:
         """Downloads video using initialized executor.
 
@@ -51,9 +55,7 @@ def download(
 
         Returns download result of future (wait=False)"""
 
-        future = self.executor.submit(
-            self._run_youtube_dl, url, options  # pyright: ignore
-        )
+        future = self.executor.submit(self._run_youtube_dl, url, options or {})
         if not wait:
             return future
         if not future.exception():
@@ -142,8 +144,8 @@ def save_large_file(url: str, fpath: pathlib.Path) -> None:
 
 def _get_retry_adapter(
     max_retries: int | None = 5,
-) -> requests.adapters.BaseAdapter:  # pyright: ignore
-    retries = requests.packages.urllib3.util.retry.Retry(  # pyright: ignore
+) -> requests.adapters.BaseAdapter:
+    retries = urllib3.util.retry.Retry(
         total=max_retries,  # total number of retries
         connect=max_retries,  # connection errors
         read=max_retries,  # read errors
@@ -160,7 +162,7 @@ def _get_retry_adapter(
         ],  # force retry on the following codes
     )
 
-    return requests.adapters.HTTPAdapter(max_retries=retries)  # pyright: ignore
+    return requests.adapters.HTTPAdapter(max_retries=retries)
 
 
 def get_session(max_retries: int | None = 5) -> requests.Session:
@@ -176,11 +178,12 @@ def stream_file(
     byte_stream: IO[bytes] | None = None,
     block_size: int | None = 1024,
     proxies: dict | None = None,
-    only_first_block: bool | None = False,  # noqa: FBT002
     max_retries: int | None = 5,
     headers: dict[str, str] | None = None,
     session: requests.Session | None = None,
-) -> tuple[int, requests.structures.CaseInsensitiveDict]:  # pyright: ignore
+    *,
+    only_first_block: bool | None = False,
+) -> tuple[int, requests.structures.CaseInsensitiveDict]:
     """Stream data from a URL to either a BytesIO object or a file
     Arguments -
         fpath - Path of the file where data is sent
@@ -211,12 +214,14 @@ def stream_file(
     total_downloaded = 0
     if fpath is not None:
         fp = open(fpath, "wb")
-    else:
+    elif (
+        byte_stream is not None
+    ):  # pragma: no branch (we use a precise condition to help type checker)
         fp = byte_stream
 
     for data in resp.iter_content(block_size):
         total_downloaded += len(data)
-        fp.write(data)  # pyright: ignore
+        fp.write(data)
 
         # stop downloading/reading if we're just testing first block
         if only_first_block:
@@ -225,7 +230,7 @@ def stream_file(
     logger.debug(f"Downloaded {total_downloaded} bytes from {url}")
 
     if fpath:
-        fp.close()  # pyright: ignore
+        fp.close()
     else:
-        fp.seek(0)  # pyright: ignore
+        fp.seek(0)
     return total_downloaded, resp.headers
@@ -30,7 +30,7 @@ def get_file_mimetype(fpath: pathlib.Path) -> str:
         return get_content_mimetype(fh.read(2048))
 
 
-def get_content_mimetype(content: bytes) -> str:
+def get_content_mimetype(content: bytes | str) -> str:
     """MIME Type of content retrieved from magic headers"""
 
     try:
 
@@ -7,24 +7,22 @@
 import pathlib
 from typing import BinaryIO, TextIO
 
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, element
 
 from zimscraperlib.types import ARTICLE_MIME
 
 
-def find_title_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
+def find_title_in(content: str | BinaryIO | TextIO, mime_type: str | None) -> str:
     """Extracted title from HTML content
 
     blank on failure to extract and non-HTML files"""
     if mime_type != ARTICLE_MIME:
         return ""
-    try:
-        return BeautifulSoup(content, "lxml").find("title").text  # pyright: ignore
-    except Exception:
-        return ""
+    title_tag = BeautifulSoup(content, "lxml").find("title")
+    return title_tag.text if title_tag else ""
 
 
-def find_title_in_file(fpath: pathlib.Path, mime_type: str) -> str:
+def find_title_in_file(fpath: pathlib.Path, mime_type: str | None) -> str:
     """Extracted title from an HTML file"""
     try:
         with open(fpath) as fh:
@@ -45,15 +43,17 @@ def find_language_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
         for key in keylist:
             node = soup.find(nodename)
             if node:
-                if not node.has_attr(key):  # pyright: ignore
+                if not isinstance(node, element.Tag) or (
+                    isinstance(node, element.Tag) and not node.has_attr(key)
+                ):
                     continue
                 if (
                     nodename == "meta"
-                    and not node.attrs.get("http-equiv", "").lower()  # pyright: ignore
+                    and not node.attrs.get("http-equiv", "").lower()
                     == "content-language"
                 ):
                     continue
-                return node.attrs[key]  # pyright: ignore
+                return node.attrs[key]
     return ""
 
 
 
@@ -15,7 +15,7 @@
 ISO_LEVELS = ["1", "2b", "2t", "3", "5"]
 
 
-class NotFound(ValueError):  # noqa: N818
+class NotFoundError(ValueError):
     pass
 
 
@@ -75,7 +75,7 @@ def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
         iso639.exceptions.InvalidLanguageValue,
         iso639.exceptions.DeprecatedLanguageValue,
     ) as exc:
-        raise NotFound("Not a valid iso language name/code") from exc
+        raise NotFoundError("Not a valid iso language name/code") from exc
 
     def replace_types(new_type: str) -> str:
         # convert new iso_types from iso639-lang Pypi package to old iso_types from
@@ -112,34 +112,32 @@ def replace_types(new_type: str) -> str:
     return lang_data, None
 
 
-def find_language_names(query: str, lang_data: dict | None = None) -> tuple[str, str]:
+def find_language_names(
+    query: str, lang_data: dict | None = None
+) -> tuple[str | None, str | None]:
     """(native, english) language names for lang with help from language_details dict
 
     Falls back to English name if available or query if not"""
     if lang_data is None:
         lang_data = get_language_details(query, failsafe=True) or {}
     try:
         query_locale = babel.Locale.parse(query)
-        return query_locale.get_display_name(), query_locale.get_display_name(
-            "en"
-        )  # pyright: ignore
+        return query_locale.get_display_name(), query_locale.get_display_name("en")
     except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
         pass
 
     # ISO code lookup order matters (most qualified first)!
     for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
         try:
             query_locale = babel.Locale.parse(lang_data.get(iso_level))
-            return query_locale.get_display_name(), query_locale.get_display_name(
-                "en"
-            )  # pyright: ignore
+            return query_locale.get_display_name(), query_locale.get_display_name("en")
         except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
             pass
     default = lang_data.get("english", query)
     return default, default
 
 
-def update_with_macro(lang_data: dict, macro_data: dict):
+def update_with_macro(lang_data: dict, macro_data: dict | None):
     """update empty keys from lang_data with ones of macro_data"""
     if macro_data:
         for key, value in macro_data.items():
@@ -148,9 +146,7 @@ def update_with_macro(lang_data: dict, macro_data: dict):
     return lang_data
 
 
-def get_language_details(
-    query: str, failsafe: bool | None = False  # noqa: FBT002
-) -> dict:
+def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
     """language details dict from query.
 
-    Raises NotFound or return `und` language details if failsafe
+    Raises NotFoundError or returns `und` language details if failsafe
@@ -186,12 +182,12 @@ def get_language_details(
 
     try:
         lang_data, macro_data = get_iso_lang_data(adjusted_query)
-    except NotFound as exc:
+    except NotFoundError as exc:
         if failsafe:
-            return None  # pyright: ignore
+            return None
         raise exc
 
-    iso_data = update_with_macro(lang_data, macro_data)  # pyright: ignore
+    iso_data = update_with_macro(lang_data, macro_data)
     native_name, english_name = find_language_names(native_query, iso_data)
     iso_data.update(
         {
 
@@ -32,6 +32,8 @@ def convert_image(
     fmt = params.pop("fmt").upper() if "fmt" in params else None  # requested format
     if not fmt:
         fmt = format_for(dst)
+    if not fmt:
+        raise ValueError("Impossible to guess destination image format")
     with pilopen(src) as image:
         if image.mode == "RGBA" and fmt in ALPHA_NOT_SUPPORTED or colorspace:
             image = image.convert(colorspace or "RGB")  # noqa: PLW2901