Skip to content

Commit 63bc653

Browse files
committed
Fix all type hints, qa and typing issues
1 parent f71a2d7 commit 63bc653

32 files changed

+477
-282
lines changed

Diff for: CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
## Changed
1515
- **BREAKING** Renamed `zimscraperlib.image.convertion` to `zimscraperlib.image.conversion` to fix typo
1616
- **BREAKING** Many changes in type hints to match the real underlying code
17-
- **BREAKING** Force all boolean arguments to be keyword-only in function calls for clarity / disambiguation (see ruff rule FBT002)
17+
- **BREAKING** Force all boolean arguments (and some other non-obvious parameters) to be keyword-only in function calls for clarity / disambiguation (see ruff rule FBT002)
1818
- Prefer `IO[bytes]` over `io.BytesIO` when possible since it is more generic
1919

2020
### Fixed

Diff for: src/zimscraperlib/download.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
from typing import IO, ClassVar
1010

1111
import requests
12+
import requests.adapters
13+
import requests.structures
14+
import urllib3.util
1215
import yt_dlp as youtube_dl
1316

1417
from zimscraperlib import logger
@@ -41,7 +44,8 @@ def download(
4144
self,
4245
url: str,
4346
options: dict | None,
44-
wait: bool | None = True, # noqa: FBT002
47+
*,
48+
wait: bool | None = True,
4549
) -> bool | Future:
4650
"""Downloads video using initialized executor.
4751
@@ -51,9 +55,7 @@ def download(
5155
5256
Returns download result or future (wait=False)"""
5357

54-
future = self.executor.submit(
55-
self._run_youtube_dl, url, options # pyright: ignore
56-
)
58+
future = self.executor.submit(self._run_youtube_dl, url, options or {})
5759
if not wait:
5860
return future
5961
if not future.exception():
@@ -142,8 +144,8 @@ def save_large_file(url: str, fpath: pathlib.Path) -> None:
142144

143145
def _get_retry_adapter(
144146
max_retries: int | None = 5,
145-
) -> requests.adapters.BaseAdapter: # pyright: ignore
146-
retries = requests.packages.urllib3.util.retry.Retry( # pyright: ignore
147+
) -> requests.adapters.BaseAdapter:
148+
retries = urllib3.util.retry.Retry(
147149
total=max_retries, # total number of retries
148150
connect=max_retries, # connection errors
149151
read=max_retries, # read errors
@@ -160,7 +162,7 @@ def _get_retry_adapter(
160162
], # force retry on the following codes
161163
)
162164

163-
return requests.adapters.HTTPAdapter(max_retries=retries) # pyright: ignore
165+
return requests.adapters.HTTPAdapter(max_retries=retries)
164166

165167

166168
def get_session(max_retries: int | None = 5) -> requests.Session:
@@ -176,11 +178,12 @@ def stream_file(
176178
byte_stream: IO[bytes] | None = None,
177179
block_size: int | None = 1024,
178180
proxies: dict | None = None,
179-
only_first_block: bool | None = False, # noqa: FBT002
180181
max_retries: int | None = 5,
181182
headers: dict[str, str] | None = None,
182183
session: requests.Session | None = None,
183-
) -> tuple[int, requests.structures.CaseInsensitiveDict]: # pyright: ignore
184+
*,
185+
only_first_block: bool | None = False,
186+
) -> tuple[int, requests.structures.CaseInsensitiveDict]:
184187
"""Stream data from a URL to either a BytesIO object or a file
185188
Arguments -
186189
fpath - Path of the file where data is sent
@@ -211,12 +214,14 @@ def stream_file(
211214
total_downloaded = 0
212215
if fpath is not None:
213216
fp = open(fpath, "wb")
214-
else:
217+
elif (
218+
byte_stream is not None
219+
): # pragma: no branch (we use a precise condition to help type checker)
215220
fp = byte_stream
216221

217222
for data in resp.iter_content(block_size):
218223
total_downloaded += len(data)
219-
fp.write(data) # pyright: ignore
224+
fp.write(data)
220225

221226
# stop downloading/reading if we're just testing first block
222227
if only_first_block:
@@ -225,7 +230,7 @@ def stream_file(
225230
logger.debug(f"Downloaded {total_downloaded} bytes from {url}")
226231

227232
if fpath:
228-
fp.close() # pyright: ignore
233+
fp.close()
229234
else:
230-
fp.seek(0) # pyright: ignore
235+
fp.seek(0)
231236
return total_downloaded, resp.headers

Diff for: src/zimscraperlib/filesystem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def get_file_mimetype(fpath: pathlib.Path) -> str:
3030
return get_content_mimetype(fh.read(2048))
3131

3232

33-
def get_content_mimetype(content: bytes) -> str:
33+
def get_content_mimetype(content: bytes | str) -> str:
3434
"""MIME Type of content retrieved from magic headers"""
3535

3636
try:

Diff for: src/zimscraperlib/html.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,22 @@
77
import pathlib
88
from typing import BinaryIO, TextIO
99

10-
from bs4 import BeautifulSoup
10+
from bs4 import BeautifulSoup, element
1111

1212
from zimscraperlib.types import ARTICLE_MIME
1313

1414

15-
def find_title_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
15+
def find_title_in(content: str | BinaryIO | TextIO, mime_type: str | None) -> str:
1616
"""Extracted title from HTML content
1717
1818
blank on failure to extract and non-HTML files"""
1919
if mime_type != ARTICLE_MIME:
2020
return ""
21-
try:
22-
return BeautifulSoup(content, "lxml").find("title").text # pyright: ignore
23-
except Exception:
24-
return ""
21+
title_tag = BeautifulSoup(content, "lxml").find("title")
22+
return title_tag.text if title_tag else ""
2523

2624

27-
def find_title_in_file(fpath: pathlib.Path, mime_type: str) -> str:
25+
def find_title_in_file(fpath: pathlib.Path, mime_type: str | None) -> str:
2826
"""Extracted title from an HTML file"""
2927
try:
3028
with open(fpath) as fh:
@@ -45,15 +43,17 @@ def find_language_in(content: str | BinaryIO | TextIO, mime_type: str) -> str:
4543
for key in keylist:
4644
node = soup.find(nodename)
4745
if node:
48-
if not node.has_attr(key): # pyright: ignore
46+
if not isinstance(node, element.Tag) or (
47+
isinstance(node, element.Tag) and not node.has_attr(key)
48+
):
4949
continue
5050
if (
5151
nodename == "meta"
52-
and not node.attrs.get("http-equiv", "").lower() # pyright: ignore
52+
and not node.attrs.get("http-equiv", "").lower()
5353
== "content-language"
5454
):
5555
continue
56-
return node.attrs[key] # pyright: ignore
56+
return node.attrs[key]
5757
return ""
5858

5959

Diff for: src/zimscraperlib/i18n.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
ISO_LEVELS = ["1", "2b", "2t", "3", "5"]
1616

1717

18-
class NotFound(ValueError): # noqa: N818
18+
class NotFoundError(ValueError):
1919
pass
2020

2121

@@ -75,7 +75,7 @@ def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
7575
iso639.exceptions.InvalidLanguageValue,
7676
iso639.exceptions.DeprecatedLanguageValue,
7777
) as exc:
78-
raise NotFound("Not a valid iso language name/code") from exc
78+
raise NotFoundError("Not a valid iso language name/code") from exc
7979

8080
def replace_types(new_type: str) -> str:
8181
# convert new iso_types from iso639-lang Pypi package to old iso_types from
@@ -112,34 +112,32 @@ def replace_types(new_type: str) -> str:
112112
return lang_data, None
113113

114114

115-
def find_language_names(query: str, lang_data: dict | None = None) -> tuple[str, str]:
115+
def find_language_names(
116+
query: str, lang_data: dict | None = None
117+
) -> tuple[str | None, str | None]:
116118
"""(native, english) language names for lang with help from language_details dict
117119
118120
Falls back to English name if available or query if not"""
119121
if lang_data is None:
120122
lang_data = get_language_details(query, failsafe=True) or {}
121123
try:
122124
query_locale = babel.Locale.parse(query)
123-
return query_locale.get_display_name(), query_locale.get_display_name(
124-
"en"
125-
) # pyright: ignore
125+
return query_locale.get_display_name(), query_locale.get_display_name("en")
126126
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
127127
pass
128128

129129
# ISO code lookup order matters (most qualified first)!
130130
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
131131
try:
132132
query_locale = babel.Locale.parse(lang_data.get(iso_level))
133-
return query_locale.get_display_name(), query_locale.get_display_name(
134-
"en"
135-
) # pyright: ignore
133+
return query_locale.get_display_name(), query_locale.get_display_name("en")
136134
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
137135
pass
138136
default = lang_data.get("english", query)
139137
return default, default
140138

141139

142-
def update_with_macro(lang_data: dict, macro_data: dict):
140+
def update_with_macro(lang_data: dict, macro_data: dict | None):
143141
"""update empty keys from lang_data with ones of macro_data"""
144142
if macro_data:
145143
for key, value in macro_data.items():
@@ -148,9 +146,7 @@ def update_with_macro(lang_data: dict, macro_data: dict):
148146
return lang_data
149147

150148

151-
def get_language_details(
152-
query: str, failsafe: bool | None = False # noqa: FBT002
153-
) -> dict:
149+
def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
154150
"""language details dict from query.
155151
156152
Raises NotFoundError or returns `und` language details if failsafe
@@ -186,12 +182,12 @@ def get_language_details(
186182

187183
try:
188184
lang_data, macro_data = get_iso_lang_data(adjusted_query)
189-
except NotFound as exc:
185+
except NotFoundError as exc:
190186
if failsafe:
191-
return None # pyright: ignore
187+
return None
192188
raise exc
193189

194-
iso_data = update_with_macro(lang_data, macro_data) # pyright: ignore
190+
iso_data = update_with_macro(lang_data, macro_data)
195191
native_name, english_name = find_language_names(native_query, iso_data)
196192
iso_data.update(
197193
{

Diff for: src/zimscraperlib/image/conversion.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ def convert_image(
3232
fmt = params.pop("fmt").upper() if "fmt" in params else None # requested format
3333
if not fmt:
3434
fmt = format_for(dst)
35+
if not fmt:
36+
raise ValueError("Impossible to guess destination image format")
3537
with pilopen(src) as image:
3638
if image.mode == "RGBA" and fmt in ALPHA_NOT_SUPPORTED or colorspace:
3739
image = image.convert(colorspace or "RGB") # noqa: PLW2901

0 commit comments

Comments
 (0)