Skip to content

Commit 8b6cbac

Browse files
authored
Merge pull request #202 from openzim/tests-reg
Fix CI on main
2 parents 335d527 + e3fef00 commit 8b6cbac

File tree

10 files changed

+48
-30
lines changed

10 files changed

+48
-30
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ requires-python = ">=3.8,<3.13"
88
description = "Collection of python tools to re-use common code across scrapers"
99
readme = "README.md"
1010
dependencies = [
11-
"iso639-lang>=2.2.3,<3.0",
11+
"iso639-lang>=2.4.0,<3.0",
1212
"requests>=2.25.1,<3.0",
1313
"colorthief==0.2.1",
1414
"python-resize-image>=1.1.19,<1.2",
@@ -83,7 +83,7 @@ features = ["dev"]
8383
features = ["scripts", "test"]
8484

8585
[[tool.hatch.envs.test.matrix]]
86-
python = ["3.8", "3.9", "3.10", "3.11"]
86+
python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
8787

8888
[tool.hatch.envs.test.scripts]
8989
run = "inv test --args '{args}'"

src/zimscraperlib/i18n.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,9 @@ def replace_types(new_type: str) -> str:
111111
)
112112
lang_data.update({"english": isolang.name, "iso_types": iso_types})
113113

114-
if isolang.macro():
115-
return (
116-
lang_data,
117-
get_iso_lang_data(isolang.macro().name)[0],
118-
) # first item in the returned tuple
119-
return lang_data, None
114+
# macro-language data is the first item of the tuple returned by get_iso_lang_data
115+
macro = isolang.macro()
116+
return (lang_data, get_iso_lang_data(macro.name)[0] if macro else None)
120117

121118

122119
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:

src/zimscraperlib/zim/archive.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import libzim.suggestion # SuggestionSearcher # pyright: ignore
1919

2020
from zimscraperlib.zim._libkiwix import convertTags, parseMimetypeCounter
21-
from zimscraperlib.zim.items import Item
2221

2322

2423
class Archive(libzim.reader.Archive):
@@ -61,7 +60,7 @@ def get_entry_by_id(self, id_: int) -> libzim.reader.Entry:
6160
"""Entry from its Id in ZIM"""
6261
return self._get_entry_by_id(id_)
6362

64-
def get_item(self, path: str) -> Item:
63+
def get_item(self, path: str) -> libzim.reader.Item:
6564
"""Item from a path"""
6665
return self.get_entry_by_path(path).get_item()
6766

src/zimscraperlib/zim/creator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
import libzim.writer # pyright: ignore
3232
import PIL.Image
33-
import regex
33+
import regex # pyright: ignore [reportMissingModuleSource]
3434

3535
from zimscraperlib import logger
3636
from zimscraperlib.constants import (
@@ -491,7 +491,7 @@ def add_redirect(
491491

492492
try:
493493
try:
494-
super().add_redirection(path, title, target_path, hints)
494+
super().add_redirection(path, title or path, target_path, hints)
495495
except RuntimeError as exc:
496496
if not DUPLICATE_EXC_STR.match(str(exc)) or not duplicate_ok:
497497
raise exc

src/zimscraperlib/zim/items.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import re
1111
import tempfile
1212
import urllib.parse
13+
from collections.abc import Callable
1314
from typing import Any
1415

1516
import libzim.writer # pyright: ignore
@@ -65,6 +66,11 @@ def get_hints(self) -> dict:
6566
return getattr(self, "hints", {})
6667

6768

69+
def no_indexing_indexdata() -> IndexData:
70+
"""IndexData asking libzim not to index this item"""
71+
return IndexData("", "")
72+
73+
6874
class StaticItem(Item):
6975
"""scraperlib Item with auto contentProvider from `content` or `filepath`
7076
@@ -107,19 +113,17 @@ def __init__(
107113
path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
108114
)
109115
if index_data:
110-
self.get_indexdata = lambda: index_data
116+
self.get_indexdata: Callable[[], IndexData] = lambda: index_data
111117
elif not auto_index:
112-
self.get_indexdata = lambda: IndexData("", "") # index nothing
118+
self.get_indexdata = no_indexing_indexdata # index nothing
113119
else:
114120
self._get_auto_index() # consider to add auto index
115121

116122
# Populate item title from index data if title is not set by caller
117-
if (
118-
(not hasattr(self, "title") or not self.title)
119-
and hasattr(self, "get_indexdata")
120-
and self.get_indexdata().get_title()
121-
):
122-
self.title = self.get_indexdata().get_title()
123+
if (not getattr(self, "title", None)) and hasattr(self, "get_indexdata"):
124+
title = self.get_indexdata().get_title()
125+
if title:
126+
self.title = title
123127

124128
def get_contentprovider(self) -> libzim.writer.ContentProvider:
125129
# content was set manually

src/zimscraperlib/zim/providers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import io
1515
import pathlib
16+
from typing import Generator
1617

1718
import libzim.writer # pyright: ignore
1819
import requests
@@ -61,7 +62,7 @@ def __init__(
6162
def get_size(self) -> int:
6263
return self.size # pyright: ignore
6364

64-
def gen_blob(self) -> libzim.writer.Blob:
65+
def gen_blob(self) -> Generator[libzim.writer.Blob, None, None]:
6566
yield libzim.writer.Blob(self.fileobj.getvalue()) # pragma: no cover
6667

6768

@@ -92,7 +93,7 @@ def get_size_of(url) -> int | None:
9293
def get_size(self) -> int:
9394
return self.size # pyright: ignore
9495

95-
def gen_blob(self) -> libzim.writer.Blob: # pragma: no cover
96+
def gen_blob(self) -> Generator[libzim.writer.Blob, None, None]: # pragma: no cover
9697
for chunk in self.resp.iter_content(10 * 1024):
9798
if chunk:
9899
yield libzim.writer.Blob(chunk)

tests/i18n/test_i18n.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@
8181
{
8282
"iso-639-1": "",
8383
"iso-639-2b": "afa",
84-
"iso-639-2t": "",
84+
"iso-639-2t": "afa",
8585
"iso-639-3": "",
8686
"iso-639-5": "afa",
8787
"english": "Afro-Asiatic languages",
88-
"iso_types": ["part2b", "part5"],
88+
"iso_types": ["part2b", "part2t", "part5"],
8989
"querytype": "purecode",
9090
"query": "afa",
9191
"native": "Afro-Asiatic languages",
@@ -96,7 +96,7 @@
9696
{
9797
"iso-639-1": "",
9898
"iso-639-2b": "afa",
99-
"iso-639-2t": "",
99+
"iso-639-2t": "afa",
100100
"iso-639-3": "",
101101
"iso-639-5": "afa",
102102
"english": "Afro-Asiatic languages",

tests/zim/test_fs.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22
# vim: ai ts=4 sts=4 et sw=4 nu
33

4+
import pathlib
45
import shutil
56
import subprocess
67
import sys
@@ -147,7 +148,17 @@ def test_make_zim_file_no_file_on_error(tmp_path, png_image, build_data):
147148
print("Program exiting")
148149
"""
149150

150-
py = subprocess.run([sys.executable, "-c", pycode], check=False)
151+
py = subprocess.run(
152+
[sys.executable, "-c", pycode],
153+
check=False,
154+
# with Python 3.9 on macOS 15, the subprocess failed to find zimscraperlib,
155+
# which made it exit with code 1
156+
env=(
157+
{"PYTHONPATH": str(pathlib.Path.cwd() / "src")}
158+
if sys.version_info[:2] == (3, 9)
159+
else None
160+
),
161+
)
151162
# returncode will be either 0 or -11, depending on garbage collection
152163
# in scrapers, we want to be able to fail on errors and absolutely don't want to
153164
# create a ZIM file, so SEGFAULT on exit is (somewhat) OK

tests/zim/test_indexing.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,12 @@ def test_get_pdf_index_data(
296296
filepath=encrypted_pdf_file if pdf_no == 1 else big_pdf_file
297297
)
298298
assert index_data.get_title() == expected_title
299-
assert (
300-
index_data.get_content()
301-
== (encrypted_pdf_content if pdf_no == 1 else big_pdf_content).read_text()
299+
# actual index content depends on the MuPDF version used by PyMuPDF,
300+
# so this only checks that the index is large enough
301+
content_size = len(
302+
(encrypted_pdf_content if pdf_no == 1 else big_pdf_content).read_text()
302303
)
304+
assert len(index_data.get_content()) >= content_size * 0.9
303305
assert index_data.has_indexdata()
304306
assert index_data.get_wordcount() == expected_word_count
305307
assert index_data.get_keywords() == ""

tests/zim/test_zim_creator.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,15 @@ def get_size(self) -> int:
3838

3939

4040
class SpecialURLProviderItem(StaticItem):
41+
url: str
42+
4143
def get_contentprovider(self):
4244
return SpecialURLProvider(self.url)
4345

4446

4547
class FileLikeProviderItem(StaticItem):
48+
fileobj: io.BytesIO
49+
4650
def get_contentprovider(self):
4751
if not self.fileobj:
4852
raise AttributeError("fileobj cannot be None")
@@ -125,7 +129,7 @@ def test_create_without_workaround(tmp_path):
125129
fpath, "welcome", workaround_nocancel=False
126130
).config_dev_metadata() as creator:
127131
with pytest.raises(RuntimeError, match="AttributeError"):
128-
creator.add_item("hello")
132+
creator.add_item("hello") # pyright: ignore [reportArgumentType]
129133

130134

131135
def test_noindexlanguage(tmp_path):

0 commit comments

Comments
 (0)