Skip to content

Commit 954c253

Browse files
committed
Add expected args to Item and subclasses init method
1 parent 44f3181 commit 954c253

File tree

3 files changed, with 62 additions and 22 deletions

Diff for: src/zimscraperlib/zim/filesystem.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,9 @@ def __init__(
4747
root: pathlib.Path,
4848
filepath: pathlib.Path,
4949
):
50-
super().__init__(root=root, filepath=filepath)
50+
super().__init__()
51+
self.root = root
52+
self.filepath = filepath
5153
# first look inside the file's magic headers
5254
self.mimetype = get_file_mimetype(self.filepath)
5355
# most web-specific files are plain text. In this case, use extension

Diff for: src/zimscraperlib/zim/items.py

+57 -21
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
import re
1010
import tempfile
1111
import urllib.parse
12-
from typing import Any
12+
from typing import Any, Optional
1313

1414
import libzim.writer # pyright: ignore
1515

@@ -25,8 +25,19 @@
2525
class Item(libzim.writer.Item):
2626
"""libzim.writer.Item returning props for path/title/mimetype"""
2727

28-
def __init__(self, **kwargs: Any):
28+
def __init__(
29+
self,
30+
path: Optional[str] = None,
31+
title: Optional[str] = None,
32+
mimetype: Optional[str] = None,
33+
hints: Optional[dict] = None,
34+
**kwargs: Any,
35+
):
2936
super().__init__()
37+
self.path = path
38+
self.title = title
39+
self.mimetype = mimetype
40+
self.hints = hints
3041
for k, v in kwargs.items():
3142
setattr(self, k, v)
3243

@@ -35,16 +46,16 @@ def should_index(self):
3546
return self.get_mimetype().startswith("text/html")
3647

3748
def get_path(self) -> str:
38-
return getattr(self, "path", "")
49+
return self.path or ""
3950

4051
def get_title(self) -> str:
41-
return getattr(self, "title", "")
52+
return self.title or ""
4253

4354
def get_mimetype(self) -> str:
44-
return getattr(self, "mimetype", "")
55+
return self.mimetype or ""
4556

4657
def get_hints(self) -> dict:
47-
return getattr(self, "hints", {})
58+
return self.hints or {}
4859

4960

5061
class StaticItem(Item):
@@ -55,19 +66,37 @@ class StaticItem(Item):
5566
more efficiently: now when the libzim destroys the CP, python will destroy
5667
the Item and we can be notified that we're effectively through with our content"""
5768

69+
def __init__(
70+
self,
71+
content: Optional[str] = None,
72+
fileobj: Optional[io.IOBase] = None,
73+
filepath: Optional[pathlib.Path] = None,
74+
path: Optional[str] = None,
75+
title: Optional[str] = None,
76+
mimetype: Optional[str] = None,
77+
hints: Optional[dict] = None,
78+
**kwargs: Any,
79+
):
80+
super().__init__(
81+
path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
82+
)
83+
self.content = content
84+
self.fileobj = fileobj
85+
self.filepath = filepath
86+
5887
def get_contentprovider(self) -> libzim.writer.ContentProvider:
5988
# content was set manually
60-
if getattr(self, "content", None) is not None:
89+
if self.content is not None:
6190
return StringProvider(content=self.content, ref=self)
6291

6392
# using a file-like object
64-
if getattr(self, "fileobj", None):
93+
if self.fileobj:
6594
return FileLikeProvider(
6695
fileobj=self.fileobj, ref=self, size=getattr(self, "size", None)
6796
)
6897

6998
# we had to download locally to get size
70-
if getattr(self, "filepath", None):
99+
if self.filepath:
71100
return FileProvider(
72101
filepath=self.filepath, ref=self, size=getattr(self, "size", None)
73102
)
@@ -104,10 +133,22 @@ def download_for_size(url, on_disk, tmp_dir=None):
104133
size, _ = stream_file(url.geturl(), fpath=fpath, byte_stream=stream)
105134
return fpath or stream, size
106135

107-
def __init__(self, url: str, **kwargs: Any):
108-
super().__init__(**kwargs)
136+
def __init__(
137+
self,
138+
url: str,
139+
path: Optional[str] = None,
140+
title: Optional[str] = None,
141+
mimetype: Optional[str] = None,
142+
hints: Optional[dict] = None,
143+
*,
144+
use_disk: bool = False,
145+
**kwargs: Any,
146+
):
147+
super().__init__(
148+
path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
149+
)
109150
self.url = urllib.parse.urlparse(url)
110-
use_disk = getattr(self, "use_disk", False)
151+
self.use_disk = use_disk
111152

112153
# fetch headers to retrieve size and type
113154
try:
@@ -136,7 +177,7 @@ def __init__(self, url: str, **kwargs: Any):
136177
except Exception:
137178
# we couldn't retrieve size so we have to download resource to
138179
target, self.size = self.download_for_size(
139-
self.url, on_disk=use_disk, tmp_dir=getattr(self, "tmp_dir", None)
180+
self.url, on_disk=self.use_disk, tmp_dir=getattr(self, "tmp_dir", None)
140181
)
141182
# downloaded to disk and using a file path from now on
142183
if use_disk:
@@ -146,16 +187,11 @@ def __init__(self, url: str, **kwargs: Any):
146187
self.fileobj = target
147188

148189
def get_path(self) -> str:
149-
return getattr(self, "path", re.sub(r"^/", "", self.url.path))
150-
151-
def get_title(self) -> str:
152-
return getattr(self, "title", "")
190+
return self.path or re.sub(r"^/", "", self.url.path)
153191

154192
def get_mimetype(self) -> str:
155-
return getattr(
156-
self,
157-
"mimetype",
158-
self.headers.get("Content-Type", "application/octet-stream"),
193+
return self.mimetype or self.headers.get(
194+
"Content-Type", "application/octet-stream"
159195
)
160196

161197
def get_contentprovider(self):

Diff for: tests/zim/test_zim_creator.py

+2
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ def get_contentprovider(self):
4040

4141
class FileLikeProviderItem(StaticItem):
4242
def get_contentprovider(self):
43+
if not self.fileobj:
44+
raise AttributeError("fileobj cannot be None")
4345
return FileLikeProvider(self.fileobj)
4446

4547

0 commit comments

Comments
 (0)