Skip to content

Commit 824504f

Browse files
committed
Do not store useless properties on Items
We might have many Items, some of them long-lived, so it is probably better not to store properties whose value is None: they are unused, and omitting them keeps the memory footprint lean.
1 parent 954c253 commit 824504f

File tree

1 file changed

+40
-24
lines changed

1 file changed

+40
-24
lines changed

src/zimscraperlib/zim/items.py

+40-24
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,14 @@ def __init__(
3434
**kwargs: Any,
3535
):
3636
super().__init__()
37-
self.path = path
38-
self.title = title
39-
self.mimetype = mimetype
40-
self.hints = hints
37+
if path:
38+
kwargs["path"] = path
39+
if title:
40+
kwargs["title"] = title
41+
if mimetype:
42+
kwargs["mimetype"] = mimetype
43+
if hints:
44+
kwargs["hints"] = hints
4145
for k, v in kwargs.items():
4246
setattr(self, k, v)
4347

@@ -46,16 +50,16 @@ def should_index(self):
4650
return self.get_mimetype().startswith("text/html")
4751

4852
def get_path(self) -> str:
49-
return self.path or ""
53+
return getattr(self, "path", "")
5054

5155
def get_title(self) -> str:
52-
return self.title or ""
56+
return getattr(self, "title", "")
5357

5458
def get_mimetype(self) -> str:
55-
return self.mimetype or ""
59+
return getattr(self, "mimetype", "")
5660

5761
def get_hints(self) -> dict:
58-
return self.hints or {}
62+
return getattr(self, "hints", {})
5963

6064

6165
class StaticItem(Item):
@@ -77,28 +81,34 @@ def __init__(
7781
hints: Optional[dict] = None,
7882
**kwargs: Any,
7983
):
84+
if content:
85+
kwargs["content"] = content
86+
if fileobj:
87+
kwargs["fileobj"] = fileobj
88+
if filepath:
89+
kwargs["filepath"] = filepath
8090
super().__init__(
8191
path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
8292
)
83-
self.content = content
84-
self.fileobj = fileobj
85-
self.filepath = filepath
8693

8794
def get_contentprovider(self) -> libzim.writer.ContentProvider:
8895
# content was set manually
89-
if self.content is not None:
90-
return StringProvider(content=self.content, ref=self)
96+
content = getattr(self, "content", None)
97+
if content is not None:
98+
return StringProvider(content=content, ref=self)
9199

92100
# using a file-like object
93-
if self.fileobj:
101+
fileobj = getattr(self, "fileobj", None)
102+
if fileobj:
94103
return FileLikeProvider(
95-
fileobj=self.fileobj, ref=self, size=getattr(self, "size", None)
104+
fileobj=fileobj, ref=self, size=getattr(self, "size", None)
96105
)
97106

98107
# we had to download locally to get size
99-
if self.filepath:
108+
filepath = getattr(self, "filepath", None)
109+
if filepath:
100110
return FileProvider(
101-
filepath=self.filepath, ref=self, size=getattr(self, "size", None)
111+
filepath=filepath, ref=self, size=getattr(self, "size", None)
102112
)
103113

104114
raise NotImplementedError("No data to provide`")
@@ -140,15 +150,16 @@ def __init__(
140150
title: Optional[str] = None,
141151
mimetype: Optional[str] = None,
142152
hints: Optional[dict] = None,
143-
*,
144-
use_disk: bool = False,
153+
use_disk: Optional[bool] = None,
145154
**kwargs: Any,
146155
):
156+
if use_disk:
157+
kwargs["use_disk"] = use_disk
147158
super().__init__(
148159
path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
149160
)
150161
self.url = urllib.parse.urlparse(url)
151-
self.use_disk = use_disk
162+
use_disk = getattr(self, "use_disk", False)
152163

153164
# fetch headers to retrieve size and type
154165
try:
@@ -177,7 +188,7 @@ def __init__(
177188
except Exception:
178189
# we couldn't retrieve size so we have to download resource to
179190
target, self.size = self.download_for_size(
180-
self.url, on_disk=self.use_disk, tmp_dir=getattr(self, "tmp_dir", None)
191+
self.url, on_disk=use_disk, tmp_dir=getattr(self, "tmp_dir", None)
181192
)
182193
# downloaded to disk and using a file path from now on
183194
if use_disk:
@@ -187,11 +198,16 @@ def __init__(
187198
self.fileobj = target
188199

189200
def get_path(self) -> str:
190-
return self.path or re.sub(r"^/", "", self.url.path)
201+
return getattr(self, "path", re.sub(r"^/", "", self.url.path))
202+
203+
def get_title(self) -> str:
204+
return getattr(self, "title", "")
191205

192206
def get_mimetype(self) -> str:
193-
return self.mimetype or self.headers.get(
194-
"Content-Type", "application/octet-stream"
207+
return getattr(
208+
self,
209+
"mimetype",
210+
self.headers.get("Content-Type", "application/octet-stream"),
195211
)
196212

197213
def get_contentprovider(self):

0 commit comments

Comments
 (0)