9
9
import re
10
10
import tempfile
11
11
import urllib .parse
12
- from typing import Any
12
+ from typing import Any , Optional
13
13
14
14
import libzim .writer # pyright: ignore
15
15
25
25
class Item (libzim .writer .Item ):
26
26
"""libzim.writer.Item returning props for path/title/mimetype"""
27
27
28
def __init__(
    self,
    path: Optional[str] = None,
    title: Optional[str] = None,
    mimetype: Optional[str] = None,
    hints: Optional[dict] = None,
    **kwargs: Any,
):
    """Record the item's core properties plus any extra attributes.

    Args:
        path: entry path; stored as-is on ``self.path``.
        title: entry title; stored as-is on ``self.title``.
        mimetype: entry mimetype; stored as-is on ``self.mimetype``.
        hints: entry hints; stored as-is on ``self.hints``.
        **kwargs: any other keyword is set verbatim as an instance
            attribute, so callers of the former ``**kwargs``-only
            signature keep working.
    """
    super().__init__()
    self.path = path
    self.title = title
    self.mimetype = mimetype
    self.hints = hints
    # extra keywords are applied last, after the named properties
    for name, value in kwargs.items():
        setattr(self, name, value)
32
43
@@ -35,16 +46,16 @@ def should_index(self):
35
46
return self .get_mimetype ().startswith ("text/html" )
36
47
37
48
def get_path(self) -> str:
    """Return the item's path, or ``""`` when it is unset, None or empty."""
    # getattr keeps this safe for instances that never had `path` assigned
    return getattr(self, "path", None) or ""
39
50
40
51
def get_title(self) -> str:
    """Return the item's title, or ``""`` when it is unset, None or empty."""
    # getattr keeps this safe for instances that never had `title` assigned
    return getattr(self, "title", None) or ""
42
53
43
54
def get_mimetype(self) -> str:
    """Return the item's mimetype, or ``""`` when it is unset, None or empty."""
    # getattr keeps this safe for instances that never had `mimetype` assigned
    return getattr(self, "mimetype", None) or ""
45
56
46
57
def get_hints(self) -> dict:
    """Return the item's hints, or an empty dict when unset, None or empty."""
    # getattr keeps this safe for instances that never had `hints` assigned
    return getattr(self, "hints", None) or {}
48
59
49
60
50
61
class StaticItem (Item ):
@@ -55,19 +66,37 @@ class StaticItem(Item):
55
66
more efficiently: now when the libzim destroys the CP, python will destroy
56
67
the Item and we can be notified that we're effectively through with our content"""
57
68
69
def __init__(
    self,
    content: Optional[str] = None,
    fileobj: Optional[io.IOBase] = None,
    filepath: Optional[pathlib.Path] = None,
    path: Optional[str] = None,
    title: Optional[str] = None,
    mimetype: Optional[str] = None,
    hints: Optional[dict] = None,
    **kwargs: Any,
):
    """Record where the item's content comes from, plus its properties.

    Args:
        content: in-memory content; stored on ``self.content``.
        fileobj: file-like source; stored on ``self.fileobj``.
        filepath: on-disk source; stored on ``self.filepath``.
        path: forwarded to the parent initializer.
        title: forwarded to the parent initializer.
        mimetype: forwarded to the parent initializer.
        hints: forwarded to the parent initializer.
        **kwargs: forwarded to the parent initializer.
    """
    super().__init__(
        path=path, title=title, mimetype=mimetype, hints=hints, **kwargs
    )
    # all three sources are always defined, so later code can test them
    # directly instead of probing with getattr
    self.content = content
    self.fileobj = fileobj
    self.filepath = filepath
86
+
58
87
def get_contentprovider (self ) -> libzim .writer .ContentProvider :
59
88
# content was set manually
60
- if getattr ( self , " content" , None ) is not None :
89
+ if self . content is not None :
61
90
return StringProvider (content = self .content , ref = self )
62
91
63
92
# using a file-like object
64
- if getattr ( self , " fileobj" , None ) :
93
+ if self . fileobj :
65
94
return FileLikeProvider (
66
95
fileobj = self .fileobj , ref = self , size = getattr (self , "size" , None )
67
96
)
68
97
69
98
# we had to download locally to get size
70
- if getattr ( self , " filepath" , None ) :
99
+ if self . filepath :
71
100
return FileProvider (
72
101
filepath = self .filepath , ref = self , size = getattr (self , "size" , None )
73
102
)
@@ -104,10 +133,22 @@ def download_for_size(url, on_disk, tmp_dir=None):
104
133
size , _ = stream_file (url .geturl (), fpath = fpath , byte_stream = stream )
105
134
return fpath or stream , size
106
135
107
- def __init__ (self , url : str , ** kwargs : Any ):
108
- super ().__init__ (** kwargs )
136
+ def __init__ (
137
+ self ,
138
+ url : str ,
139
+ path : Optional [str ] = None ,
140
+ title : Optional [str ] = None ,
141
+ mimetype : Optional [str ] = None ,
142
+ hints : Optional [dict ] = None ,
143
+ * ,
144
+ use_disk : bool = False ,
145
+ ** kwargs : Any ,
146
+ ):
147
+ super ().__init__ (
148
+ path = path , title = title , mimetype = mimetype , hints = hints , ** kwargs
149
+ )
109
150
self .url = urllib .parse .urlparse (url )
110
- use_disk = getattr ( self , " use_disk" , False )
151
+ self . use_disk = use_disk
111
152
112
153
# fetch headers to retrieve size and type
113
154
try :
@@ -136,7 +177,7 @@ def __init__(self, url: str, **kwargs: Any):
136
177
except Exception :
137
178
# we couldn't retrieve size so we have to download resource to
138
179
target , self .size = self .download_for_size (
139
- self .url , on_disk = use_disk , tmp_dir = getattr (self , "tmp_dir" , None )
180
+ self .url , on_disk = self . use_disk , tmp_dir = getattr (self , "tmp_dir" , None )
140
181
)
141
182
# downloaded to disk and using a file path from now on
142
183
if use_disk :
@@ -146,16 +187,11 @@ def __init__(self, url: str, **kwargs: Any):
146
187
self .fileobj = target
147
188
148
189
def get_path(self) -> str:
    """Return the explicit path, else one derived from the URL.

    When ``path`` is unset, None or empty, the result is the path component
    of ``self.url`` with a single leading ``/`` removed.
    """
    # getattr keeps this safe for instances that never had `path` assigned
    return getattr(self, "path", None) or re.sub(r"^/", "", self.url.path)
153
191
154
192
def get_mimetype(self) -> str:
    """Return the explicit mimetype, else the one found in ``self.headers``.

    When ``mimetype`` is unset, None or empty, the ``Content-Type`` entry of
    ``self.headers`` is used, defaulting to ``application/octet-stream``.
    """
    # NOTE(review): self.headers is presumably the response headers fetched
    # in __init__; that assignment is not visible here, so confirm.
    return getattr(self, "mimetype", None) or self.headers.get(
        "Content-Type", "application/octet-stream"
    )
160
196
161
197
def get_contentprovider (self ):
0 commit comments