99import re
1010import tempfile
1111import urllib .parse
12- from typing import Any
12+ from typing import Any , Optional
1313
1414import libzim .writer # pyright: ignore
1515
2525class Item (libzim .writer .Item ):
2626 """libzim.writer.Item returning props for path/title/mimetype"""
2727
28- def __init__ (self , ** kwargs : Any ):
28+ def __init__ (
29+ self ,
30+ path : Optional [str ] = None ,
31+ title : Optional [str ] = None ,
32+ mimetype : Optional [str ] = None ,
33+ hints : Optional [dict ] = None ,
34+ ** kwargs : Any ,
35+ ):
2936 super ().__init__ ()
37+ self .path = path
38+ self .title = title
39+ self .mimetype = mimetype
40+ self .hints = hints
3041 for k , v in kwargs .items ():
3142 setattr (self , k , v )
3243
@@ -35,16 +46,16 @@ def should_index(self):
3546 return self .get_mimetype ().startswith ("text/html" )
3647
3748 def get_path (self ) -> str :
38- return getattr ( self , " path" , "" )
49+ return self . path or ""
3950
4051 def get_title (self ) -> str :
41- return getattr ( self , " title" , "" )
52+ return self . title or ""
4253
4354 def get_mimetype (self ) -> str :
44- return getattr ( self , " mimetype" , "" )
55+ return self . mimetype or ""
4556
4657 def get_hints (self ) -> dict :
47- return getattr ( self , " hints" , {})
58+ return self . hints or {}
4859
4960
5061class StaticItem (Item ):
@@ -55,19 +66,37 @@ class StaticItem(Item):
5566 more efficiently: now when the libzim destroys the CP, python will destroy
5667 the Item and we can be notified that we're effectively through with our content"""
5768
69+ def __init__ (
70+ self ,
71+ content : Optional [str ] = None ,
72+ fileobj : Optional [io .IOBase ] = None ,
73+ filepath : Optional [pathlib .Path ] = None ,
74+ path : Optional [str ] = None ,
75+ title : Optional [str ] = None ,
76+ mimetype : Optional [str ] = None ,
77+ hints : Optional [dict ] = None ,
78+ ** kwargs : Any ,
79+ ):
80+ super ().__init__ (
81+ path = path , title = title , mimetype = mimetype , hints = hints , ** kwargs
82+ )
83+ self .content = content
84+ self .fileobj = fileobj
85+ self .filepath = filepath
86+
5887 def get_contentprovider (self ) -> libzim .writer .ContentProvider :
5988 # content was set manually
60- if getattr ( self , " content" , None ) is not None :
89+ if self . content is not None :
6190 return StringProvider (content = self .content , ref = self )
6291
6392 # using a file-like object
64- if getattr ( self , " fileobj" , None ) :
93+ if self . fileobj :
6594 return FileLikeProvider (
6695 fileobj = self .fileobj , ref = self , size = getattr (self , "size" , None )
6796 )
6897
6998 # we had to download locally to get size
70- if getattr ( self , " filepath" , None ) :
99+ if self . filepath :
71100 return FileProvider (
72101 filepath = self .filepath , ref = self , size = getattr (self , "size" , None )
73102 )
@@ -104,10 +133,22 @@ def download_for_size(url, on_disk, tmp_dir=None):
104133 size , _ = stream_file (url .geturl (), fpath = fpath , byte_stream = stream )
105134 return fpath or stream , size
106135
107- def __init__ (self , url : str , ** kwargs : Any ):
108- super ().__init__ (** kwargs )
136+ def __init__ (
137+ self ,
138+ url : str ,
139+ path : Optional [str ] = None ,
140+ title : Optional [str ] = None ,
141+ mimetype : Optional [str ] = None ,
142+ hints : Optional [dict ] = None ,
143+ * ,
144+ use_disk : bool = False ,
145+ ** kwargs : Any ,
146+ ):
147+ super ().__init__ (
148+ path = path , title = title , mimetype = mimetype , hints = hints , ** kwargs
149+ )
109150 self .url = urllib .parse .urlparse (url )
110- use_disk = getattr ( self , " use_disk" , False )
151+ self . use_disk = use_disk
111152
112153 # fetch headers to retrieve size and type
113154 try :
@@ -136,7 +177,7 @@ def __init__(self, url: str, **kwargs: Any):
136177 except Exception :
137178 # we couldn't retrieve size so we have to download resource to
138179 target , self .size = self .download_for_size (
139- self .url , on_disk = use_disk , tmp_dir = getattr (self , "tmp_dir" , None )
180+ self .url , on_disk = self . use_disk , tmp_dir = getattr (self , "tmp_dir" , None )
140181 )
141182 # downloaded to disk and using a file path from now on
142183 if use_disk :
@@ -146,16 +187,11 @@ def __init__(self, url: str, **kwargs: Any):
146187 self .fileobj = target
147188
148189 def get_path (self ) -> str :
149- return getattr (self , "path" , re .sub (r"^/" , "" , self .url .path ))
150-
151- def get_title (self ) -> str :
152- return getattr (self , "title" , "" )
190+ return self .path or re .sub (r"^/" , "" , self .url .path )
153191
154192 def get_mimetype (self ) -> str :
155- return getattr (
156- self ,
157- "mimetype" ,
158- self .headers .get ("Content-Type" , "application/octet-stream" ),
193+ return self .mimetype or self .headers .get (
194+ "Content-Type" , "application/octet-stream"
159195 )
160196
161197 def get_contentprovider (self ):
0 commit comments