@@ -82,7 +82,7 @@ def __str__(self) -> str:
82
82
return f"HttpUrl({ self .value } )"
83
83
84
84
def __repr__(self) -> str:
    """Return the debug representation, identical to ``__str__``.

    The previous implementation interpolated ``self.__str__`` without
    calling it, so the output embedded a bound-method repr rather than
    the URL. The compact ``HttpUrl(<value>)`` form is used instead.
    """
    return f"HttpUrl({self.value})"  # pragma: no cover
86
86
87
87
@property
88
88
def value (self ) -> str :
@@ -124,7 +124,7 @@ def __str__(self) -> str:
124
124
return f"ZimPath({ self .value } )"
125
125
126
126
def __repr__(self) -> str:
    """Return the debug representation, identical to ``__str__``.

    The previous implementation interpolated ``self.__str__`` without
    calling it, so the output embedded a bound-method repr rather than
    the path. The compact ``ZimPath(<value>)`` form is used instead.
    """
    return f"ZimPath({self.value})"  # pragma: no cover
128
128
129
129
@property
130
130
def value (self ) -> str :
@@ -147,6 +147,12 @@ def check_validity(cls, value: str) -> None:
147
147
raise ValueError (f"Unexpected password in value: { value } { parts .password } " )
148
148
149
149
150
class RewriteResult(NamedTuple):
    """Outcome of rewriting one URL found in an article.

    Returned by ``ArticleUrlRewriter.__call__``. It behaves as a plain
    3-tuple ``(absolute_url, rewriten_url, zim_path)``.
    """

    # Item URL resolved against the article URL and the optional base href.
    absolute_url: str
    # Value to place in the document: the input URL when left untouched
    # (standalone fragment, non-http(s) scheme, rewrite failure), the document
    # URI when rewritten, or the absolute URL when no known entry matches.
    # NOTE: the misspelling is part of the public interface; keep it for
    # backward compatibility and prefer the ``rewritten_url`` alias.
    rewriten_url: str
    # ZIM path computed for the item, or None when no path was computed.
    zim_path: ZimPath | None

    @property
    def rewritten_url(self) -> str:
        """Correctly spelled read-only alias of ``rewriten_url``."""
        return self.rewriten_url
154
+
155
+
150
156
class ArticleUrlRewriter :
151
157
"""
152
158
Rewrite urls in article.
@@ -176,16 +182,11 @@ def __init__(
176
182
missing_zim_paths: list of ZIM paths which are known to already be missing
177
183
from the existing_zim_paths ; useful only in complement with this variable ;
178
184
new missing entries will be added as URLs are normalized in this function
179
-
180
- Results:
181
- items_to_download: populated with the list of rewritten URLs, so that one
182
- might use it to download items after rewriting the document
183
185
"""
184
186
self .article_path = article_path or ArticleUrlRewriter .normalize (article_url )
185
187
self .article_url = article_url
186
188
self .existing_zim_paths = existing_zim_paths
187
189
self .missing_zim_paths = missing_zim_paths
188
- self .items_to_download : dict [ZimPath , HttpUrl ] = {}
189
190
190
191
def get_item_path (self , item_url : str , base_href : str | None ) -> ZimPath :
191
192
"""Utility to transform an item URL into a ZimPath"""
@@ -201,7 +202,7 @@ def __call__(
201
202
base_href : str | None ,
202
203
* ,
203
204
rewrite_all_url : bool = True ,
204
- ) -> str :
205
+ ) -> RewriteResult :
205
206
"""Rewrite a url contained in a article.
206
207
207
208
The url is "fully" rewritten to point to a normalized entry path
@@ -210,17 +211,25 @@ def __call__(
210
211
try :
211
212
item_url = item_url .strip ()
212
213
214
+ item_absolute_url = urljoin (
215
+ urljoin (self .article_url .value , base_href ), item_url
216
+ )
217
+
213
218
# Make case of standalone fragments more straightforward
214
219
if item_url .startswith ("#" ):
215
- return item_url
220
+ return RewriteResult (
221
+ absolute_url = item_absolute_url ,
222
+ rewriten_url = item_url ,
223
+ zim_path = None ,
224
+ )
216
225
217
226
item_scheme = urlsplit (item_url ).scheme
218
227
if item_scheme and item_scheme not in ("http" , "https" ):
219
- return item_url
220
-
221
- item_absolute_url = urljoin (
222
- urljoin ( self . article_url . value , base_href ), item_url
223
- )
228
+ return RewriteResult (
229
+ absolute_url = item_absolute_url ,
230
+ rewriten_url = item_url ,
231
+ zim_path = None ,
232
+ )
224
233
225
234
item_fragment = urlsplit (item_absolute_url ).fragment
226
235
@@ -229,9 +238,11 @@ def __call__(
229
238
if rewrite_all_url or (
230
239
self .existing_zim_paths and item_path in self .existing_zim_paths
231
240
):
232
- if item_path not in self .items_to_download :
233
- self .items_to_download [item_path ] = HttpUrl (item_absolute_url )
234
- return self .get_document_uri (item_path , item_fragment )
241
+ return RewriteResult (
242
+ absolute_url = item_absolute_url ,
243
+ rewriten_url = self .get_document_uri (item_path , item_fragment ),
244
+ zim_path = item_path ,
245
+ )
235
246
else :
236
247
if (
237
248
self .missing_zim_paths is not None
@@ -242,7 +253,11 @@ def __call__(
242
253
# with duplicate messages
243
254
self .missing_zim_paths .add (item_path )
244
255
# The url doesn't point to a known entry
245
- return item_absolute_url
256
+ return RewriteResult (
257
+ absolute_url = item_absolute_url ,
258
+ rewriten_url = item_absolute_url ,
259
+ zim_path = item_path ,
260
+ )
246
261
247
262
except Exception as exc : # pragma: no cover
248
263
item_scheme = (
@@ -275,7 +290,11 @@ def __call__(
275
290
f"rewrite_all_url: { rewrite_all_url } " ,
276
291
exc_info = exc ,
277
292
)
278
- return item_url
293
+ return RewriteResult (
294
+ absolute_url = item_absolute_url ,
295
+ rewriten_url = item_url ,
296
+ zim_path = None ,
297
+ )
279
298
280
299
def get_document_uri (self , item_path : ZimPath , item_fragment : str ) -> str :
281
300
"""Given an ZIM item path and its fragment, get the URI to use in document
0 commit comments