Skip to content

Commit ea6505f

Browse files
authored
Merge pull request #209 from openzim/mindtouch_changes
Add access to base_href and url_rewriter in HTML tag rewriting
2 parents 0cfd96b + ecdae6a commit ea6505f

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

src/zimscraperlib/rewriting/html.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,11 @@ def handle_starttag(self, tag: str, attrs: AttrsList, *, auto_close: bool = Fals
178178

179179
if (
180180
rewritten := rules.do_tag_rewrite(
181-
tag=tag, attrs=attrs, auto_close=auto_close
181+
tag=tag,
182+
attrs=attrs,
183+
url_rewriter=self.url_rewriter,
184+
base_href=self.base_href,
185+
auto_close=auto_close,
182186
)
183187
) is not None:
184188
self.send(rewritten)
@@ -468,6 +472,8 @@ def do_tag_rewrite(
468472
self,
469473
tag: str,
470474
attrs: AttrsList,
475+
url_rewriter: ArticleUrlRewriter,
476+
base_href: str | None,
471477
*,
472478
auto_close: bool,
473479
) -> str | None:
@@ -484,6 +490,8 @@ def do_tag_rewrite(
484490
for arg_name, arg_value in { # pyright: ignore[reportUnknownVariableType]
485491
"tag": tag,
486492
"attrs": attrs,
493+
"url_rewriter": url_rewriter,
494+
"base_href": base_href,
487495
"auto_close": auto_close,
488496
}.items()
489497
if arg_name in _cached_signature(rule.func).parameters

tests/rewriting/test_html_rewriting.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1269,50 +1269,95 @@ def rewrite2_tag(
12691269
)
12701270

12711271

1272+
@rules.rewrite_tag()
def rewrite3_tag(
    tag: str,
    base_href: str | None,
    url_rewriter: ArticleUrlRewriter,
) -> str | None:
    """Test rule exercising ``url_rewriter`` and ``base_href`` in tag rewriting.

    Only ``rewrite3`` tags are handled; for any other tag the rule returns
    ``None``. For a ``rewrite3`` tag, a fixed URL is passed through
    ``url_rewriter`` (together with ``base_href``) and a ``<rewriten>`` tag is
    returned whose ``src`` is the rewritten URL. A ``base`` attribute holding
    ``base_href`` is added only when ``base_href`` is truthy.
    """
    if tag != "rewrite3":
        return None

    rewrite_result = url_rewriter(
        "https://www.acme.com/foo.img",
        base_href=base_href,
    )
    rewriten_url = rewrite_result.rewriten_url

    # Only emit the base attribute when a (non-empty) base_href was supplied.
    if base_href:
        return f'<rewriten src="{rewriten_url}" base="{base_href}" />'
    return f'<rewriten src="{rewriten_url}" />'
1288+
1289+
12721290
@pytest.mark.parametrize(
1273-
"tag, attrs, auto_close, expected_result",
1291+
"tag, attrs, auto_close, base_href, expected_result",
12741292
[
12751293
pytest.param(
12761294
"foo",
12771295
[],
12781296
False,
12791297
None,
1298+
None,
12801299
id="do_not_rewrite_foo_tag",
12811300
),
12821301
pytest.param(
12831302
"rewrite1",
12841303
[("attr2", "value2")],
12851304
False,
1305+
None,
12861306
"<rewriten attr1=value1 />",
12871307
id="rewrite1_tag",
12881308
),
12891309
pytest.param(
12901310
"rewrite2",
12911311
[("attr2", "value2")],
12921312
False,
1313+
None,
12931314
'<rewriten attr2="value2">',
12941315
id="rewrite2_tag_no_close",
12951316
),
12961317
pytest.param(
12971318
"rewrite2",
12981319
[("attr2", "value2")],
12991320
True,
1321+
None,
13001322
'<rewriten attr2="value2"/>',
13011323
id="rewrite2_tag_auto_close",
13021324
),
1325+
pytest.param(
1326+
"rewrite3",
1327+
[("attr2", "value2")],
1328+
True,
1329+
None,
1330+
'<rewriten src="https://www.acme.com/foo.img?queryparam" />',
1331+
id="rewrite3_use_url_rewriter",
1332+
),
1333+
pytest.param(
1334+
"rewrite3",
1335+
[("attr2", "value2")],
1336+
True,
1337+
"http://acme.com/base_value",
1338+
'<rewriten src="https://www.acme.com/foo.img?queryparam" '
1339+
'base="http://acme.com/base_value" />',
1340+
id="rewrite3_use_url_rewriter",
1341+
),
13031342
],
13041343
)
13051344
def test_html_tag_rewrite_rules(
13061345
tag: str,
13071346
attrs: AttrsList,
1347+
base_href: str | None,
13081348
*,
13091349
auto_close: bool,
13101350
expected_result: str | None,
1351+
simple_url_rewriter_gen: Callable[[str, str], ArticleUrlRewriter],
13111352
):
13121353
assert (
13131354
rules.do_tag_rewrite(
13141355
tag=tag,
13151356
attrs=attrs,
1357+
url_rewriter=simple_url_rewriter_gen(
1358+
"http://www.example.com", "?queryparam"
1359+
),
1360+
base_href=base_href,
13161361
auto_close=auto_close,
13171362
)
13181363
== expected_result

0 commit comments

Comments
 (0)