Skip to content

Commit 36fd540

Browse files
committed
Fix srcset parsing when url contains commas
We split the parsing logic into two separate steps: 1/ Get all candidates and loop over them 2/ Eject candidates whose descriptor is invalid Signed-off-by: Kevin Decherf <[email protected]>
1 parent cc42f21 commit 36fd540

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

htmLawed.php

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -814,12 +814,19 @@ function hl_tag($t)
814814
$v = str_replace('­', ' ', (false !== strpos($v, '&') ? str_replace(['&#xad;', '&#173;', '&shy;'], ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "­" or hyphen or something else depending on viewing software
815815
if ('srcset' === $k) {
816816
$v2 = '';
817-
$pattern = "/(?:\s*[^\"',\s]+(?:\s+(?:\d+w|\d+(?:\.\d+)?x)\s*)?)/";
817+
// The following pattern tries to implement the srcset spec
818+
// See https://html.spec.whatwg.org/dev/images.html#srcset-attributes
819+
// See https://html.spec.whatwg.org/#parse-a-srcset-attribute
820+
$pattern = "/(?:\s*(?:[^,\s][^\s]*[^,\s])(?:\s*\S*\s*))(?:,|$)/";
818821
preg_match_all($pattern, $v, $matches);
819822
$matches = call_user_func_array('array_merge', $matches);
820823
foreach ($matches as $k1 => $v1) {
821-
$v1 = explode(' ', ltrim($v1), 2);
824+
$v1 = explode(' ', trim($v1, ', '), 2);
822825
$k1 = isset($v1[1]) ? trim($v1[1]) : '';
826+
if ('' !== $k1 && !preg_match('/(?:\d+(?:\.\d*)?[wx])/', $k1)) {
827+
// We remove candidates with an invalid descriptor
828+
continue;
829+
}
823830
$v1 = trim($v1[0]);
824831
if (isset($v1[0])) {
825832
$v2 .= hl_prot($v1, $k) . (empty($k1) ? '' : ' ' . $k1) . ', ';

tests/HTMLawedTest.php

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,16 @@ public function dataForImgSrcsetAttribute()
1111
'<div><img src="a.jpg" alt="image a" srcset="a.jpg 100w, b.jpg 450w" /></div>',
1212
],
1313
'srcset with pixel ratio density' => [
14+
'<div><img src="a.jpg" alt="image a" srcset="a.jpg, b.jpg 1.5x,c.jpg 2x" /></div>',
1415
'<div><img src="a.jpg" alt="image a" srcset="a.jpg, b.jpg 1.5x, c.jpg 2x" /></div>',
1516
],
1617
'srcset with invalid descriptor' => [
1718
'<div><img src="a.jpg" alt="image a" srcset=" a.jpg , b.jpg x2" /></div>',
18-
'<div><img src="a.jpg" alt="image a" srcset="a.jpg, b.jpg, x2" /></div>',
19+
'<div><img src="a.jpg" alt="image a" srcset="a.jpg" /></div>',
20+
],
21+
'srcset with commas in resource path' => [
22+
'<div><img src="a.jpg" alt="image a" srcset="a.jpg,c_120 100w,b.jpg 450w" /></div>',
23+
'<div><img src="a.jpg" alt="image a" srcset="a.jpg,c_120 100w, b.jpg 450w" /></div>',
1924
],
2025
];
2126
}
@@ -27,6 +32,6 @@ public function testImgSrcsetAttribute($input, $expectedOutput = null)
2732
{
2833
$output = htmLawed($input);
2934

30-
$this->assertSame($output, $expectedOutput ?: $input);
35+
$this->assertSame($expectedOutput ?: $input, $output);
3136
}
3237
}

0 commit comments

Comments
 (0)