diff --git a/htmLawed.php b/htmLawed.php index e9fe93f..24b98d5 100755 --- a/htmLawed.php +++ b/htmLawed.php @@ -814,12 +814,19 @@ function hl_tag($t) $v = str_replace('­', ' ', (false !== strpos($v, '&') ? str_replace(['­', '­', '­'], ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "­" or hyphen or something else depending on viewing software if ('srcset' === $k) { $v2 = ''; - $pattern = "/(?:\s*[^\"',\s]+(?:\s+(?:\d+w|\d+(?:\.\d+)?x)\s*)?)/"; + // Following pattern tries to implement srcset spec + // See https://html.spec.whatwg.org/dev/images.html#srcset-attributes + // See https://html.spec.whatwg.org/#parse-a-srcset-attribute + $pattern = "/(?:\s*(?:[^,\s][^\s]*[^,\s])(?:\s*\S*\s*))(?:,|$)/"; preg_match_all($pattern, $v, $matches); $matches = call_user_func_array('array_merge', $matches); foreach ($matches as $k1 => $v1) { - $v1 = explode(' ', ltrim($v1), 2); + $v1 = explode(' ', trim($v1, ', '), 2); $k1 = isset($v1[1]) ? trim($v1[1]) : ''; + if ('' !== $k1 && !preg_match('/(?:\d+(?:\.\d*)?[wx])/', $k1)) { + // We remove candidates with an invalid descriptor + continue; + } $v1 = trim($v1[0]); if (isset($v1[0])) { $v2 .= hl_prot($v1, $k) . (empty($k1) ? '' : ' ' . $k1) . ', '; diff --git a/tests/HTMLawedTest.php b/tests/HTMLawedTest.php index 1c875d2..3b67e1c 100644 --- a/tests/HTMLawedTest.php +++ b/tests/HTMLawedTest.php @@ -11,11 +11,16 @@ public function dataForImgSrcsetAttribute() '
image a
', ], 'srcset with pixel ratio density' => [ + '
image a
', '
image a
', ], 'srcset with invalid descriptor' => [ '
image a
', - '
image a
', + '
image a
', + ], + 'srcset with commas in resource path' => [ + '
image a
', + '
image a
', ], ]; } @@ -27,6 +32,6 @@ public function testImgSrcsetAttribute($input, $expectedOutput = null) { $output = htmLawed($input); - $this->assertSame($output, $expectedOutput ?: $input); + $this->assertSame($expectedOutput ?: $input, $output); } }