diff --git a/htmLawed.php b/htmLawed.php index e9fe93f..24b98d5 100755 --- a/htmLawed.php +++ b/htmLawed.php @@ -814,12 +814,19 @@ function hl_tag($t) $v = str_replace('', ' ', (false !== strpos($v, '&') ? str_replace(['', '', ''], ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "" or hyphen or something else depending on viewing software if ('srcset' === $k) { $v2 = ''; - $pattern = "/(?:\s*[^\"',\s]+(?:\s+(?:\d+w|\d+(?:\.\d+)?x)\s*)?)/"; + // Following pattern tries to implement srcset spec + // See https://html.spec.whatwg.org/dev/images.html#srcset-attributes + // See https://html.spec.whatwg.org/#parse-a-srcset-attribute + $pattern = "/(?:\s*(?:[^,\s][^\s]*[^,\s])(?:\s*\S*\s*))(?:,|$)/"; preg_match_all($pattern, $v, $matches); $matches = call_user_func_array('array_merge', $matches); foreach ($matches as $k1 => $v1) { - $v1 = explode(' ', ltrim($v1), 2); + $v1 = explode(' ', trim($v1, ', '), 2); $k1 = isset($v1[1]) ? trim($v1[1]) : ''; + if ('' !== $k1 && !preg_match('/(?:\d+(?:\.\d*)?[wx])/', $k1)) { + // We remove candidates with an invalid descriptor + continue; + } $v1 = trim($v1[0]); if (isset($v1[0])) { $v2 .= hl_prot($v1, $k) . (empty($k1) ? '' : ' ' . $k1) . ', '; diff --git a/tests/HTMLawedTest.php b/tests/HTMLawedTest.php index 1c875d2..3b67e1c 100644 --- a/tests/HTMLawedTest.php +++ b/tests/HTMLawedTest.php @@ -11,11 +11,16 @@ public function dataForImgSrcsetAttribute() '







