From 96e0efa6f09271f34941f7078179921e194395ab Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 12 Aug 2022 13:06:04 +0200 Subject: Get content by parsing: fix base for protocol-relative URLs (#4500) * Get content by parsing: fix base for protocol-relative URLs * Guess missing URL scheme for base * Light refactoring --- app/Models/Entry.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'app/Models/Entry.php') diff --git a/app/Models/Entry.php b/app/Models/Entry.php index e383f9060..0be353c28 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -547,9 +547,13 @@ class FreshRSS_Entry extends Minz_Model { } $base = $xpath->evaluate('normalize-space(//base/@href)'); - if ($base != false && is_string($base)) { - $url = $base; + if ($base == false || !is_string($base)) { + $base = $url; + } elseif (substr($base, 0, 2) === '//') { + //Protocol-relative URLs "//www.example.net" + $base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base; } + $content = ''; $nodes = $xpath->query(new Gt\CssXPath\Translator($path)); if ($nodes != false) { @@ -557,7 +561,7 @@ class FreshRSS_Entry extends Minz_Model { $content .= $doc->saveHtml($node) . "\n"; } } - $html = trim(sanitizeHTML($content, $url)); + $html = trim(sanitizeHTML($content, $base)); return $html; } else { throw new Exception(); -- cgit v1.2.3