From 5ba5271e48e1527f9188e38c0a113f6431d36ebd Mon Sep 17 00:00:00 2001
From: Alexandre Alapetite
Date: Sun, 8 Dec 2024 16:29:29 +0100
Subject: Sanitize before CSS manipulation (#7073)

Allows using the same CSS filters for content coming from RSS feeds and from Web scraping
fix https://github.com/FreshRSS/FreshRSS/issues/7039
https://github.com/FreshRSS/FreshRSS/issues/7014#issuecomment-2508987606
https://github.com/FreshRSS/FreshRSS/pull/7037
---
 app/Models/Entry.php | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'app/Models/Entry.php')

diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 7c0d02ddb..9854da131 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -844,7 +844,13 @@ HTML;
 				$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
 			}
 
-			$content = '';
+			unset($xpath, $doc);
+			$html = sanitizeHTML($html, $base);
+			$doc = new DOMDocument();
+			$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
+			$xpath = new DOMXPath($doc);
+
+			$html = '';
 			$cssSelector = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES);
 			$cssSelector = trim($cssSelector, ', ');
 			$nodes = $xpath->query((new Gt\CssXPath\Translator($cssSelector, '//'))->asXPath());
@@ -864,11 +870,10 @@ HTML;
 							$filterednode->parentNode->removeChild($filterednode);
 						}
 					}
-					$content .= $doc->saveHTML($node) . "\n";
+					$html .= $doc->saveHTML($node) . "\n";
 				}
 			}
-			$html = trim(sanitizeHTML($content, $base));
-			return $html;
+			return trim($html);
 		} else {
 			throw new Minz_Exception();
 		}
-- 
cgit v1.2.3