diff options
| author | 2024-12-11 23:23:50 +0100 | |
|---|---|---|
| committer | 2024-12-11 23:23:50 +0100 | |
| commit | 272af0f3c44a4a90e05cf8b5b1972c0b22ece3ad (patch) | |
| tree | 15baf7a5d71783a2f3165c854853ccedf3454629 /app | |
| parent | 12c659fb2fa21342d76b1a76c8c921117b9a48e5 (diff) | |
Improved CSS filter (#7091)
* Improved CSS filter
Remove unwanted elements both before and after sanitizing.
Fixes https://github.com/FreshRSS/FreshRSS/issues/7084
Improved
Fix the bug reported in https://github.com/FreshRSS/FreshRSS/commit/33fd07f6f26310d4806077cc87bcdf9b8b940e35#commitcomment-150152171
* fix typing
Diffstat (limited to 'app')
| -rw-r--r-- | app/Controllers/subscriptionController.php | 4 | ||||
| -rw-r--r-- | app/Models/Entry.php | 41 |
2 files changed, 34 insertions, 11 deletions
diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index 73680d9eb..b1b999a4a 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -299,7 +299,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { } } - $feed->_attribute('path_entries_conditions', Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true)); + $conditions = Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true); + $conditions = array_filter(array_map('trim', $conditions)); + $feed->_attribute('path_entries_conditions', empty($conditions) ? null : $conditions); $feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true)); $values = [ diff --git a/app/Models/Entry.php b/app/Models/Entry.php index e742912ad..47d244bee 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -814,9 +814,12 @@ HTML; if ($url === '' || $feed === null || $feed->pathEntries() === '') { return ''; } - if (!empty($feed->attributeArray('path_entries_conditions'))) { + + $conditions = $feed->attributeArray('path_entries_conditions') ?? []; + $conditions = array_filter(array_map(fn($v) => is_string($v) ? trim($v) : '', $conditions)); + if (count($conditions) > 0) { $found = false; - foreach ($feed->attributeArray('path_entries_conditions') as $condition) { + foreach ($conditions as $condition) { if (!is_string($condition) || trim($condition) === '') { continue; } @@ -860,22 +863,16 @@ HTML; $base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base; } - unset($xpath, $doc); - $html = sanitizeHTML($html, $base); - $doc = new DOMDocument(); - $utf8BOM = "\xEF\xBB\xBF"; - $doc->loadHTML($utf8BOM . 
$html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); - $xpath = new DOMXPath($doc); - $html = ''; $cssSelector = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES); $cssSelector = trim($cssSelector, ', '); + $path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? '', ', '); $nodes = $xpath->query((new Gt\CssXPath\Translator($cssSelector, '//'))->asXPath()); if ($nodes != false) { - $path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? ''); $filter_xpath = $path_entries_filter === '' ? '' : (new Gt\CssXPath\Translator($path_entries_filter, 'descendant-or-self::'))->asXPath(); foreach ($nodes as $node) { if ($filter_xpath !== '') { + // Remove unwanted elements once before sanitizing, for CSS selectors to also match original content $filterednodes = $xpath->query($filter_xpath, $node) ?: []; foreach ($filterednodes as $filterednode) { if ($filterednode === $node) { @@ -890,6 +887,30 @@ HTML; $html .= $doc->saveHTML($node) . "\n"; } } + + unset($xpath, $doc); + $html = sanitizeHTML($html, $base); + + if ($path_entries_filter !== '') { + // Remove unwanted elements again after sanitizing, for CSS selectors to also match sanitized content + $modified = false; + $doc = new DOMDocument(); + $utf8BOM = "\xEF\xBB\xBF"; + $doc->loadHTML($utf8BOM . $html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + $xpath = new DOMXPath($doc); + $filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter, '//'))->asXPath()) ?: []; + foreach ($filterednodes as $filterednode) { + if (!($filterednode instanceof DOMElement) || $filterednode->parentNode === null) { + continue; + } + $filterednode->parentNode->removeChild($filterednode); + $modified = true; + } + if ($modified) { + $html = $doc->saveHTML($doc->getElementsByTagName('body')->item(0) ?? $doc->firstElementChild) ?: $html; + } + } + return trim($html); } else { throw new Minz_Exception(); |
