aboutsummaryrefslogtreecommitdiff
path: root/app
diff options
context:
space:
mode:
author Alexandre Alapetite <alexandre@alapetite.fr> 2024-12-11 23:23:50 +0100
committer GitHub <noreply@github.com> 2024-12-11 23:23:50 +0100
commit 272af0f3c44a4a90e05cf8b5b1972c0b22ece3ad (patch)
tree 15baf7a5d71783a2f3165c854853ccedf3454629 /app
parent 12c659fb2fa21342d76b1a76c8c921117b9a48e5 (diff)
Improved CSS filter (#7091)
* Improved CSS filter
  Remove unwanted elements both before and after sanitizing
  fix https://github.com/FreshRSS/FreshRSS/issues/7084
  Improved fix bug in https://github.com/FreshRSS/FreshRSS/commit/33fd07f6f26310d4806077cc87bcdf9b8b940e35#commitcomment-150152171
* fix typing
Diffstat (limited to 'app')
-rw-r--r-- app/Controllers/subscriptionController.php | 4
-rw-r--r-- app/Models/Entry.php | 41
2 files changed, 34 insertions, 11 deletions
diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php
index 73680d9eb..b1b999a4a 100644
--- a/app/Controllers/subscriptionController.php
+++ b/app/Controllers/subscriptionController.php
@@ -299,7 +299,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController {
}
}
- $feed->_attribute('path_entries_conditions', Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true));
+ $conditions = Minz_Request::paramTextToArray('path_entries_conditions', plaintext: true);
+ $conditions = array_filter(array_map('trim', $conditions));
+ $feed->_attribute('path_entries_conditions', empty($conditions) ? null : $conditions);
$feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true));
$values = [
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index e742912ad..47d244bee 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -814,9 +814,12 @@ HTML;
if ($url === '' || $feed === null || $feed->pathEntries() === '') {
return '';
}
- if (!empty($feed->attributeArray('path_entries_conditions'))) {
+
+ $conditions = $feed->attributeArray('path_entries_conditions') ?? [];
+ $conditions = array_filter(array_map(fn($v) => is_string($v) ? trim($v) : '', $conditions));
+ if (count($conditions) > 0) {
$found = false;
- foreach ($feed->attributeArray('path_entries_conditions') as $condition) {
+ foreach ($conditions as $condition) {
if (!is_string($condition) || trim($condition) === '') {
continue;
}
@@ -860,22 +863,16 @@ HTML;
$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
}
- unset($xpath, $doc);
- $html = sanitizeHTML($html, $base);
- $doc = new DOMDocument();
- $utf8BOM = "\xEF\xBB\xBF";
- $doc->loadHTML($utf8BOM . $html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
- $xpath = new DOMXPath($doc);
-
$html = '';
$cssSelector = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES);
$cssSelector = trim($cssSelector, ', ');
+ $path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? '', ', ');
$nodes = $xpath->query((new Gt\CssXPath\Translator($cssSelector, '//'))->asXPath());
if ($nodes != false) {
- $path_entries_filter = trim($feed->attributeString('path_entries_filter') ?? '');
$filter_xpath = $path_entries_filter === '' ? '' : (new Gt\CssXPath\Translator($path_entries_filter, 'descendant-or-self::'))->asXPath();
foreach ($nodes as $node) {
if ($filter_xpath !== '') {
+ // Remove unwanted elements once before sanitizing, for CSS selectors to also match original content
$filterednodes = $xpath->query($filter_xpath, $node) ?: [];
foreach ($filterednodes as $filterednode) {
if ($filterednode === $node) {
@@ -890,6 +887,30 @@ HTML;
$html .= $doc->saveHTML($node) . "\n";
}
}
+
+ unset($xpath, $doc);
+ $html = sanitizeHTML($html, $base);
+
+ if ($path_entries_filter !== '') {
+ // Remove unwanted elements again after sanitizing, for CSS selectors to also match sanitized content
+ $modified = false;
+ $doc = new DOMDocument();
+ $utf8BOM = "\xEF\xBB\xBF";
+ $doc->loadHTML($utf8BOM . $html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
+ $xpath = new DOMXPath($doc);
+ $filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter, '//'))->asXPath()) ?: [];
+ foreach ($filterednodes as $filterednode) {
+ if (!($filterednode instanceof DOMElement) || $filterednode->parentNode === null) {
+ continue;
+ }
+ $filterednode->parentNode->removeChild($filterednode);
+ $modified = true;
+ }
+ if ($modified) {
+ $html = $doc->saveHTML($doc->getElementsByTagName('body')->item(0) ?? $doc->firstElementChild) ?: $html;
+ }
+ }
+
return trim($html);
} else {
throw new Minz_Exception();