From bea4ed69fccc7de2375363d8940af9a96e986650 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 00:55:41 +0200 Subject: Improve CSS filter to exclude content (#6807) * Improve CSS filter to exclude content Improve https://github.com/FreshRSS/FreshRSS/pull/6786 Fix bug when changing the CSS filter and reloading article, because original content was lost. Fix similar bug with pathEntries. * Forgotten i18n * Type check --- app/Models/Entry.php | 32 ++++++++++++++++++++++---------- app/i18n/cs/sub.php | 2 +- app/i18n/de/sub.php | 2 +- app/i18n/el/sub.php | 2 +- app/i18n/en-us/sub.php | 2 +- app/i18n/en/sub.php | 2 +- app/i18n/es/sub.php | 2 +- app/i18n/fr/sub.php | 2 +- app/i18n/he/sub.php | 2 +- app/i18n/hu/sub.php | 2 +- app/i18n/id/sub.php | 2 +- app/i18n/it/sub.php | 2 +- app/i18n/ja/sub.php | 2 +- app/i18n/ko/sub.php | 2 +- app/i18n/lv/sub.php | 2 +- app/i18n/nl/sub.php | 2 +- app/i18n/oc/sub.php | 2 +- app/i18n/pl/sub.php | 2 +- app/i18n/pt-br/sub.php | 2 +- app/i18n/ru/sub.php | 2 +- app/i18n/sk/sub.php | 2 +- app/i18n/tr/sub.php | 2 +- app/i18n/zh-cn/sub.php | 2 +- app/i18n/zh-tw/sub.php | 2 +- 24 files changed, 45 insertions(+), 33 deletions(-) diff --git a/app/Models/Entry.php b/app/Models/Entry.php index 35d1bed42..c1ae66f65 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -176,7 +176,8 @@ class FreshRSS_Entry extends Minz_Model { * Provides the original content without additional content potentially added by loadCompleteContent(). */ public function originalContent(): string { - return preg_replace('#.*#s', '', $this->content) ?? ''; + return $this->attributeString('original_content') ?? + preg_replace('#.*#s', '', $this->content) ?? ''; } /** @@ -897,13 +898,16 @@ HTML; $originalContent = $this->originalContent(); switch ($feed->attributeString('content_action')) { case 'prepend': + $this->_attribute('original_content'); $this->content = $fullContent . $originalContent; break; case 'append': + $this->_attribute('original_content'); $this->content = $originalContent . $fullContent; break; case 'replace': default: + $this->_attribute('original_content', $originalContent); $this->content = $fullContent; break; } @@ -915,12 +919,12 @@ HTML; } } } elseif (trim($feed->attributeString('path_entries_filter') ?? '') !== '') { + $originalContent = $this->attributeString('original_content') ?? $this->content; $doc = new DOMDocument(); $utf8BOM = "\xEF\xBB\xBF"; - if (!$doc->loadHTML($utf8BOM . $this->content, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { + if (!$doc->loadHTML($utf8BOM . $originalContent, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { return false; } - $modified = false; $xpath = new DOMXPath($doc); $filterednodes = $xpath->query((new Gt\CssXPath\Translator($feed->attributeString('path_entries_filter') ?? '', '//'))->asXPath()) ?: []; foreach ($filterednodes as $filterednode) { @@ -928,16 +932,24 @@ HTML; continue; } $filterednode->parentNode->removeChild($filterednode); - $modified = true; } - if ($modified) { - $html = $doc->saveHTML(); - if (!is_string($html)) { - return false; - } + $html = $doc->saveHTML($doc->getElementsByTagName('body')->item(0) ?? $doc->firstElementChild); + if (!is_string($html)) { + return false; + } + $html = preg_replace('%^\s*\s*|\s*\s*$%i', '', $html); + $this->_attribute('original_content'); + if (is_string($html) && $this->content !== $html) { + $this->_attribute('original_content', $originalContent); $this->content = $html; + return true; + } + } else { + $originalContent = $this->originalContent(); + if ($originalContent !== $this->content) { + $this->content = $originalContent; + return true; } - return $modified; } return false; } diff --git a/app/i18n/cs/sub.php b/app/i18n/cs/sub.php index c031e5efb..352189e90 100644 --- a/app/i18n/cs/sub.php +++ b/app/i18n/cs/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Přepínač CSS článku na původních webových stránkách', 'css_path_filter' => array( '_' => 'selektor CSS prvků, které mají být odstraněny', - 'help' => 'Selektor CSS může být seznam, například: .footer, .aside', + 'help' => 'Selektor CSS může být seznam, například: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Popis', 'empty' => 'Tento kanál je prázdný. Ověřte prosím, zda je stále udržován.', diff --git a/app/i18n/de/sub.php b/app/i18n/de/sub.php index ad622d911..f1dbed6fb 100644 --- a/app/i18n/de/sub.php +++ b/app/i18n/de/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'CSS-Selektor des Artikelinhaltes auf der Original-Webseite', 'css_path_filter' => array( '_' => 'CSS-Selector für die Elemente, die entfernt werden sollen', - 'help' => 'CSS-Selector könnte eine Liste sein, wie z.B.: .footer, .aside', + 'help' => 'CSS-Selector könnte eine Liste sein, wie z.B.: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Beschreibung', 'empty' => 'Dieser Feed ist leer. Bitte stellen Sie sicher, dass er noch gepflegt wird.', diff --git a/app/i18n/el/sub.php b/app/i18n/el/sub.php index b5bcc2eba..b69bd8f4c 100644 --- a/app/i18n/el/sub.php +++ b/app/i18n/el/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Article CSS selector on original website', // TODO 'css_path_filter' => array( '_' => 'CSS selector of the elements to remove', // TODO - 'help' => 'A CSS selector may be a list such as: .footer, .aside', // TODO + 'help' => 'A CSS selector may be a list such as: .footer, .aside, p[data-sanitized-class="menu"]', // TODO ), 'description' => 'Description', // TODO 'empty' => 'This feed is empty. Please verify that it is still maintained.', // TODO diff --git a/app/i18n/en-us/sub.php b/app/i18n/en-us/sub.php index 057f5158c..84fdb2e4d 100644 --- a/app/i18n/en-us/sub.php +++ b/app/i18n/en-us/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Article CSS selector on original website', // IGNORE 'css_path_filter' => array( '_' => 'CSS selector of the elements to remove', // IGNORE - 'help' => 'A CSS selector may be a list such as: .footer, .aside', // IGNORE + 'help' => 'A CSS selector may be a list such as: .footer, .aside, p[data-sanitized-class="menu"]', // IGNORE ), 'description' => 'Description', // IGNORE 'empty' => 'This feed is empty. Please verify that it is still maintained.', // IGNORE diff --git a/app/i18n/en/sub.php b/app/i18n/en/sub.php index 9b3f9e9e8..397cdb99c 100644 --- a/app/i18n/en/sub.php +++ b/app/i18n/en/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Article CSS selector on original website', 'css_path_filter' => array( '_' => 'CSS selector of the elements to remove', - 'help' => 'A CSS selector may be a list such as: .footer, .aside', + 'help' => 'A CSS selector may be a list such as: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Description', 'empty' => 'This feed is empty. Please verify that it is still maintained.', diff --git a/app/i18n/es/sub.php b/app/i18n/es/sub.php index a134beb57..21a21a879 100644 --- a/app/i18n/es/sub.php +++ b/app/i18n/es/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Ruta a la CSS de los artículos en la web original', 'css_path_filter' => array( '_' => 'Selector CSS de los elementos a remover', - 'help' => 'Un selector CSS puede ser una lista, por ejemplo: .footer, .aside', + 'help' => 'Un selector CSS puede ser una lista, por ejemplo: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Descripción', 'empty' => 'La fuente está vacía. Por favor, verifica que siga activa.', diff --git a/app/i18n/fr/sub.php b/app/i18n/fr/sub.php index 9bdbdbe5d..c8ba35d89 100644 --- a/app/i18n/fr/sub.php +++ b/app/i18n/fr/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Sélecteur CSS des articles sur le site d’origine', 'css_path_filter' => array( '_' => 'Sélecteur CSS des éléments à supprimer', - 'help' => 'Un sélecteur CSS peut être une liste comme : .footer, .aside', + 'help' => 'Un sélecteur CSS peut être une liste comme : .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Description', // IGNORE 'empty' => 'Ce flux est vide. Veuillez vérifier qu’il est toujours maintenu.', diff --git a/app/i18n/he/sub.php b/app/i18n/he/sub.php index 7ac401530..b7d67a308 100644 --- a/app/i18n/he/sub.php +++ b/app/i18n/he/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'נתיב הCSS של המאמר באתר המקורי', 'css_path_filter' => array( '_' => 'CSS selector of the elements to remove', // TODO - 'help' => 'A CSS selector may be a list such as: .footer, .aside', // TODO + 'help' => 'A CSS selector may be a list such as: .footer, .aside, p[data-sanitized-class="menu"]', // TODO ), 'description' => 'תיאור', 'empty' => 'הזנה זו ריקה. אנא ודאו שהיא עדיין מתוחזקת.', diff --git a/app/i18n/hu/sub.php b/app/i18n/hu/sub.php index 1b942f340..84e47fde9 100644 --- a/app/i18n/hu/sub.php +++ b/app/i18n/hu/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Cikk CSS selector az eredeti weblapon', 'css_path_filter' => array( '_' => 'A törlendő elemek CSS selectora', - 'help' => 'Egy CSS selector lehet egy lista például: .footer, .aside', + 'help' => 'Egy CSS selector lehet egy lista például: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Leírás', 'empty' => 'Ez a hírforrás üres. Ellenőrizd hogy van e tartalom rajta.', diff --git a/app/i18n/id/sub.php b/app/i18n/id/sub.php index 2cc0b8685..09354ab73 100644 --- a/app/i18n/id/sub.php +++ b/app/i18n/id/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Article CSS selector on original website', // TODO 'css_path_filter' => array( '_' => 'CSS selector of the elements to remove', // TODO - 'help' => 'A CSS selector may be a list such as: .footer, .aside', // TODO + 'help' => 'A CSS selector may be a list such as: .footer, .aside, p[data-sanitized-class="menu"]', // TODO ), 'description' => 'Description', // TODO 'empty' => 'This feed is empty. Please verify that it is still maintained.', // TODO diff --git a/app/i18n/it/sub.php b/app/i18n/it/sub.php index ae0a16123..b6f4506c9 100644 --- a/app/i18n/it/sub.php +++ b/app/i18n/it/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Percorso del foglio di stile CSS del sito di origine', 'css_path_filter' => array( '_' => 'Il selettore CSS degli elementi da rimuovere', - 'help' => 'Il selettore CSS potrebbe essere una lista, ad esempio: .footer, .aside', + 'help' => 'Il selettore CSS potrebbe essere una lista, ad esempio: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Descrizione', 'empty' => 'Questo feed non contiene articoli. Per favore verifica il sito direttamente.', diff --git a/app/i18n/ja/sub.php b/app/i18n/ja/sub.php index 614269caf..ad1b18622 100644 --- a/app/i18n/ja/sub.php +++ b/app/i18n/ja/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => '元のWebサイトから記事を抽出するCSSセレクタ', 'css_path_filter' => array( '_' => '要素を削除するCSSセレクタ', - 'help' => 'CSSセレクタは次のようなリストです: .footer, .aside', + 'help' => 'CSSセレクタは次のようなリストです: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => '説明', 'empty' => 'このフィードは空です。サイトが運営されているかどうかを確認してみてください。', diff --git a/app/i18n/ko/sub.php b/app/i18n/ko/sub.php index 57eb72ab7..42a263fe8 100644 --- a/app/i18n/ko/sub.php +++ b/app/i18n/ko/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => '웹사이트 상의 글 본문에 해당하는 CSS 경로', 'css_path_filter' => array( '_' => '제거할 요소의 CSS 선택자', - 'help' => 'CSS 선택자는 다음과 같은 목록일 수 있습니다: .footer, .aside', + 'help' => 'CSS 선택자는 다음과 같은 목록일 수 있습니다: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => '설명', 'empty' => '이 피드는 비어있습니다. 피드가 계속 운영되고 있는지 확인하세요.', diff --git a/app/i18n/lv/sub.php b/app/i18n/lv/sub.php index cc739194d..de86a6e08 100644 --- a/app/i18n/lv/sub.php +++ b/app/i18n/lv/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Raksta CSS selektors sākotnējā vietnē', 'css_path_filter' => array( '_' => 'Noņemamo elementu CSS selektors', - 'help' => 'CSS selektors var būt saraksts, piemēram.: .footer, .aside', + 'help' => 'CSS selektors var būt saraksts, piemēram.: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Apraksts', 'empty' => 'Šī barotne ir tukša. Lūdzu, pārbaudiet, vai tā joprojām tiek uzturēta.', diff --git a/app/i18n/nl/sub.php b/app/i18n/nl/sub.php index 036e631f0..c14ea5587 100644 --- a/app/i18n/nl/sub.php +++ b/app/i18n/nl/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'CSS-pad van artikelen op originele website', 'css_path_filter' => array( '_' => 'CSS selector van de elementen om te verwijderen', - 'help' => 'Een CSS selector kan een lijst zijn, zoals: .footer, .aside', + 'help' => 'Een CSS selector kan een lijst zijn, zoals: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Omschrijving', 'empty' => 'Deze feed is leeg. Controleer of deze nog actueel is.', diff --git a/app/i18n/oc/sub.php b/app/i18n/oc/sub.php index d503f0ead..12f14c00f 100644 --- a/app/i18n/oc/sub.php +++ b/app/i18n/oc/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Selector CSS dels articles sul site d’origina', 'css_path_filter' => array( '_' => 'Selector CSS de l’element de tirar', - 'help' => 'Un selector CSS pòt èsser una lista coma : .footer, .aside', + 'help' => 'Un selector CSS pòt èsser una lista coma : .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Descripcion', // IGNORE 'empty' => 'Aqueste flux es void. Assegurats-vos qu’es totjorn mantengut.', diff --git a/app/i18n/pl/sub.php b/app/i18n/pl/sub.php index 117ec1b88..f8bf7f2ff 100644 --- a/app/i18n/pl/sub.php +++ b/app/i18n/pl/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Selektor CSS dla wiadomości na pierwotnej stronie', 'css_path_filter' => array( '_' => 'Selektor CSS elementów do usunięcia', - 'help' => 'Selector CSS może być listą, na przykład: .footer, .aside', + 'help' => 'Selector CSS może być listą, na przykład: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Opis', 'empty' => 'Ten kanał jest pusty. Należy sprawdzić czy kanał w dalszym ciągu działa.', diff --git a/app/i18n/pt-br/sub.php b/app/i18n/pt-br/sub.php index db178e10c..581186309 100644 --- a/app/i18n/pt-br/sub.php +++ b/app/i18n/pt-br/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Caminho do CSS do artigo no site original', 'css_path_filter' => array( '_' => 'Seletor CSS dos elementos a serem removidos', - 'help' => 'O seletor CSS pode ser uma lista com: .footer, .aside', + 'help' => 'O seletor CSS pode ser uma lista com: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Descrição', 'empty' => 'Este feed está vazio. Por favor verifique ele ainda é mantido.', diff --git a/app/i18n/ru/sub.php b/app/i18n/ru/sub.php index 9c7047364..ab747abb4 100644 --- a/app/i18n/ru/sub.php +++ b/app/i18n/ru/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'CSS селектор статьи на сайте', 'css_path_filter' => array( '_' => 'CSS селектор элемента для удаления', - 'help' => 'CSS селектор может быть списком как: .footer, .aside', + 'help' => 'CSS селектор может быть списком как: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Описание', 'empty' => 'Лента пустая. Пожалуйста, убедитесь, что её до сих пор обслуживают.', diff --git a/app/i18n/sk/sub.php b/app/i18n/sk/sub.php index 8b30f5865..d0b1f5874 100644 --- a/app/i18n/sk/sub.php +++ b/app/i18n/sk/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Pôvodný CSS súbor článku z webovej stránky', 'css_path_filter' => array( '_' => 'CSS selektor elementu na odstránenie', - 'help' => 'CSS selektor môže byť zoznam ako: .footer, .aside', + 'help' => 'CSS selektor môže byť zoznam ako: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Popis', 'empty' => 'Tento kanál je prázdny. Overte, prosím, či je ešte spravovaný autorom.', diff --git a/app/i18n/tr/sub.php b/app/i18n/tr/sub.php index 6b7a0aaea..a550017ab 100644 --- a/app/i18n/tr/sub.php +++ b/app/i18n/tr/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => 'Makaleleri kendi CSS görünümü ile göster', 'css_path_filter' => array( '_' => 'Kaldırılacak elemana ait CSS seçicisi', - 'help' => 'CSS seçicisi şu şekilde olabilir: .footer, .aside', + 'help' => 'CSS seçicisi şu şekilde olabilir: .footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => 'Tanım', 'empty' => 'Bu akış boş. Lütfen akışın aktif olduğuna emin olun.', diff --git a/app/i18n/zh-cn/sub.php b/app/i18n/zh-cn/sub.php index f1d77db1b..36bbf115c 100644 --- a/app/i18n/zh-cn/sub.php +++ b/app/i18n/zh-cn/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => '原文的 CSS 选择器', 'css_path_filter' => array( '_' => '需移除元素的 CSS 选择器', - 'help' => '可设置多个 CSS 选择器,例如:.footer, .aside', + 'help' => '可设置多个 CSS 选择器,例如:.footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => '描述', 'empty' => '此源为空。请确认它是否正常更新。', diff --git a/app/i18n/zh-tw/sub.php b/app/i18n/zh-tw/sub.php index 7d118cd1b..f0997ef8f 100644 --- a/app/i18n/zh-tw/sub.php +++ b/app/i18n/zh-tw/sub.php @@ -63,7 +63,7 @@ return array( 'css_path' => '原文的 CSS 選擇器', 'css_path_filter' => array( '_' => '需移除元素的 CSS 選擇器', - 'help' => '可設置多個 CSS 選擇器,例如:.footer, .aside', + 'help' => '可設置多個 CSS 選擇器,例如:.footer, .aside, p[data-sanitized-class="menu"]', ), 'description' => '描述', 'empty' => '此源為空。請確認它是否正常更新。', -- cgit v1.2.3