diff options
| author | 2024-10-13 15:28:45 +0200 | |
|---|---|---|
| committer | 2024-10-13 15:28:45 +0200 | |
| commit | ccb132523a4ee740d5b576574e9f44668021fbe6 (patch) | |
| tree | 0b6977a345c56eff277abb0bc9199b0010f003a8 | |
| parent | 91624037c7d73eb545478aab2f8abc55fc224453 (diff) | |
New feed mode: HTML + XPath + JSON dot notation (JSON in HTML) (#6888)
* New feed mode: HTML + XPath + JSON dot notation (JSON in HTML)
Same as `JSON+DotNotation`, but the JSON string is first extracted from an HTML document using an XPath expression.
Example: `//script[@type='application/json']`
fix https://github.com/FreshRSS/FreshRSS/discussions/6876
* JavaScript UI to show/hide new field
* Casing xPathToJson
* Slight renaming
34 files changed, 277 insertions, 34 deletions
diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index 105099966..2b757177c 100755 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -260,7 +260,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if (!empty($xPathSettings)) { $attributes['xpath'] = $xPathSettings; } - } elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed_kind === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { $jsonSettings = []; if (Minz_Request::paramString('jsonFeedTitle') !== '') { $jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true); @@ -298,6 +298,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if (!empty($jsonSettings)) { $attributes['json_dotnotation'] = $jsonSettings; } + if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') { + $attributes['xPathToJson'] = Minz_Request::paramString('xPathToJson', plaintext: true); + } } try { @@ -509,6 +512,11 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if ($simplePie === null) { throw new FreshRSS_Feed_Exception('JSON Feed parsing failed for [' . $feed->url(false) . ']'); } + } elseif ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $simplePie = $feed->loadJson(); + if ($simplePie === null) { + throw new FreshRSS_Feed_Exception('HTML+XPath+JSON parsing failed for [' . $feed->url(false) . 
']'); + } } else { $simplePie = $feed->load(false, $feedIsNew); } diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index e2e3bbf11..683f403bc 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -244,7 +244,7 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { $xPathSettings['itemUid'] = Minz_Request::paramString('xPathItemUid', true); if (!empty($xPathSettings)) $feed->_attribute('xpath', $xPathSettings); - } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { $jsonSettings = []; if (Minz_Request::paramString('jsonFeedTitle') !== '') { $jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true); @@ -282,6 +282,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { if (!empty($jsonSettings)) { $feed->_attribute('json_dotnotation', $jsonSettings); } + if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') { + $feed->_attribute('xPathToJson', Minz_Request::paramString('xPathToJson', plaintext: true)); + } } $feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true)); diff --git a/app/Models/Feed.php b/app/Models/Feed.php index b9afa9e83..ad84c35a1 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -32,6 +32,8 @@ class FreshRSS_Feed extends Minz_Model { public const KIND_JSONFEED = 25; public const KIND_JSON_DOTNOTATION = 30; + /** JSON embedded in HTML */ + public const KIND_HTML_XPATH_JSON_DOTNOTATION = 35; public const PRIORITY_IMPORTANT = 20; public const PRIORITY_MAIN_STREAM = 10; @@ -639,6 +641,24 @@ class FreshRSS_Feed extends Minz_Model { ]; } + private function extractJsonFromHtml(string $html): ?string { + $xPathToJson = $this->attributeString('xPathToJson') ?? 
''; + if ($xPathToJson === '') { + return null; + } + + $doc = new DOMDocument(); + $doc->recover = true; + $doc->strictErrorChecking = false; + if (!$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { + return null; + } + + $xpath = new DOMXPath($doc); + $json = @$xpath->evaluate('normalize-space(' . $xPathToJson . ')'); + return is_string($json) ? $json : null; + } + public function loadJson(): ?\SimplePie\SimplePie { if ($this->url == '') { return null; @@ -648,14 +668,21 @@ class FreshRSS_Feed extends Minz_Model { return null; } - $httpAccept = 'json'; - $json = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); - if (strlen($json) <= 0) { + $httpAccept = $this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 'html' : 'json'; + $content = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); + if (strlen($content) <= 0) { return null; } + if ($this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $content = $this->extractJsonFromHtml($content); + if ($content == null) { + return null; + } + } + //check if the content is actual JSON - $jf = json_decode($json, true); + $jf = json_decode($content, true); if (json_last_error() !== JSON_ERROR_NONE || !is_array($jf)) { return null; } diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php index 37bcdc6e3..c532308d7 100644 --- a/app/Services/ExportService.php +++ b/app/Services/ExportService.php @@ -23,6 +23,7 @@ class FreshRSS_Export_Service { final public const TYPE_JSON_DOTPATH = 'JSON+DotPath'; // Legacy 1.24.0-dev final public const TYPE_JSON_DOTNOTATION = 'JSON+DotNotation'; final public const TYPE_JSONFEED = 'JSONFeed'; + final public const TYPE_HTML_XPATH_JSON_DOTNOTATION = 'HTML+XPath+JSON+DotNotation'; /** * Initialize the service for the given user. 
diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 1871c37e0..298c0ec21 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -168,6 +168,9 @@ class FreshRSS_Import_Service { case strtolower(FreshRSS_Export_Service::TYPE_JSONFEED): $feed->_kind(FreshRSS_Feed::KIND_JSONFEED); break; + case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION): + $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION); + break; default: $feed->_kind(FreshRSS_Feed::KIND_RSS); break; @@ -257,6 +260,7 @@ class FreshRSS_Import_Service { if (!empty($jsonSettings)) { $feed->_attribute('json_dotnotation', $jsonSettings); } + $feed->_attribute('xPathToJson', $feed_elt['frss:xPathToJson'] ?? null); $curl_params = []; if (isset($feed_elt['frss:CURLOPT_COOKIE'])) { diff --git a/app/i18n/cs/sub.php b/app/i18n/cs/sub.php index cb9bd02c6..c60f47f95 100644 --- a/app/i18n/cs/sub.php +++ b/app/i18n/cs/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimální počet článků pro ponechání', 'kind' => array( '_' => 'Typ zdroje feedu', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON s tečkovým zápisem používá tečky mezi objekty a závorky pro pole. (e.g. 
<code>data.items[0].title</code>)', 'item' => array( '_' => 'vyhledávání nových <strong>položek</strong><br /><small>(nejdůležitější)</small>', - 'help' => 'JSON cesta k poli obsahujícímu položky, např.: <code>newsItems</code>', + 'help' => 'JSON cesta k poli obsahujícímu položky, např.: <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'autor položky', 'item_categories' => 'štítky položky', diff --git a/app/i18n/de/sub.php b/app/i18n/de/sub.php index e9be33cf6..b8fba0a7e 100644 --- a/app/i18n/de/sub.php +++ b/app/i18n/de/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimale Anzahl an Artikeln, die behalten wird', 'kind' => array( '_' => 'Art der Feed-Quelle', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Webseite scannen)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON punktnotiert nutzt Punkte zwischen den Objekten und eckige Klammern für Arrays (e.g. <code>data.items[0].title</code>)', 'item' => array( '_' => 'News <strong>Items</strong> finden<br /><small>(sehr wichtig)</small>', - 'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. <code>newsItems</code>', + 'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. 
<code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'Item Autor', 'item_categories' => 'Item Hashtags', diff --git a/app/i18n/el/sub.php b/app/i18n/el/sub.php index 80f420811..47e442b13 100644 --- a/app/i18n/el/sub.php +++ b/app/i18n/el/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // TODO 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. 
<code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/en-us/sub.php b/app/i18n/en-us/sub.php index 7f4469ff1..93e7bf21b 100644 --- a/app/i18n/en-us/sub.php +++ b/app/i18n/en-us/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // IGNORE 'kind' => array( '_' => 'Type of feed source', // IGNORE + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // IGNORE + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // IGNORE + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // IGNORE + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // IGNORE 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // IGNORE - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // IGNORE + 'help' => 'JSON path to the array containing the items, e.g. 
<code>$</code> or <code>newsItems</code>', // IGNORE ), 'item_author' => 'item author', // IGNORE 'item_categories' => 'item tags', // IGNORE diff --git a/app/i18n/en/sub.php b/app/i18n/en/sub.php index f514df5f3..ca4da3409 100644 --- a/app/i18n/en/sub.php +++ b/app/i18n/en/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', 'kind' => array( '_' => 'Type of feed source', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', + 'help' => 'JSON path to the array containing the items, e.g. 
<code>$</code> or <code>newsItems</code>', ), 'item_author' => 'item author', 'item_categories' => 'item tags', diff --git a/app/i18n/es/sub.php b/app/i18n/es/sub.php index cfb2bf550..268a520cc 100644 --- a/app/i18n/es/sub.php +++ b/app/i18n/es/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Número mínimo de artículos a conservar', 'kind' => array( '_' => 'Tipo de origen de la fuente', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'Un punto JSON anotado utiliza puntos entre objetos y corchetes para matrices (ejemplo: <code>data.items[0].title</code>)', 'item' => array( '_' => 'buscando nuevos <strong>items</strong><br /><small>(más importante)</small>', - 'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: <code>newsItems</code>', + 'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: <code>$</code> o <code>newsItems</code>', ), 'item_author' => 'autor del item', 'item_categories' => 'etiquetas del item', diff --git a/app/i18n/fa/sub.php b/app/i18n/fa/sub.php index 9aa388e27..8ad8f3090 100644 --- a/app/i18n/fa/sub.php +++ b/app/i18n/fa/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => ' حداقل تعداد مقالات برای نگهداری', 'kind' => array( '_' => ' نوع منبع خوراک', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => ' HTML + XPath (خراش دادن وب)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets 
for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/fr/sub.php b/app/i18n/fr/sub.php index 6dd205356..2c6ec36cb 100644 --- a/app/i18n/fr/sub.php +++ b/app/i18n/fr/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Nombre minimum d’articles à conserver', 'kind' => array( '_' => 'Type de source de flux', + 'html_json' => array( + '_' => 'HTML + XPath + JSON notation point (JSON dans HTML)', + 'xpath' => array( + '_' => 'XPath pour JSON dans HTML', + 'help' => 'Exemple : <code>//script[@type="application/json"]</code>', + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Moissonnage du Web)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'La notation point pour JSON utilise le point comme séparateur objet, et des crochets pour un tableau : (ex : <code>data.items[0].title</code>)', 'item' => array( '_' => 'trouver les <strong>articles</strong><br /><small>(c’est le plus important)</small>', - 'help' => 'Chemin vers le tableau contenant les articles, par exemple <code>newsItems</code>', + 'help' => 'Chemin vers le tableau contenant les articles, par exemple <code>$</code> ou <code>newsItems</code>', ), 'item_author' => 'auteur de l’article', 'item_categories' => 'catégories (tags) de l’article', diff --git a/app/i18n/he/sub.php b/app/i18n/he/sub.php index bd5f2446d..4accafe33 100644 --- a/app/i18n/he/sub.php +++ b/app/i18n/he/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'מסםר מינימלי של מאמרים לשמור', 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + 
JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/hu/sub.php b/app/i18n/hu/sub.php index 7c45b2951..7b9a33d98 100644 --- a/app/i18n/hu/sub.php +++ b/app/i18n/hu/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Megtartandó cikkek minimális száma', 'kind' => array( '_' => 'Hírforrás típusa', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON pontjelölés pontokat használ az objektumok között és zárójeleket a tömbökhöz (pl. <code>data.items[0].title</code>)', 'item' => array( '_' => 'hírek keresése <strong>elemek</strong><br /><small>(legfontosabb)</small>', - 'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. <code>newsItems</code>', + 'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. 
<code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'elem szerző', 'item_categories' => 'elem címkék', diff --git a/app/i18n/id/sub.php b/app/i18n/id/sub.php index 6f637c316..d8fde0251 100644 --- a/app/i18n/id/sub.php +++ b/app/i18n/id/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // TODO 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. 
<code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/it/sub.php b/app/i18n/it/sub.php index ff1c6f36b..e36a144ec 100644 --- a/app/i18n/it/sub.php +++ b/app/i18n/it/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Numero minimo di articoli da mantenere', 'kind' => array( '_' => 'Tipo di sorgente del feed', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'ricerca nuovi <strong>elementi</strong><br /><small>(più importante)</small>', - 'help' => 'percorso JSON per l’array contenente gli elementi, es. <code>newsItems</code>', + 'help' => 'percorso JSON per l’array contenente gli elementi, es. 
<code>$</code> o <code>newsItems</code>', ), 'item_author' => 'autore elemento', 'item_categories' => 'tag elemento', diff --git a/app/i18n/ja/sub.php b/app/i18n/ja/sub.php index 465637307..00234c0af 100644 --- a/app/i18n/ja/sub.php +++ b/app/i18n/ja/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '最小数の記事は保持されます', 'kind' => array( '_' => 'フィードソースの種類', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (ウェブスクレイピング)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSONのドット記法は、オブジェクトの間にドットを使用し、配列には括弧を使用します。例: <code>data.items[0].title</code>', 'item' => array( '_' => 'ニュース<strong>項目</strong>を探す<br /><small>(最重要)</small>', - 'help' => '項目を含む配列へのJSONパス。 例: <code>newsItems</code>', + 'help' => '項目を含む配列へのJSONパス。 例: <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => '項目の著者', 'item_categories' => '項目のタグ', diff --git a/app/i18n/ko/sub.php b/app/i18n/ko/sub.php index 470d61478..e28f22f74 100644 --- a/app/i18n/ko/sub.php +++ b/app/i18n/ko/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '최소 유지 글 개수', 'kind' => array( '_' => '피드 소스 유형', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (웹 스크래핑)', 'feed_title' => array( diff --git a/app/i18n/lv/sub.php b/app/i18n/lv/sub.php index c5e5b388f..d81da3718 100644 --- a/app/i18n/lv/sub.php +++ b/app/i18n/lv/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimālais saglabājamo izstrādājumu skaits', 'kind' => array( '_' => 'Barotnes avota veids', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot 
notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Tīmekļa nolasīšana)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/nl/sub.php b/app/i18n/nl/sub.php index 38cd24d10..817fa8b10 100644 --- a/app/i18n/nl/sub.php +++ b/app/i18n/nl/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum aantal artikelen om te houden', 'kind' => array( '_' => 'Feedbron-type', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( diff --git a/app/i18n/oc/sub.php b/app/i18n/oc/sub.php index 4fcc50781..6b8ea93bb 100644 --- a/app/i18n/oc/sub.php +++ b/app/i18n/oc/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Nombre minimum d’articles de servar', 'kind' => array( '_' => 'Tipe de font de flux', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' 
=> 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => 'finding news <strong>items</strong><br /><small>(most important)</small>', // TODO - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. <code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/pl/sub.php b/app/i18n/pl/sub.php index 13ad11bc3..4f99c2866 100644 --- a/app/i18n/pl/sub.php +++ b/app/i18n/pl/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimalna liczba wiadomości do do przechowywania', 'kind' => array( '_' => 'Rodzaj źródła kanału', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON oddzielający obiekty kropkami i używający nawiasów kwadratowych dla tablic (na przykład <code>data.items[0].title</code>)', 'item' => array( '_' => 'odnajdywanie <strong>wiadomości</strong><br /><small>(najważniejsze)</small>', - 'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład <code>newsItems</code>', + 'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'autor wiadomości', 'item_categories' => 'tagi wiadomości', diff --git a/app/i18n/pt-br/sub.php b/app/i18n/pt-br/sub.php index 2593b3648..d1c2f08e5 100644 --- a/app/i18n/pt-br/sub.php +++ 
b/app/i18n/pt-br/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Número mínimo de artigos para manter', 'kind' => array( '_' => 'Tipo de fonte de alimentação do Feed', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'Um JSON na notação de ponto usa pontos entre os objetos e colchetes para arrays (e.g. <code>data.items[0].title</code>)', 'item' => array( '_' => 'encontrando novidades <strong>itens</strong><br /><small>(mais importante)</small>', - 'help' => 'Caminho do JSON para o array contendo os itens, e.g. <code>newsItems</code>', + 'help' => 'Caminho do JSON para o array contendo os itens, e.g. <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'autor do item', 'item_categories' => 'tags dos itens', diff --git a/app/i18n/ru/sub.php b/app/i18n/ru/sub.php index a9f802738..946f53177 100644 --- a/app/i18n/ru/sub.php +++ b/app/i18n/ru/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Оставлять статей не менее', 'kind' => array( '_' => 'Тип источника ленты', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (парсинг веб-страниц)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON с точечной нотацией использует точки между объектами и квадратные скобки для массивов (например: <code>data.items[0].title</code>)', 'item' => array( '_' => 'Найти новые <strong>элементы</strong><br /><small>(самое важное)</small>', - 'help' => 'JSON-путь к массиву, 
содержащему элементы, например: <code>newsItems</code>', + 'help' => 'JSON-путь к массиву, содержащему элементы, например: <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'автор элемента', 'item_categories' => 'теги элемента', diff --git a/app/i18n/sk/sub.php b/app/i18n/sk/sub.php index 766d75e8f..a019db2a9 100644 --- a/app/i18n/sk/sub.php +++ b/app/i18n/sk/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimálny počet článkov na uchovanie', 'kind' => array( '_' => 'Typ zdroja kanála', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON so zápisom s bodkou používa bodky na oddelenie objekov a zložené zátvorky pre polia (príklad: <code>data.items[0].title</code>)', 'item' => array( '_' => 'hľadajú sa <strong>položky</strong> noviniek<br /><small>(najdôležitejšie)</small>', - 'help' => 'JSON cesta k polu obsahujúce položky, príklad: <code>newsItems</code>', + 'help' => 'JSON cesta k polu obsahujúce položky, príklad: <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => 'autor položky', 'item_categories' => 'značky položky', diff --git a/app/i18n/tr/sub.php b/app/i18n/tr/sub.php index ad68d0611..2bdc551a8 100644 --- a/app/i18n/tr/sub.php +++ b/app/i18n/tr/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'En az tutulacak makale sayısı', 'kind' => array( '_' => 'Akış kaynağının tipi', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web 
scraping)', // IGNORE 'feed_title' => array( diff --git a/app/i18n/zh-cn/sub.php b/app/i18n/zh-cn/sub.php index 2ccd4bcf6..274ef677b 100644 --- a/app/i18n/zh-cn/sub.php +++ b/app/i18n/zh-cn/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '至少保存的文章数', 'kind' => array( '_' => '订阅源类型', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web 抓取)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON 点表达式(JSON 路径)在对象之间使用点,在数组中使用中括号 (例如 <code>data.items[0].title</code>)', 'item' => array( '_' => '寻找新的 <strong>文章</strong><br /><small>(最重要的参数)</small>', - 'help' => '包含文章数组的 JSON 路径, 例如 <code>newsItems</code>', + 'help' => '包含文章数组的 JSON 路径, 例如 <code>$</code> or <code>newsItems</code>', // DIRTY ), 'item_author' => '文章作者', 'item_categories' => '文章标签', diff --git a/app/i18n/zh-tw/sub.php b/app/i18n/zh-tw/sub.php index 56237d9ed..8cbc0f9ae 100644 --- a/app/i18n/zh-tw/sub.php +++ b/app/i18n/zh-tw/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '至少保存的文章數', 'kind' => array( '_' => '訂閱源類型', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: <code>//script[@type="application/json"]</code>', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web 抓取)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. <code>data.items[0].title</code>)', // TODO 'item' => array( '_' => '找尋新聞 <strong>項目</strong><br /><small>(最重要的)</small>', - 'help' => 'JSON path to the array containing the items, e.g. <code>newsItems</code>', // TODO + 'help' => 'JSON path to the array containing the items, e.g. 
<code>$</code> or <code>newsItems</code>', // TODO ), 'item_author' => '項目作者', 'item_categories' => '項目標籤', diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml index b9563815a..eecfc5d23 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -33,6 +33,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { case FreshRSS_Feed::KIND_JSONFEED: $outline['type'] = FreshRSS_Export_Service::TYPE_JSONFEED; break; + case FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION: + $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION; + break; } if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH || $feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) { @@ -48,7 +51,7 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { $outline['frss:xPathItemThumbnail'] = $xPathSettings['itemThumbnail'] ?? null; $outline['frss:xPathItemCategories'] = $xPathSettings['itemCategories'] ?? null; $outline['frss:xPathItemUid'] = $xPathSettings['itemUid'] ?? null; - } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { /** @var array<string,string> */ $jsonSettings = $feed->attributeArray('json_dotnotation') ?? []; $outline['frss:jsonItem'] = $jsonSettings['item'] ?? null; @@ -61,6 +64,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { $outline['frss:jsonItemThumbnail'] = $jsonSettings['itemThumbnail'] ?? null; $outline['frss:jsonItemCategories'] = $jsonSettings['itemCategories'] ?? null; $outline['frss:jsonItemUid'] = $jsonSettings['itemUid'] ?? 
null; + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $outline['frss:xPathToJson'] = $feed->attributeString('xPathToJson'); + } } if (!empty($feed->filtersAction('read'))) { diff --git a/app/views/helpers/feed/update.phtml b/app/views/helpers/feed/update.phtml index 5a14eff0f..780e9667a 100644 --- a/app/views/helpers/feed/update.phtml +++ b/app/views/helpers/feed/update.phtml @@ -416,7 +416,10 @@ <option value="<?= FreshRSS_Feed::KIND_HTML_XPATH ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH ? 'selected="selected"' : '' ?> data-show="html_xpath"><?= _t('sub.feed.kind.html_xpath') ?></option> <option value="<?= FreshRSS_Feed::KIND_XML_XPATH ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'selected="selected"' : '' ?> data-show="html_xpath"><?= _t('sub.feed.kind.xml_xpath') ?></option> <option value="<?= FreshRSS_Feed::KIND_JSONFEED ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSONFEED ? 'selected="selected"' : '' ?>><?= _t('sub.feed.kind.jsonfeed') ?></option> - <option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION ? 'selected="selected"' : '' ?> data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option> + <option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION ? 'selected="selected"' : '' ?> + data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option> + <option value="<?= FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ?>" <?= $this->feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 
'selected="selected"' : '' ?> + data-show="json_dotnotation xPathToJsonGroup"><?= _t('sub.feed.kind.html_json') ?></option> </select> </div> </div> @@ -522,7 +525,16 @@ <?php /** @var array<string,string> $jsonSettings */ $jsonSettings = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeArray('json_dotnotation') ?? []); + $xPathToJson = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeString('xPathToJson')); ?> + <div class="form-group" id="xPathToJsonGroup"> + <label class="group-name" for="xPathToJson"><?= _t('sub.feed.kind.html_json.xpath') ?></label> + <div class="group-controls"> + <textarea class="valid-xpath w100" name="xPathToJson" id="xPathToJson" rows="2" cols="64" spellcheck="false" data-leave-validation="<?= $xPathToJson ?>"><?= $xPathToJson ?? '' ?></textarea> + <p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.html_json.xpath.help') ?></p> + </div> + </div> + <p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.json_dotnotation.help') ?></p> <div class="form-group"> <label class="group-name" for="jsonItem"><small><?= _t('sub.feed.kind.json_dotnotation.json') ?></small><br /> diff --git a/app/views/subscription/add.phtml b/app/views/subscription/add.phtml index 943fdc52b..be44c4d62 100644 --- a/app/views/subscription/add.phtml +++ b/app/views/subscription/add.phtml @@ -73,6 +73,7 @@ <option value="<?= FreshRSS_Feed::KIND_XML_XPATH ?>" data-show="html_xpath"><?= _t('sub.feed.kind.xml_xpath') ?></option> <option value="<?= FreshRSS_Feed::KIND_JSONFEED ?>"><?= _t('sub.feed.kind.jsonfeed') ?></option> <option value="<?= FreshRSS_Feed::KIND_JSON_DOTNOTATION ?>" data-show="json_dotnotation"><?= _t('sub.feed.kind.json_dotnotation') ?></option> + <option value="<?= FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ?>" data-show="json_dotnotation xPathToJsonGroup"><?= _t('sub.feed.kind.html_json') ?></option> </select> </div> </div> @@ -167,6 +168,14 @@ </div> </fieldset> <fieldset id="json_dotnotation"> + <div class="form-group" 
id="xPathToJsonGroup"> + <label class="group-name" for="xPathToJson"><?= _t('sub.feed.kind.html_json.xpath') ?></label> + <div class="group-controls"> + <textarea class="valid-xpath" name="xPathToJson" id="xPathToJson" rows="2" cols="64" spellcheck="false"></textarea> + <p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.html_json.xpath.help') ?></p> + </div> + </div> + <p class="help"><?= _i('help') ?> <?= _t('sub.feed.kind.json_dotnotation.help') ?></p> <div class="form-group"> <label class="group-name" for="jsonFeedTitle"><small><?= _t('sub.feed.kind.json_dotnotation.json') ?></small><br /> diff --git a/docs/en/developers/OPML.md b/docs/en/developers/OPML.md index f1d5880e8..811350ec7 100644 --- a/docs/en/developers/OPML.md +++ b/docs/en/developers/OPML.md @@ -67,6 +67,11 @@ The following attributes are using similar naming conventions than [RSS-Bridge]( * `<outline type="JSONFeed" ...`: Uses `JSON+DotNotation` behind the scenes to parse a [JSON Feed](https://www.jsonfeed.org/). +### HTML+XPath+JSON + +* `<outline type="HTML+XPath+JSON+DotNotation" frss:xPathToJson="..." ...`: Same as `JSON+DotNotation` but first extracting the JSON string from an HTML document thanks to an XPath expression. 
+ * Example: `//script[@type='application/json']` + ### cURL A number of [cURL options](https://curl.se/libcurl/c/curl_easy_setopt.html) are supported: diff --git a/p/scripts/feed.js b/p/scripts/feed.js index 29af2a3ea..7516f9e51 100644 --- a/p/scripts/feed.js +++ b/p/scripts/feed.js @@ -66,9 +66,6 @@ function init_popup_preview_selector() { }); } -/** - * Allow a <select class="select-show"> to hide/show elements defined by <option data-show="elem-id"></option> - */ function init_disable_elements_on_update(parent) { const inputs = parent.querySelectorAll('input[data-disable-update]'); for (const input of inputs) { @@ -90,8 +87,11 @@ function init_select_show(parent) { const options = select.querySelectorAll('option[data-show]'); const shows = {}; // To allow multiple options to show the same element for (const option of options) { - if (!shows[option.dataset.show]) { - shows[option.dataset.show] = option.selected; + const targets = option.dataset.show.split(' '); // Allow multiple targets + for (const target of targets) { + if (!shows[target]) { + shows[target] = option.selected; + } } } |
