From ccb132523a4ee740d5b576574e9f44668021fbe6 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 13 Oct 2024 15:28:45 +0200 Subject: New feed mode: HTML + XPath + JSON dot notation (JSON in HTML) (#6888) * New feed mode: HTML + XPath + JSON dot notation (JSON in HTML) Same as `JSON+DotNotation` but first extracting the JSON string from an HTML document thanks to an XPath expression. Example: `//script[@type='application/json']` fix https://github.com/FreshRSS/FreshRSS/discussions/6876 * JavaScript UI to show/hide new field * Casing xPathToJson * Slight renaming --- app/Controllers/feedController.php | 10 ++++++++- app/Controllers/subscriptionController.php | 5 ++++- app/Models/Feed.php | 35 ++++++++++++++++++++++++++---- app/Services/ExportService.php | 1 + app/Services/ImportService.php | 4 ++++ app/i18n/cs/sub.php | 9 +++++++- app/i18n/de/sub.php | 9 +++++++- app/i18n/el/sub.php | 9 +++++++- app/i18n/en-us/sub.php | 9 +++++++- app/i18n/en/sub.php | 9 +++++++- app/i18n/es/sub.php | 9 +++++++- app/i18n/fa/sub.php | 9 +++++++- app/i18n/fr/sub.php | 9 +++++++- app/i18n/he/sub.php | 9 +++++++- app/i18n/hu/sub.php | 9 +++++++- app/i18n/id/sub.php | 9 +++++++- app/i18n/it/sub.php | 9 +++++++- app/i18n/ja/sub.php | 9 +++++++- app/i18n/ko/sub.php | 7 ++++++ app/i18n/lv/sub.php | 9 +++++++- app/i18n/nl/sub.php | 7 ++++++ app/i18n/oc/sub.php | 9 +++++++- app/i18n/pl/sub.php | 9 +++++++- app/i18n/pt-br/sub.php | 9 +++++++- app/i18n/ru/sub.php | 9 +++++++- app/i18n/sk/sub.php | 9 +++++++- app/i18n/tr/sub.php | 7 ++++++ app/i18n/zh-cn/sub.php | 9 +++++++- app/i18n/zh-tw/sub.php | 9 +++++++- app/views/helpers/export/opml.phtml | 8 ++++++- app/views/helpers/feed/update.phtml | 14 +++++++++++- app/views/subscription/add.phtml | 9 ++++++++ docs/en/developers/OPML.md | 5 +++++ p/scripts/feed.js | 10 ++++----- 34 files changed, 277 insertions(+), 34 deletions(-) diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index 105099966..2b757177c 100755 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -260,7 +260,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if (!empty($xPathSettings)) { $attributes['xpath'] = $xPathSettings; } - } elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed_kind === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed_kind === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { $jsonSettings = []; if (Minz_Request::paramString('jsonFeedTitle') !== '') { $jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true); @@ -298,6 +298,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if (!empty($jsonSettings)) { $attributes['json_dotnotation'] = $jsonSettings; } + if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') { + $attributes['xPathToJson'] = Minz_Request::paramString('xPathToJson', plaintext: true); + } } try { @@ -509,6 +512,11 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if ($simplePie === null) { throw new FreshRSS_Feed_Exception('JSON Feed parsing failed for [' . $feed->url(false) . ']'); } + } elseif ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $simplePie = $feed->loadJson(); + if ($simplePie === null) { + throw new FreshRSS_Feed_Exception('HTML+XPath+JSON parsing failed for [' . $feed->url(false) . ']'); + } } else { $simplePie = $feed->load(false, $feedIsNew); } diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index e2e3bbf11..683f403bc 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -244,7 +244,7 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { $xPathSettings['itemUid'] = Minz_Request::paramString('xPathItemUid', true); if (!empty($xPathSettings)) $feed->_attribute('xpath', $xPathSettings); - } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { $jsonSettings = []; if (Minz_Request::paramString('jsonFeedTitle') !== '') { $jsonSettings['feedTitle'] = Minz_Request::paramString('jsonFeedTitle', true); @@ -282,6 +282,9 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { if (!empty($jsonSettings)) { $feed->_attribute('json_dotnotation', $jsonSettings); } + if (Minz_Request::paramString('xPathToJson', plaintext: true) !== '') { + $feed->_attribute('xPathToJson', Minz_Request::paramString('xPathToJson', plaintext: true)); + } } $feed->_attribute('path_entries_filter', Minz_Request::paramString('path_entries_filter', true)); diff --git a/app/Models/Feed.php b/app/Models/Feed.php index b9afa9e83..ad84c35a1 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -32,6 +32,8 @@ class FreshRSS_Feed extends Minz_Model { public const KIND_JSONFEED = 25; public const KIND_JSON_DOTNOTATION = 30; + /** JSON embedded in HTML */ + public const KIND_HTML_XPATH_JSON_DOTNOTATION = 35; public const PRIORITY_IMPORTANT = 20; public const PRIORITY_MAIN_STREAM = 10; @@ -639,6 +641,24 @@ class FreshRSS_Feed extends Minz_Model { ]; } + private function extractJsonFromHtml(string $html): ?string { + $xPathToJson = $this->attributeString('xPathToJson') ?? ''; + if ($xPathToJson === '') { + return null; + } + + $doc = new DOMDocument(); + $doc->recover = true; + $doc->strictErrorChecking = false; + if (!$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { + return null; + } + + $xpath = new DOMXPath($doc); + $json = @$xpath->evaluate('normalize-space(' . $xPathToJson . ')'); + return is_string($json) ? $json : null; + } + public function loadJson(): ?\SimplePie\SimplePie { if ($this->url == '') { return null; @@ -648,14 +668,21 @@ class FreshRSS_Feed extends Minz_Model { return null; } - $httpAccept = 'json'; - $json = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); - if (strlen($json) <= 0) { + $httpAccept = $this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 'html' : 'json'; + $content = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); + if (strlen($content) <= 0) { return null; } + if ($this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $content = $this->extractJsonFromHtml($content); + if ($content == null) { + return null; + } + } + //check if the content is actual JSON - $jf = json_decode($json, true); + $jf = json_decode($content, true); if (json_last_error() !== JSON_ERROR_NONE || !is_array($jf)) { return null; } diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php index 37bcdc6e3..c532308d7 100644 --- a/app/Services/ExportService.php +++ b/app/Services/ExportService.php @@ -23,6 +23,7 @@ class FreshRSS_Export_Service { final public const TYPE_JSON_DOTPATH = 'JSON+DotPath'; // Legacy 1.24.0-dev final public const TYPE_JSON_DOTNOTATION = 'JSON+DotNotation'; final public const TYPE_JSONFEED = 'JSONFeed'; + final public const TYPE_HTML_XPATH_JSON_DOTNOTATION = 'HTML+XPath+JSON+DotNotation'; /** * Initialize the service for the given user. diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 1871c37e0..298c0ec21 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -168,6 +168,9 @@ class FreshRSS_Import_Service { case strtolower(FreshRSS_Export_Service::TYPE_JSONFEED): $feed->_kind(FreshRSS_Feed::KIND_JSONFEED); break; + case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION): + $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION); + break; default: $feed->_kind(FreshRSS_Feed::KIND_RSS); break; @@ -257,6 +260,7 @@ class FreshRSS_Import_Service { if (!empty($jsonSettings)) { $feed->_attribute('json_dotnotation', $jsonSettings); } + $feed->_attribute('xPathToJson', $feed_elt['frss:xPathToJson'] ?? null); $curl_params = []; if (isset($feed_elt['frss:CURLOPT_COOKIE'])) { diff --git a/app/i18n/cs/sub.php b/app/i18n/cs/sub.php index cb9bd02c6..c60f47f95 100644 --- a/app/i18n/cs/sub.php +++ b/app/i18n/cs/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimální počet článků pro ponechání', 'kind' => array( '_' => 'Typ zdroje feedu', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON s tečkovým zápisem používá tečky mezi objekty a závorky pro pole. (e.g. data.items[0].title)', 'item' => array( '_' => 'vyhledávání nových položek
(nejdůležitější)', - 'help' => 'JSON cesta k poli obsahujícímu položky, např.: newsItems', + 'help' => 'JSON cesta k poli obsahujícímu položky, např.: $ or newsItems', // DIRTY ), 'item_author' => 'autor položky', 'item_categories' => 'štítky položky', diff --git a/app/i18n/de/sub.php b/app/i18n/de/sub.php index e9be33cf6..b8fba0a7e 100644 --- a/app/i18n/de/sub.php +++ b/app/i18n/de/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimale Anzahl an Artikeln, die behalten wird', 'kind' => array( '_' => 'Art der Feed-Quelle', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Webseite scannen)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON punktnotiert nutzt Punkte zwischen den Objekten und eckige Klammern für Arrays (e.g. data.items[0].title)', 'item' => array( '_' => 'News Items finden
(sehr wichtig)', - 'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. newsItems', + 'help' => 'JSON-Pfad zum Array, das die Items enthält, z.B. $ or newsItems', // DIRTY ), 'item_author' => 'Item Autor', 'item_categories' => 'Item Hashtags', diff --git a/app/i18n/el/sub.php b/app/i18n/el/sub.php index 80f420811..47e442b13 100644 --- a/app/i18n/el/sub.php +++ b/app/i18n/el/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // TODO 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/en-us/sub.php b/app/i18n/en-us/sub.php index 7f4469ff1..93e7bf21b 100644 --- a/app/i18n/en-us/sub.php +++ b/app/i18n/en-us/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // IGNORE 'kind' => array( '_' => 'Type of feed source', // IGNORE + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // IGNORE + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // IGNORE + 'help' => 'Example: //script[@type="application/json"]', // IGNORE + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // IGNORE 'item' => array( '_' => 'finding news items
(most important)', // IGNORE - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // IGNORE + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // IGNORE ), 'item_author' => 'item author', // IGNORE 'item_categories' => 'item tags', // IGNORE diff --git a/app/i18n/en/sub.php b/app/i18n/en/sub.php index f514df5f3..ca4da3409 100644 --- a/app/i18n/en/sub.php +++ b/app/i18n/en/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', 'kind' => array( '_' => 'Type of feed source', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', 'item' => array( '_' => 'finding news items
(most important)', - 'help' => 'JSON path to the array containing the items, e.g. newsItems', + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', ), 'item_author' => 'item author', 'item_categories' => 'item tags', diff --git a/app/i18n/es/sub.php b/app/i18n/es/sub.php index cfb2bf550..268a520cc 100644 --- a/app/i18n/es/sub.php +++ b/app/i18n/es/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Número mínimo de artículos a conservar', 'kind' => array( '_' => 'Tipo de origen de la fuente', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'Un punto JSON anotado utiliza puntos entre objetos y corchetes para matrices (ejemplo: data.items[0].title)', 'item' => array( '_' => 'buscando nuevos items
(más importante)', - 'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: newsItems', + 'help' => 'Ruta JSON a la matriz que contiene los elementos, ejemplo: $ o newsItems', ), 'item_author' => 'autor del item', 'item_categories' => 'etiquetas del item', diff --git a/app/i18n/fa/sub.php b/app/i18n/fa/sub.php index 9aa388e27..8ad8f3090 100644 --- a/app/i18n/fa/sub.php +++ b/app/i18n/fa/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => ' حداقل تعداد مقالات برای نگهداری', 'kind' => array( '_' => ' نوع منبع خوراک', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => ' HTML + XPath (خراش دادن وب)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/fr/sub.php b/app/i18n/fr/sub.php index 6dd205356..2c6ec36cb 100644 --- a/app/i18n/fr/sub.php +++ b/app/i18n/fr/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Nombre minimum d’articles à conserver', 'kind' => array( '_' => 'Type de source de flux', + 'html_json' => array( + '_' => 'HTML + XPath + JSON notation point (JSON dans HTML)', + 'xpath' => array( + '_' => 'XPath pour JSON dans HTML', + 'help' => 'Exemple : //script[@type="application/json"]', + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Moissonnage du Web)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'La notation point pour JSON utilise le point comme séparateur objet, et des crochets pour un tableau : (ex : data.items[0].title)', 'item' => array( '_' => 'trouver les articles
(c’est le plus important)', - 'help' => 'Chemin vers le tableau contenant les articles, par exemple newsItems', + 'help' => 'Chemin vers le tableau contenant les articles, par exemple $ ou newsItems', ), 'item_author' => 'auteur de l’article', 'item_categories' => 'catégories (tags) de l’article', diff --git a/app/i18n/he/sub.php b/app/i18n/he/sub.php index bd5f2446d..4accafe33 100644 --- a/app/i18n/he/sub.php +++ b/app/i18n/he/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'מסםר מינימלי של מאמרים לשמור', 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/hu/sub.php b/app/i18n/hu/sub.php index 7c45b2951..7b9a33d98 100644 --- a/app/i18n/hu/sub.php +++ b/app/i18n/hu/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Megtartandó cikkek minimális száma', 'kind' => array( '_' => 'Hírforrás típusa', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON pontjelölés pontokat használ az objektumok között és zárójeleket a tömbökhöz (pl. data.items[0].title)', 'item' => array( '_' => 'hírek keresése elemek
(legfontosabb)', - 'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. newsItems', + 'help' => 'JSON útvonal az elemeket tartalmazó tömbhöz, pl. $ or newsItems', // DIRTY ), 'item_author' => 'elem szerző', 'item_categories' => 'elem címkék', diff --git a/app/i18n/id/sub.php b/app/i18n/id/sub.php index 6f637c316..d8fde0251 100644 --- a/app/i18n/id/sub.php +++ b/app/i18n/id/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum number of articles to keep', // TODO 'kind' => array( '_' => 'Type of feed source', // TODO + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // TODO 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/it/sub.php b/app/i18n/it/sub.php index ff1c6f36b..e36a144ec 100644 --- a/app/i18n/it/sub.php +++ b/app/i18n/it/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Numero minimo di articoli da mantenere', 'kind' => array( '_' => 'Tipo di sorgente del feed', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'ricerca nuovi elementi
(più importante)', - 'help' => 'percorso JSON per l’array contenente gli elementi, es. newsItems', + 'help' => 'percorso JSON per l’array contenente gli elementi, es. $ o newsItems', ), 'item_author' => 'autore elemento', 'item_categories' => 'tag elemento', diff --git a/app/i18n/ja/sub.php b/app/i18n/ja/sub.php index 465637307..00234c0af 100644 --- a/app/i18n/ja/sub.php +++ b/app/i18n/ja/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '最小数の記事は保持されます', 'kind' => array( '_' => 'フィードソースの種類', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (ウェブスクレイピング)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSONのドット記法は、オブジェクトの間にドットを使用し、配列には括弧を使用します。例: data.items[0].title', 'item' => array( '_' => 'ニュース項目を探す
(最重要)', - 'help' => '項目を含む配列へのJSONパス。 例: newsItems', + 'help' => '項目を含む配列へのJSONパス。 例: $ or newsItems', // DIRTY ), 'item_author' => '項目の著者', 'item_categories' => '項目のタグ', diff --git a/app/i18n/ko/sub.php b/app/i18n/ko/sub.php index 470d61478..e28f22f74 100644 --- a/app/i18n/ko/sub.php +++ b/app/i18n/ko/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '최소 유지 글 개수', 'kind' => array( '_' => '피드 소스 유형', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (웹 스크래핑)', 'feed_title' => array( diff --git a/app/i18n/lv/sub.php b/app/i18n/lv/sub.php index c5e5b388f..d81da3718 100644 --- a/app/i18n/lv/sub.php +++ b/app/i18n/lv/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimālais saglabājamo izstrādājumu skaits', 'kind' => array( '_' => 'Barotnes avota veids', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Tīmekļa nolasīšana)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/nl/sub.php b/app/i18n/nl/sub.php index 38cd24d10..817fa8b10 100644 --- a/app/i18n/nl/sub.php +++ b/app/i18n/nl/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimum aantal artikelen om te houden', 'kind' => array( '_' => 'Feedbron-type', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( diff --git a/app/i18n/oc/sub.php b/app/i18n/oc/sub.php index 4fcc50781..6b8ea93bb 100644 --- a/app/i18n/oc/sub.php +++ b/app/i18n/oc/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Nombre minimum d’articles de servar', 'kind' => array( '_' => 'Tipe de font de flux', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => 'finding news items
(most important)', // TODO - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => 'item author', // TODO 'item_categories' => 'item tags', // TODO diff --git a/app/i18n/pl/sub.php b/app/i18n/pl/sub.php index 13ad11bc3..4f99c2866 100644 --- a/app/i18n/pl/sub.php +++ b/app/i18n/pl/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimalna liczba wiadomości do do przechowywania', 'kind' => array( '_' => 'Rodzaj źródła kanału', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON oddzielający obiekty kropkami i używający nawiasów kwadratowych dla tablic (na przykład data.items[0].title)', 'item' => array( '_' => 'odnajdywanie wiadomości
(najważniejsze)', - 'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład newsItems', + 'help' => 'Ścieżka w JSON-ie do tablicy zawierającej wiadomości, na przykład $ or newsItems', // DIRTY ), 'item_author' => 'autor wiadomości', 'item_categories' => 'tagi wiadomości', diff --git a/app/i18n/pt-br/sub.php b/app/i18n/pt-br/sub.php index 2593b3648..d1c2f08e5 100644 --- a/app/i18n/pt-br/sub.php +++ b/app/i18n/pt-br/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Número mínimo de artigos para manter', 'kind' => array( '_' => 'Tipo de fonte de alimentação do Feed', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'Um JSON na notação de ponto usa pontos entre os objetos e colchetes para arrays (e.g. data.items[0].title)', 'item' => array( '_' => 'encontrando novidades itens
(mais importante)', - 'help' => 'Caminho do JSON para o array contendo os itens, e.g. newsItems', + 'help' => 'Caminho do JSON para o array contendo os itens, e.g. $ or newsItems', // DIRTY ), 'item_author' => 'autor do item', 'item_categories' => 'tags dos itens', diff --git a/app/i18n/ru/sub.php b/app/i18n/ru/sub.php index a9f802738..946f53177 100644 --- a/app/i18n/ru/sub.php +++ b/app/i18n/ru/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Оставлять статей не менее', 'kind' => array( '_' => 'Тип источника ленты', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (парсинг веб-страниц)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON с точечной нотацией использует точки между объектами и квадратные скобки для массивов (например: data.items[0].title)', 'item' => array( '_' => 'Найти новые элементы
(самое важное)', - 'help' => 'JSON-путь к массиву, содержащему элементы, например: newsItems', + 'help' => 'JSON-путь к массиву, содержащему элементы, например: $ or newsItems', // DIRTY ), 'item_author' => 'автор элемента', 'item_categories' => 'теги элемента', diff --git a/app/i18n/sk/sub.php b/app/i18n/sk/sub.php index 766d75e8f..a019db2a9 100644 --- a/app/i18n/sk/sub.php +++ b/app/i18n/sk/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'Minimálny počet článkov na uchovanie', 'kind' => array( '_' => 'Typ zdroja kanála', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON so zápisom s bodkou používa bodky na oddelenie objekov a zložené zátvorky pre polia (príklad: data.items[0].title)', 'item' => array( '_' => 'hľadajú sa položky noviniek
(najdôležitejšie)', - 'help' => 'JSON cesta k polu obsahujúce položky, príklad: newsItems', + 'help' => 'JSON cesta k polu obsahujúce položky, príklad: $ or newsItems', // DIRTY ), 'item_author' => 'autor položky', 'item_categories' => 'značky položky', diff --git a/app/i18n/tr/sub.php b/app/i18n/tr/sub.php index ad68d0611..2bdc551a8 100644 --- a/app/i18n/tr/sub.php +++ b/app/i18n/tr/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => 'En az tutulacak makale sayısı', 'kind' => array( '_' => 'Akış kaynağının tipi', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web scraping)', // IGNORE 'feed_title' => array( diff --git a/app/i18n/zh-cn/sub.php b/app/i18n/zh-cn/sub.php index 2ccd4bcf6..274ef677b 100644 --- a/app/i18n/zh-cn/sub.php +++ b/app/i18n/zh-cn/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '至少保存的文章数', 'kind' => array( '_' => '订阅源类型', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web 抓取)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'JSON 点表达式(JSON 路径)在对象之间使用点,在数组中使用中括号 (例如 data.items[0].title)', 'item' => array( '_' => '寻找新的 文章
(最重要的参数)', - 'help' => '包含文章数组的 JSON 路径, 例如 newsItems', + 'help' => '包含文章数组的 JSON 路径, 例如 $ or newsItems', // DIRTY ), 'item_author' => '文章作者', 'item_categories' => '文章标签', diff --git a/app/i18n/zh-tw/sub.php b/app/i18n/zh-tw/sub.php index 56237d9ed..8cbc0f9ae 100644 --- a/app/i18n/zh-tw/sub.php +++ b/app/i18n/zh-tw/sub.php @@ -83,6 +83,13 @@ return array( 'keep_min' => '至少保存的文章數', 'kind' => array( '_' => '訂閱源類型', + 'html_json' => array( + '_' => 'HTML + XPath + JSON dot notation (JSON in HTML)', // TODO + 'xpath' => array( + '_' => 'XPath for JSON in HTML', // TODO + 'help' => 'Example: //script[@type="application/json"]', // TODO + ), + ), 'html_xpath' => array( '_' => 'HTML + XPath (Web 抓取)', 'feed_title' => array( @@ -139,7 +146,7 @@ return array( 'help' => 'A JSON dot notated uses dots between objects and brackets for arrays (e.g. data.items[0].title)', // TODO 'item' => array( '_' => '找尋新聞 項目
(最重要的)', - 'help' => 'JSON path to the array containing the items, e.g. newsItems', // TODO + 'help' => 'JSON path to the array containing the items, e.g. $ or newsItems', // TODO ), 'item_author' => '項目作者', 'item_categories' => '項目標籤', diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml index b9563815a..eecfc5d23 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -33,6 +33,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { case FreshRSS_Feed::KIND_JSONFEED: $outline['type'] = FreshRSS_Export_Service::TYPE_JSONFEED; break; + case FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION: + $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH_JSON_DOTNOTATION; + break; } if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH || $feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) { @@ -48,7 +51,7 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { $outline['frss:xPathItemThumbnail'] = $xPathSettings['itemThumbnail'] ?? null; $outline['frss:xPathItemCategories'] = $xPathSettings['itemCategories'] ?? null; $outline['frss:xPathItemUid'] = $xPathSettings['itemUid'] ?? null; - } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION) { + } elseif ($feed->kind() === FreshRSS_Feed::KIND_JSON_DOTNOTATION || $feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { /** @var array */ $jsonSettings = $feed->attributeArray('json_dotnotation') ?? []; $outline['frss:jsonItem'] = $jsonSettings['item'] ?? null; @@ -61,6 +64,9 @@ function feedsToOutlines(array $feeds, bool $excludeMutedFeeds = false): array { $outline['frss:jsonItemThumbnail'] = $jsonSettings['itemThumbnail'] ?? null; $outline['frss:jsonItemCategories'] = $jsonSettings['itemCategories'] ?? null; $outline['frss:jsonItemUid'] = $jsonSettings['itemUid'] ?? null; + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION) { + $outline['frss:xPathToJson'] = $feed->attributeString('xPathToJson'); + } } if (!empty($feed->filtersAction('read'))) { diff --git a/app/views/helpers/feed/update.phtml b/app/views/helpers/feed/update.phtml index 5a14eff0f..780e9667a 100644 --- a/app/views/helpers/feed/update.phtml +++ b/app/views/helpers/feed/update.phtml @@ -416,7 +416,10 @@ - + + @@ -522,7 +525,16 @@ $jsonSettings */ $jsonSettings = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeArray('json_dotnotation') ?? []); + $xPathToJson = Minz_Helper::htmlspecialchars_utf8($this->feed->attributeString('xPathToJson')); ?> +
+ +
+ +

+
+
+

@@ -167,6 +168,14 @@
+
+ +
+ +

+
+
+