From daaa391e33c5d92e3dd91bb0b81ac420abed7097 Mon Sep 17 00:00:00 2001 From: berumuron Date: Wed, 18 Jan 2023 10:12:21 +0100 Subject: tec: Update the lib_opml (#4403) * fix: Fix undefined GLOB_BRACE on Alpine The manual states that: > Note: The GLOB_BRACE flag is not available on some non GNU systems, > like Solaris or Alpine Linux. This generated an error on Alpine. Reference: https://www.php.net/manual/function.glob.php * fix: List details of feeds for OPML exportation The details are necessary to export the XPath information, the CSS full content path and read actions filters. * Update LibOpml to 0.4.0 * Refactor OPML importation to be more robust First, it fixes two regressions introduced by the update of lib_opml: - title attribute is used when text attribute is missing; - the OPML category attribute is used as a fallback for feeds categories. In a related way, it also fixes a problem when a feed had both a parent category outline and a category attribute. Before, it only considered the attribute as its category, but now it considers the parent outline. Then, it counts category limit correctly by not increasing `$nb_categories` if the category already exists. 
* Exclude lib_opml from the CodeSniffer * Fix variable names when logging some errors * Fix catch of LibOpml Exception * Make sure to declare the category * Exclude lib_opml from PHPStan analyze * Disable markdownlint for lib_opml * Fix typos * Use auto-loading and allow updates via Composer * Fix broken links to lib_opml * Bring back the ability to import the OPML frss:opmlUrl attribute * Refactor the logs of OPML errors * Update lib_opml to the version 0.5.0 Co-authored-by: Alexandre Alapetite --- app/Services/ImportService.php | 440 ++++++++++++++++++++++++++--------------- 1 file changed, 280 insertions(+), 160 deletions(-) (limited to 'app/Services/ImportService.php') diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 28286a753..68aa6f741 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -19,8 +19,6 @@ class FreshRSS_Import_Service { * @param string $username */ public function __construct($username = null) { - require_once(LIB_PATH . '/lib_opml.php'); - $this->catDAO = FreshRSS_Factory::createCategoryDao($username); $this->feedDAO = FreshRSS_Factory::createFeedDao($username); } @@ -34,153 +32,191 @@ class FreshRSS_Import_Service { * This method parses and imports an OPML file. * * @param string $opml_file the OPML file content. - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return array|false an array of categories containing some feeds, or false if an error occurred. + * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category. + * @param boolean $dry_run true to not create categories and feeds in database. 
*/ - public function importOpml(string $opml_file, $parent_cat = null, $flatten = false, $dryRun = false) { + public function importOpml(string $opml_file, $forced_category = null, $dry_run = false) { $this->lastStatus = true; $opml_array = array(); try { - $opml_array = libopml_parse_string($opml_file, false); - } catch (LibOPML_Exception $e) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML parsing: ' . $e->getMessage() . "\n"); - } else { - Minz_Log::warning($e->getMessage()); - } + $libopml = new \marienfressinaud\LibOpml\LibOpml(false); + $opml_array = $libopml->parseString($opml_file); + } catch (\marienfressinaud\LibOpml\Exception $e) { + self::log($e->getMessage()); $this->lastStatus = false; - return false; + return; } - return $this->addOpmlElements($opml_array['body'], $parent_cat, $flatten, $dryRun); - } + $this->catDAO->checkDefault(); + $default_category = $this->catDAO->getDefault(); + if (!$default_category) { + self::log('Cannot get the default category'); + $this->lastStatus = false; + return; + } - /** - * This method imports an OPML file based on its body. - * - * @param array $opml_elements an OPML element (body or outline). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return array an array of categories containing some feeds - */ - private function addOpmlElements($opml_elements, $parent_cat = null, $flatten = false, $dryRun = false) { + // Get the categories by names so we can use this array to retrieve + // existing categories later. + $categories = $this->catDAO->listCategories(false); + $categories_by_names = []; + foreach ($categories as $category) { + $categories_by_names[$category->name()] = $category; + } + + // Get current numbers of categories and feeds, and the limits to + // verify the user can import its categories/feeds. 
+ $nb_categories = count($categories); $nb_feeds = count($this->feedDAO->listFeeds()); - $nb_cats = count($this->catDAO->listCategories(false)); $limits = FreshRSS_Context::$system_conf->limits; - //Sort with categories first - usort($opml_elements, static function ($a, $b) { - return strcmp( - (isset($a['xmlUrl']) ? 'Z' : 'A') . (isset($a['text']) ? $a['text'] : ''), - (isset($b['xmlUrl']) ? 'Z' : 'A') . (isset($b['text']) ? $b['text'] : '')); - }); - - $categories = []; - - foreach ($opml_elements as $elt) { - if (isset($elt['xmlUrl'])) { - // If xmlUrl exists, it means it is a feed - if (FreshRSS_Context::$isCli && $nb_feeds >= $limits['max_feeds']) { - Minz_Log::warning(_t('feedback.sub.feed.over_max', - $limits['max_feeds'])); - $this->lastStatus = false; - continue; - } + // Process the OPML outlines to get a list of categories and a list of + // feeds elements indexed by their categories names. + list ( + $categories_elements, + $categories_to_feeds, + ) = $this->loadFromOutlines($opml_array['body'], ''); - if ($this->addFeedOpml($elt, $parent_cat, $dryRun)) { - $nb_feeds++; + foreach ($categories_to_feeds as $category_name => $feeds_elements) { + $category_element = $categories_elements[$category_name] ?? 
null; + + $category = null; + if ($forced_category) { + // If the category is forced, ignore the actual category name + $category = $forced_category; + } elseif (isset($categories_by_names[$category_name])) { + // If the category already exists, get it from $categories_by_names + $category = $categories_by_names[$category_name]; + } elseif ($category_element) { + // Otherwise, create the category (if possible) + $limit_reached = $nb_categories >= $limits['max_categories']; + $can_create_category = FreshRSS_Context::$isCli || !$limit_reached; + + if ($can_create_category) { + $category = $this->createCategory($category_element, $dry_run); + if ($category) { + $categories_by_names[$category->name()] = $category; + $nb_categories++; + } } else { - $this->lastStatus = false; + Minz_Log::warning( + _t('feedback.sub.category.over_max', $limits['max_categories']) + ); } - } elseif (!empty($elt['text'])) { - // No xmlUrl? It should be a category! - $limit_reached = !$flatten && ($nb_cats >= $limits['max_categories']); - if (!FreshRSS_Context::$isCli && $limit_reached) { - Minz_Log::warning(_t('feedback.sub.category.over_max', - $limits['max_categories'])); + } + + if (!$category) { + // Category can be null if the feeds weren't in a category + // outline, or if we weren't able to create the category. + $category = $default_category; + } + + // Then, create the feeds one by one and attach them to the + // category we just got. 
+ foreach ($feeds_elements as $feed_element) { + $limit_reached = $nb_feeds >= $limits['max_feeds']; + $can_create_feed = FreshRSS_Context::$isCli || !$limit_reached; + if (!$can_create_feed) { + Minz_Log::warning( + _t('feedback.sub.feed.over_max', $limits['max_feeds']) + ); $this->lastStatus = false; - $flatten = true; + break; } - $category = $this->addCategoryOpml($elt, $parent_cat, $flatten, $dryRun); - - if ($category) { - $nb_cats++; - $categories[] = $category; + if ($this->createFeed($feed_element, $category, $dry_run)) { + // TODO what if the feed already exists in the database? + $nb_feeds++; + } else { + $this->lastStatus = false; } } } - return $categories; + return; } /** - * This method imports an OPML feed element. + * Create a feed from a feed element (i.e. OPML outline). * - * @param array $feed_elt an OPML element (must be a feed element). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @return FreshRSS_Feed|null a feed. + * @param array $feed_elt An OPML element (must be a feed element). + * @param FreshRSS_Category $category The category to associate to the feed. + * @param boolean $dry_run true to not create the feed in database. + * + * @return FreshRSS_Feed|null The created feed, or null if it failed. */ - private function addFeedOpml($feed_elt, $parent_cat, $dryRun = false) { - if (empty($feed_elt['xmlUrl'])) { - return null; - } - if ($parent_cat == null) { - // This feed has no parent category so we get the default one - $this->catDAO->checkDefault(); - $parent_cat = $this->catDAO->getDefault(); - if ($parent_cat == null) { - $this->lastStatus = false; - return null; - } - } - - // We get different useful information + private function createFeed($feed_elt, $category, $dry_run) { $url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']); - $name = Minz_Helper::htmlspecialchars_utf8($feed_elt['text'] ?? ''); + $name = $feed_elt['text'] ?? $feed_elt['title'] ?? 
''; + $name = Minz_Helper::htmlspecialchars_utf8($name); $website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? ''); $description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? ''); try { // Create a Feed object and add it in DB $feed = new FreshRSS_Feed($url); - $feed->_categoryId($parent_cat->id()); - $parent_cat->addFeed($feed); + $feed->_categoryId($category->id()); + $category->addFeed($feed); $feed->_name($name); $feed->_website($website); $feed->_description($description); switch ($feed_elt['type'] ?? '') { - case FreshRSS_Export_Service::TYPE_HTML_XPATH: + case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH): $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH); break; - case FreshRSS_Export_Service::TYPE_RSS_ATOM: + case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM): default: $feed->_kind(FreshRSS_Feed::KIND_RSS); break; } + if (isset($feed_elt['frss:cssFullContent'])) { + $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent'])); + } + + if (isset($feed_elt['frss:cssFullContentFilter'])) { + $feed->_attributes('path_entries_filter', $feed_elt['frss:cssFullContentFilter']); + } + + if (isset($feed_elt['frss:filtersActionRead'])) { + $feed->_filtersAction( + 'read', + preg_split('/[\n\r]+/', $feed_elt['frss:filtersActionRead']) + ); + } + $xPathSettings = []; - foreach ($feed_elt as $key => $value) { - if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? 
'') === FreshRSS_Export_Service::FRSS_NAMESPACE) { - switch ($key) { - case 'cssFullContent': $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($value['value'])); break; - case 'cssFullContentFilter': $feed->_attributes('path_entries_filter', $value['value']); break; - case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break; - case 'xPathItem': $xPathSettings['item'] = $value['value']; break; - case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break; - case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break; - case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break; - case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break; - case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break; - case 'xPathItemTimeFormat': $xPathSettings['itemTimeFormat'] = $value['value']; break; - case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break; - case 'xPathItemCategories': $xPathSettings['itemCategories'] = $value['value']; break; - case 'xPathItemUid': $xPathSettings['itemUid'] = $value['value']; break; - } - } + if (isset($feed_elt['frss:xPathItem'])) { + $xPathSettings['item'] = $feed_elt['frss:xPathItem']; } + if (isset($feed_elt['frss:xPathItemTitle'])) { + $xPathSettings['itemTitle'] = $feed_elt['frss:xPathItemTitle']; + } + if (isset($feed_elt['frss:xPathItemContent'])) { + $xPathSettings['itemContent'] = $feed_elt['frss:xPathItemContent']; + } + if (isset($feed_elt['frss:xPathItemUri'])) { + $xPathSettings['itemUri'] = $feed_elt['frss:xPathItemUri']; + } + if (isset($feed_elt['frss:xPathItemAuthor'])) { + $xPathSettings['itemAuthor'] = $feed_elt['frss:xPathItemAuthor']; + } + if (isset($feed_elt['frss:xPathItemTimestamp'])) { + $xPathSettings['itemTimestamp'] = $feed_elt['frss:xPathItemTimestamp']; + } + if (isset($feed_elt['frss:xPathItemTimeFormat'])) { + 
$xPathSettings['itemTimeFormat'] = $feed_elt['frss:xPathItemTimeFormat']; + } + if (isset($feed_elt['frss:xPathItemThumbnail'])) { + $xPathSettings['itemThumbnail'] = $feed_elt['frss:xPathItemThumbnail']; + } + if (isset($feed_elt['frss:xPathItemCategories'])) { + $xPathSettings['itemCategories'] = $feed_elt['frss:xPathItemCategories']; + } + if (isset($feed_elt['frss:xPathItemUid'])) { + $xPathSettings['itemUid'] = $feed_elt['frss:xPathItemUid']; + } + if (!empty($xPathSettings)) { $feed->_attributes('xpath', $xPathSettings); } @@ -188,9 +224,11 @@ class FreshRSS_Import_Service { // Call the extension hook /** @var FreshRSS_Feed|null */ $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed); - if ($dryRun) { + + if ($dry_run) { return $feed; } + if ($feed != null) { // addFeedObject checks if feed is already in DB $id = $this->feedDAO->addFeedObject($feed); @@ -202,81 +240,163 @@ class FreshRSS_Import_Service { } } } catch (FreshRSS_Feed_Exception $e) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML feed import: ' . $e->getMessage() . "\n"); - } else { - Minz_Log::warning($e->getMessage()); - } + self::log($e->getMessage()); $this->lastStatus = false; } - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML feed import from URL: ' . - SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id() . "\n"); - } else { - Minz_Log::warning('Error during OPML feed import from URL: ' . - SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id()); - } - + $clean_url = SimplePie_Misc::url_remove_credentials($url); + self::log("Cannot create {$clean_url} feed in category {$category->name()}"); return null; } /** - * This method imports an OPML category element. + * Create and return a category. + * + * @param array $category_element An OPML element (must be a category element). + * @param boolean $dry_run true to not create the category in database. 
* - * @param array $cat_elt an OPML element (must be a category element). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return FreshRSS_Category|null a new category containing some feeds, or null if no category was created, or false if an error occurred. + * @return FreshRSS_Category|null The created category, or null if it failed. */ - private function addCategoryOpml($cat_elt, $parent_cat, $flatten = false, $dryRun = false) { - $error = false; - $cat = null; - if (!$flatten) { - $catName = Minz_Helper::htmlspecialchars_utf8($cat_elt['text']); - $cat = new FreshRSS_Category($catName); - - foreach ($cat_elt as $key => $value) { - if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) { - switch ($key) { - case 'opmlUrl': - $opml_url = checkUrl($value['value']); - if ($opml_url != '') { - $cat->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML); - $cat->_attributes('opml_url', $opml_url); - } - break; - } - } + private function createCategory($category_element, $dry_run) { + $name = $category_element['text'] ?? $category_element['title'] ?? 
''; + $name = Minz_Helper::htmlspecialchars_utf8($name); + $category = new FreshRSS_Category($name); + + if (isset($category_element['frss:opmlUrl'])) { + $opml_url = checkUrl($category_element['frss:opmlUrl']); + if ($opml_url != '') { + $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML); + $category->_attributes('opml_url', $opml_url); } + } - if (!$dryRun) { - $id = $this->catDAO->addCategoryObject($cat); - if ($id == false) { - $this->lastStatus = false; - $error = true; - } else { - $cat->_id($id); + if ($dry_run) { + return $category; + } + + $id = $this->catDAO->addCategoryObject($category); + if ($id !== false) { + $category->_id($id); + return $category; + } else { + self::log("Cannot create category {$category->name()}"); + $this->lastStatus = false; + return null; + } + } + + /** + * Return the list of category and feed outlines by categories names. + * + * This method is applied to a list of outlines. It merges the different + * list of feeds from several outlines into one array. + * + * @param array $outlines + * The outlines from which to extract the outlines. + * @param string $parent_category_name + * The name of the parent category of the current outlines. + * + * @return array[] + */ + private function loadFromOutlines($outlines, $parent_category_name) { + $categories_elements = []; + $categories_to_feeds = []; + + foreach ($outlines as $outline) { + // Get the categories and feeds from the child outline (it may + // return several categories and feeds if the outline is a category). + list ( + $outline_categories, + $outline_categories_to_feeds, + ) = $this->loadFromOutline($outline, $parent_category_name); + + // Then, we merge the initial arrays with the arrays returned by + // the outline. 
+ $categories_elements = array_merge($categories_elements, $outline_categories); + + foreach ($outline_categories_to_feeds as $category_name => $feeds) { + if (!isset($categories_to_feeds[$category_name])) { + $categories_to_feeds[$category_name] = []; } + + $categories_to_feeds[$category_name] = array_merge( + $categories_to_feeds[$category_name], + $feeds + ); } - if ($error) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML category import from URL: ' . $catName . "\n"); - } else { - Minz_Log::warning('Error during OPML category import from URL: ' . $catName); - } + } + + return [$categories_elements, $categories_to_feeds]; + } + + /** + * Return the list of category and feed outlines by categories names. + * + * This method is applied to a specific outline. If the outline represents + * a category (i.e. @outlines key exists), it will reapply loadFromOutlines() + * to its children. If the outline represents a feed (i.e. xmlUrl key + * exists), it will add the outline to an array accessible by its category + * name. + * + * @param array $outline + * The outline from which to extract the categories and feeds outlines. + * @param string $parent_category_name + * The name of the parent category of the current outline. + * + * @return array[] + */ + private function loadFromOutline($outline, $parent_category_name) { + $categories_elements = []; + $categories_to_feeds = []; + + if ($parent_category_name === '' && isset($outline['category'])) { + // The outline has no parent category, but its OPML category + // attribute is set, so we use it as the category name. + // lib_opml parses this attribute as an array of strings, so we + // rebuild a string here. 
+ $parent_category_name = implode(', ', $outline['category']); + $categories_elements[$parent_category_name] = [ + 'text' => $parent_category_name, + ]; + } + + if (isset($outline['@outlines'])) { + // The outline has children, it's probably a category + if (!empty($outline['text'])) { + $category_name = $outline['text']; + } elseif (!empty($outline['title'])) { + $category_name = $outline['title']; } else { - $parent_cat = $cat; + $category_name = $parent_category_name; } + + list ( + $categories_elements, + $categories_to_feeds, + ) = $this->loadFromOutlines($outline['@outlines'], $category_name); + + unset($outline['@outlines']); + $categories_elements[$category_name] = $outline; } - if (isset($cat_elt['@outlines'])) { - // Our cat_elt contains more categories or more feeds, so we - // add them recursively. - // Note: FreshRSS does not support yet category arborescence, so always flatten from here - $this->addOpmlElements($cat_elt['@outlines'], $parent_cat, true, $dryRun); + // The xmlUrl means it's a feed URL: add the outline to the array if it + // exists. 
+ if (isset($outline['xmlUrl'])) { + if (!isset($categories_to_feeds[$parent_category_name])) { + $categories_to_feeds[$parent_category_name] = []; + } + + $categories_to_feeds[$parent_category_name][] = $outline; } - return $cat; + return [$categories_elements, $categories_to_feeds]; + } + + private static function log($message) { + if (FreshRSS_Context::$isCli) { + fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n"); + } else { + Minz_Log::warning("Error during OPML import: {$message}"); + } } } -- cgit v1.2.3 From 05ae1b0d2684cea4eda664c5ea1a995cb9f0c4b9 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Thu, 9 Feb 2023 13:57:20 +0100 Subject: XML+XPath (#5076) * XML+XPath #fix https://github.com/FreshRSS/FreshRSS/issues/5075 Implementation allowing to take an XML document as input using an XML parser (instead of an HTML parser for HTML+XPath) * Remove noise from another PR * Better MIME for XML * And add glob *.xml for cache cleaning * Minor syntax * Add glob json for clean cache --- app/Controllers/feedController.php | 14 ++++++++++---- app/Controllers/subscriptionController.php | 2 +- app/Models/Feed.php | 29 ++++++++++++++++++++++++----- app/Services/ExportService.php | 1 + app/Services/ImportService.php | 5 ++++- app/i18n/cz/sub.php | 1 + app/i18n/de/sub.php | 1 + app/i18n/el/sub.php | 1 + app/i18n/en-us/sub.php | 1 + app/i18n/en/sub.php | 1 + app/i18n/es/sub.php | 1 + app/i18n/fr/sub.php | 1 + app/i18n/he/sub.php | 1 + app/i18n/id/sub.php | 1 + app/i18n/it/sub.php | 1 + app/i18n/ja/sub.php | 1 + app/i18n/ko/sub.php | 1 + app/i18n/nl/sub.php | 1 + app/i18n/oc/sub.php | 1 + app/i18n/pl/sub.php | 1 + app/i18n/pt-br/sub.php | 1 + app/i18n/ru/sub.php | 1 + app/i18n/sk/sub.php | 1 + app/i18n/tr/sub.php | 1 + app/i18n/zh-cn/sub.php | 1 + app/i18n/zh-tw/sub.php | 1 + app/views/helpers/export/opml.phtml | 11 +++++++++-- app/views/helpers/feed/update.phtml | 5 +++-- app/views/subscription/add.phtml | 1 + docs/en/developers/OPML.md | 4 +++- 
lib/lib_rss.php | 14 ++++++++++++-- p/scripts/feed.js | 11 +++++++++-- 32 files changed, 98 insertions(+), 20 deletions(-) (limited to 'app/Services/ImportService.php') diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index 2bef85f0e..84f38fe5e 100644 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -81,6 +81,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $feed->load(true); //Throws FreshRSS_Feed_Exception, Minz_FileNotExistException break; case FreshRSS_Feed::KIND_HTML_XPATH: + case FreshRSS_Feed::KIND_XML_XPATH: $feed->_website($url); break; } @@ -201,8 +202,8 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $timeout = intval(Minz_Request::param('timeout', 0)); $attributes['timeout'] = $timeout > 0 ? $timeout : null; - $feed_kind = Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS); - if ($feed_kind == FreshRSS_Feed::KIND_HTML_XPATH) { + $feed_kind = (int)Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS); + if ($feed_kind === FreshRSS_Feed::KIND_HTML_XPATH || $feed_kind === FreshRSS_Feed::KIND_XML_XPATH) { $xPathSettings = []; if (Minz_Request::param('xPathFeedTitle', '') != '') $xPathSettings['feedTitle'] = Minz_Request::param('xPathFeedTitle', '', true); if (Minz_Request::param('xPathItem', '') != '') $xPathSettings['item'] = Minz_Request::param('xPathItem', '', true); @@ -385,10 +386,15 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { if ($simplePiePush) { $simplePie = $simplePiePush; //Used by WebSub } elseif ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) { - $simplePie = $feed->loadHtmlXpath(false, $isNewFeed); - if ($simplePie == null) { + $simplePie = $feed->loadHtmlXpath(); + if ($simplePie === null) { throw new FreshRSS_Feed_Exception('HTML+XPath Web scraping failed for [' . $feed->url(false) . 
']'); } + } elseif ($feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) { + $simplePie = $feed->loadHtmlXpath(); + if ($simplePie === null) { + throw new FreshRSS_Feed_Exception('XML+XPath parsing failed for [' . $feed->url(false) . ']'); + } } else { $simplePie = $feed->load(false, $isNewFeed); } diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index b2ee046d9..f0355a82a 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -203,7 +203,7 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { $feed->_filtersAction('read', preg_split('/[\n\r]+/', Minz_Request::param('filteractions_read', ''))); $feed->_kind(intval(Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS))); - if ($feed->kind() == FreshRSS_Feed::KIND_HTML_XPATH) { + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH || $feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) { $xPathSettings = []; if (Minz_Request::param('xPathItem', '') != '') $xPathSettings['item'] = Minz_Request::param('xPathItem', '', true); if (Minz_Request::param('xPathItemTitle', '') != '') $xPathSettings['itemTitle'] = Minz_Request::param('xPathItemTitle', '', true); diff --git a/app/Models/Feed.php b/app/Models/Feed.php index f7ff76768..7c46199a5 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -17,6 +17,11 @@ class FreshRSS_Feed extends Minz_Model { * @var int */ const KIND_HTML_XPATH = 10; + /** + * Normal XML with XPath scraping + * @var int + */ + const KIND_XML_XPATH = 15; /** * Normal JSON with XPath scraping * @var int @@ -586,7 +591,7 @@ class FreshRSS_Feed extends Minz_Model { /** * @return SimplePie|null */ - public function loadHtmlXpath(bool $loadDetails = false, bool $noCache = false) { + public function loadHtmlXpath() { if ($this->url == '') { return null; } @@ -614,8 +619,9 @@ class FreshRSS_Feed extends Minz_Model { return null; } - $cachePath = 
FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), FreshRSS_Feed::KIND_HTML_XPATH); - $html = httpGet($feedSourceUrl, $cachePath, 'html', $this->attributes()); + $cachePath = FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), $this->kind()); + $html = httpGet($feedSourceUrl, $cachePath, + $this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html', $this->attributes()); if (strlen($html) <= 0) { return null; } @@ -630,7 +636,18 @@ class FreshRSS_Feed extends Minz_Model { $doc = new DOMDocument(); $doc->recover = true; $doc->strictErrorChecking = false; - $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + + switch ($this->kind()) { + case FreshRSS_Feed::KIND_HTML_XPATH: + $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + break; + case FreshRSS_Feed::KIND_XML_XPATH: + $doc->loadXML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + break; + default: + return null; + } + $xpath = new DOMXPath($doc); $view->rss_title = $xPathFeedTitle == '' ? $this->name() : htmlspecialchars(@$xpath->evaluate('normalize-space(' . $xPathFeedTitle . ')'), ENT_COMPAT, 'UTF-8'); @@ -776,8 +793,10 @@ class FreshRSS_Feed extends Minz_Model { public static function cacheFilename(string $url, array $attributes, int $kind = FreshRSS_Feed::KIND_RSS): string { $simplePie = customSimplePie($attributes); $filename = $simplePie->get_cache_filename($url); - if ($kind == FreshRSS_Feed::KIND_HTML_XPATH) { + if ($kind === FreshRSS_Feed::KIND_HTML_XPATH) { return CACHE_PATH . '/' . $filename . '.html'; + } elseif ($kind === FreshRSS_Feed::KIND_XML_XPATH) { + return CACHE_PATH . '/' . $filename . '.xml'; } else { return CACHE_PATH . '/' . $filename . 
'.spc'; } diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php index 2f35666a8..6b0a3f178 100644 --- a/app/Services/ExportService.php +++ b/app/Services/ExportService.php @@ -21,6 +21,7 @@ class FreshRSS_Export_Service { const FRSS_NAMESPACE = 'https://freshrss.org/opml'; const TYPE_HTML_XPATH = 'HTML+XPath'; + const TYPE_XML_XPATH = 'XML+XPath'; const TYPE_RSS_ATOM = 'rss'; /** diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 68aa6f741..55aa28679 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -160,10 +160,13 @@ class FreshRSS_Import_Service { $feed->_website($website); $feed->_description($description); - switch ($feed_elt['type'] ?? '') { + switch (strtolower($feed_elt['type'] ?? '')) { case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH): $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH); break; + case strtolower(FreshRSS_Export_Service::TYPE_XML_XPATH): + $feed->_kind(FreshRSS_Feed::KIND_XML_XPATH); + break; case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM): default: $feed->_kind(FreshRSS_Feed::KIND_RSS); diff --git a/app/i18n/cz/sub.php b/app/i18n/cz/sub.php index a11a9359d..3d08c315b 100644 --- a/app/i18n/cz/sub.php +++ b/app/i18n/cz/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath pro:', ), 'rss' => 'RSS / Atom (výchozí)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Vymazat mezipaměť', diff --git a/app/i18n/de/sub.php b/app/i18n/de/sub.php index 580f7d348..b265c1b98 100644 --- a/app/i18n/de/sub.php +++ b/app/i18n/de/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath für:', ), 'rss' => 'RSS / Atom (Standard)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Zwischenspeicher leeren', diff --git a/app/i18n/el/sub.php b/app/i18n/el/sub.php index 424fafc7b..aae9ae412 100644 --- a/app/i18n/el/sub.php +++ b/app/i18n/el/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' 
=> 'XPath for:', // TODO ), 'rss' => 'RSS / Atom (default)', // TODO + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // TODO diff --git a/app/i18n/en-us/sub.php b/app/i18n/en-us/sub.php index a6b311084..92d75b81e 100644 --- a/app/i18n/en-us/sub.php +++ b/app/i18n/en-us/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath for:', // IGNORE ), 'rss' => 'RSS / Atom (default)', // IGNORE + 'xml_xpath' => 'XML + XPath', // IGNORE ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // IGNORE diff --git a/app/i18n/en/sub.php b/app/i18n/en/sub.php index c7e100c25..04caaff05 100644 --- a/app/i18n/en/sub.php +++ b/app/i18n/en/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath for:', ), 'rss' => 'RSS / Atom (default)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Clear cache', diff --git a/app/i18n/es/sub.php b/app/i18n/es/sub.php index 52d681067..4fd2fa393 100644 --- a/app/i18n/es/sub.php +++ b/app/i18n/es/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath para:', ), 'rss' => 'RSS / Atom (por defecto)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Borrar caché', diff --git a/app/i18n/fr/sub.php b/app/i18n/fr/sub.php index f9df0dbcc..be6dc094d 100644 --- a/app/i18n/fr/sub.php +++ b/app/i18n/fr/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath pour :', ), 'rss' => 'RSS / Atom (par défaut)', + 'xml_xpath' => 'XML + XPath', // IGNORE ), 'maintenance' => array( 'clear_cache' => 'Vider le cache', diff --git a/app/i18n/he/sub.php b/app/i18n/he/sub.php index 25552ffa1..bae5f5177 100644 --- a/app/i18n/he/sub.php +++ b/app/i18n/he/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath for:', // TODO ), 'rss' => 'RSS / Atom (default)', // TODO + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // TODO diff --git a/app/i18n/id/sub.php b/app/i18n/id/sub.php index 
7fdf5c024..3f9a4916a 100644 --- a/app/i18n/id/sub.php +++ b/app/i18n/id/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath for:', // TODO ), 'rss' => 'RSS / Atom (default)', // TODO + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // TODO diff --git a/app/i18n/it/sub.php b/app/i18n/it/sub.php index 8614caca7..7ab83cf07 100644 --- a/app/i18n/it/sub.php +++ b/app/i18n/it/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath per:', ), 'rss' => 'RSS / Atom (predefinito)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Svuota cache', diff --git a/app/i18n/ja/sub.php b/app/i18n/ja/sub.php index 80548c025..2425b21f3 100644 --- a/app/i18n/ja/sub.php +++ b/app/i18n/ja/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPathは:', ), 'rss' => 'RSS / Atom (標準)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'キャッシュのクリア', diff --git a/app/i18n/ko/sub.php b/app/i18n/ko/sub.php index e0ef5990b..f376247d5 100644 --- a/app/i18n/ko/sub.php +++ b/app/i18n/ko/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => '다음의 XPath:', ), 'rss' => 'RSS / Atom (기본값)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => '캐쉬 지우기', diff --git a/app/i18n/nl/sub.php b/app/i18n/nl/sub.php index 0fa767171..631da9477 100644 --- a/app/i18n/nl/sub.php +++ b/app/i18n/nl/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath voor:', ), 'rss' => 'RSS / Atom (standaard)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Cache leegmaken', diff --git a/app/i18n/oc/sub.php b/app/i18n/oc/sub.php index 92a73057c..008b4964d 100644 --- a/app/i18n/oc/sub.php +++ b/app/i18n/oc/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath per :', ), 'rss' => 'RSS / Atom (defaut)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Escafar lo cache', diff --git 
a/app/i18n/pl/sub.php b/app/i18n/pl/sub.php index b6121fcb7..565401982 100644 --- a/app/i18n/pl/sub.php +++ b/app/i18n/pl/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath dla:', ), 'rss' => 'RSS / Atom (domyślne)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Wyczyść pamięć podręczną', diff --git a/app/i18n/pt-br/sub.php b/app/i18n/pt-br/sub.php index c9755755e..4cdee8681 100644 --- a/app/i18n/pt-br/sub.php +++ b/app/i18n/pt-br/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath para:', ), 'rss' => 'RSS / Atom (padrão)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Limpar o cache', diff --git a/app/i18n/ru/sub.php b/app/i18n/ru/sub.php index 5704b53b1..d13c4c4f0 100644 --- a/app/i18n/ru/sub.php +++ b/app/i18n/ru/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath для:', ), 'rss' => 'RSS / Atom (по умолчанию)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Очистить кэш', diff --git a/app/i18n/sk/sub.php b/app/i18n/sk/sub.php index f583f6ca0..3c980d202 100644 --- a/app/i18n/sk/sub.php +++ b/app/i18n/sk/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath pre:', ), 'rss' => 'RSS / Atom (prednastavené)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Vymazať vyrovnáciu pamäť', diff --git a/app/i18n/tr/sub.php b/app/i18n/tr/sub.php index 056c059ac..3e03f667c 100644 --- a/app/i18n/tr/sub.php +++ b/app/i18n/tr/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath:', ), 'rss' => 'RSS / Atom (varsayılan)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => 'Önbelleği temizle', diff --git a/app/i18n/zh-cn/sub.php b/app/i18n/zh-cn/sub.php index 2f9d17ace..5e6e570a9 100644 --- a/app/i18n/zh-cn/sub.php +++ b/app/i18n/zh-cn/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath 定位:', ), 'rss' => 'RSS / Atom (默认)', + 'xml_xpath' => 'XML + XPath', // 
TODO ), 'maintenance' => array( 'clear_cache' => '清理缓存', diff --git a/app/i18n/zh-tw/sub.php b/app/i18n/zh-tw/sub.php index dddcb2661..8a255645d 100644 --- a/app/i18n/zh-tw/sub.php +++ b/app/i18n/zh-tw/sub.php @@ -122,6 +122,7 @@ return array( 'xpath' => 'XPath 定位:', ), 'rss' => 'RSS / Atom (默認)', + 'xml_xpath' => 'XML + XPath', // TODO ), 'maintenance' => array( 'clear_cache' => '清理暫存', diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml index eb6f7523b..64c83c960 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -18,8 +18,15 @@ function feedsToOutlines($feeds, $excludeMutedFeeds = false): array { 'description' => htmlspecialchars_decode($feed->description(), ENT_QUOTES), ]; - if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) { - $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH; + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH || $feed->kind() === FreshRSS_Feed::KIND_XML_XPATH) { + switch ($feed->kind()) { + case FreshRSS_Feed::KIND_HTML_XPATH: + $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH; + break; + case FreshRSS_Feed::KIND_XML_XPATH: + $outline['type'] = FreshRSS_Export_Service::TYPE_XML_XPATH; + break; + } /** @var array */ $xPathSettings = $feed->attributes('xpath'); $outline['frss:xPathItem'] = $xPathSettings['item'] ?? null; diff --git a/app/views/helpers/feed/update.phtml b/app/views/helpers/feed/update.phtml index 5b958451d..0cd2ec0c3 100644 --- a/app/views/helpers/feed/update.phtml +++ b/app/views/helpers/feed/update.phtml @@ -391,8 +391,9 @@
diff --git a/app/views/subscription/add.phtml b/app/views/subscription/add.phtml index 7fa59e751..4e9da877f 100644 --- a/app/views/subscription/add.phtml +++ b/app/views/subscription/add.phtml @@ -70,6 +70,7 @@ diff --git a/docs/en/developers/OPML.md b/docs/en/developers/OPML.md index 2190a1de3..f65fd2faa 100644 --- a/docs/en/developers/OPML.md +++ b/docs/en/developers/OPML.md @@ -17,12 +17,14 @@ FreshRSS uses the XML namespace to export/import ext The list of the custom FreshRSS attributes can be seen in [the source code](https://github.com/FreshRSS/FreshRSS/blob/edge/app/views/helpers/export/opml.phtml), and here is an overview: -### HTML+XPath +### HTML+XPath or XML+XPath * ` ℹ️ [XPath 1.0](https://en.wikipedia.org/wiki/XPath) is a standard query language, which FreshRSS supports to enable [Web scraping](https://en.wikipedia.org/wiki/Web_scraping). +* ` $attributes */ function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = []): string { @@ -439,9 +443,15 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a $accept = '*/*;q=0.8'; switch ($type) { + case 'json': + $accept = 'application/json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7'; + break; case 'opml': $accept = 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8'; break; + case 'xml': + $accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8'; + break; case 'html': default: $accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'; diff --git a/p/scripts/feed.js b/p/scripts/feed.js index 1a6833db6..29af2a3ea 100644 --- a/p/scripts/feed.js +++ b/p/scripts/feed.js @@ -88,10 +88,17 @@ function init_disable_elements_on_update(parent) { function init_select_show(parent) { const listener = (select) => { const options = select.querySelectorAll('option[data-show]'); + const shows = {}; // To allow multiple options to show the same element for (const option of options) { - const elem = 
document.getElementById(option.dataset.show); + if (!shows[option.dataset.show]) { + shows[option.dataset.show] = option.selected; + } + } + + for (const show in shows) { + const elem = document.getElementById(show); if (elem) { - elem.style.display = option.selected ? 'block' : 'none'; + elem.style.display = shows[show] ? 'block' : 'none'; } } }; -- cgit v1.2.3