aboutsummaryrefslogtreecommitdiff
path: root/app/Services/ImportService.php
diff options
context:
space:
mode:
authorGravatar berumuron <dev@marienfressinaud.fr> 2023-01-18 10:12:21 +0100
committerGravatar GitHub <noreply@github.com> 2023-01-18 10:12:21 +0100
commitdaaa391e33c5d92e3dd91bb0b81ac420abed7097 (patch)
treea3263c26ac90fb3115627e156eba580826acfd4f /app/Services/ImportService.php
parent216e39c3cc43061686981b96328796765d264d29 (diff)
tec: Update the lib_opml (#4403)
* fix: Fix undefined GLOB_BRACE on Alpine The manual states that: > Note: The GLOB_BRACE flag is not available on some non GNU systems, > like Solaris or Alpine Linux. This generated an error on Alpine. Reference: https://www.php.net/manual/function.glob.php * fix: List details of feeds for OPML exportation The details are necessary to export the XPath information, the CSS full content path and read actions filters. * Update LibOpml to 0.4.0 * Refactor OPML importation to be more robust First, it fixes two regressions introduced by the update of lib_opml: - title attribute is used when text attribute is missing; - the OPML category attribute is used as a fallback for feeds categories. In a related way, if also fixes a problem when a feed had both a parent category outline and a category attribute. Before, it only considered the attribute as its category, but now it considers the parent outline. Then, it counts category limit correctly by not increasing `$nb_categories` if the category already exists. * Exclude lib_opml from the CodeSniffer * Fix variable names when logging some errors * Fix catch of LibOpml Exception * Make sure to declare the category * Exclude lib_opml from PHPStan analyze * Disable markdownlint for lib_opml * Fix typos * Use auto-loading and allow updates via Composer * Fix broken links to lib_opml * Bring back the ability to import the OPML frss:opmlUrl attribute * Refactor the logs of OPML errors * Update lib_opml to the version 0.5.0 Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Diffstat (limited to 'app/Services/ImportService.php')
-rw-r--r--app/Services/ImportService.php440
1 files changed, 280 insertions, 160 deletions
diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php
index 28286a753..68aa6f741 100644
--- a/app/Services/ImportService.php
+++ b/app/Services/ImportService.php
@@ -19,8 +19,6 @@ class FreshRSS_Import_Service {
* @param string $username
*/
public function __construct($username = null) {
- require_once(LIB_PATH . '/lib_opml.php');
-
$this->catDAO = FreshRSS_Factory::createCategoryDao($username);
$this->feedDAO = FreshRSS_Factory::createFeedDao($username);
}
@@ -34,153 +32,191 @@ class FreshRSS_Import_Service {
* This method parses and imports an OPML file.
*
* @param string $opml_file the OPML file content.
- * @param FreshRSS_Category|null $parent_cat the name of the parent category.
- * @param boolean $flatten true to disable categories, false otherwise.
- * @return array<FreshRSS_Category>|false an array of categories containing some feeds, or false if an error occurred.
+ * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category.
+ * @param boolean $dry_run true to not create categories and feeds in database.
*/
- public function importOpml(string $opml_file, $parent_cat = null, $flatten = false, $dryRun = false) {
+ public function importOpml(string $opml_file, $forced_category = null, $dry_run = false) {
$this->lastStatus = true;
$opml_array = array();
try {
- $opml_array = libopml_parse_string($opml_file, false);
- } catch (LibOPML_Exception $e) {
- if (FreshRSS_Context::$isCli) {
- fwrite(STDERR, 'FreshRSS error during OPML parsing: ' . $e->getMessage() . "\n");
- } else {
- Minz_Log::warning($e->getMessage());
- }
+ $libopml = new \marienfressinaud\LibOpml\LibOpml(false);
+ $opml_array = $libopml->parseString($opml_file);
+ } catch (\marienfressinaud\LibOpml\Exception $e) {
+ self::log($e->getMessage());
$this->lastStatus = false;
- return false;
+ return;
}
- return $this->addOpmlElements($opml_array['body'], $parent_cat, $flatten, $dryRun);
- }
+ $this->catDAO->checkDefault();
+ $default_category = $this->catDAO->getDefault();
+ if (!$default_category) {
+ self::log('Cannot get the default category');
+ $this->lastStatus = false;
+ return;
+ }
- /**
- * This method imports an OPML file based on its body.
- *
- * @param array $opml_elements an OPML element (body or outline).
- * @param FreshRSS_Category|null $parent_cat the name of the parent category.
- * @param boolean $flatten true to disable categories, false otherwise.
- * @return array<FreshRSS_Category> an array of categories containing some feeds
- */
- private function addOpmlElements($opml_elements, $parent_cat = null, $flatten = false, $dryRun = false) {
+ // Get the categories by names so we can use this array to retrieve
+ // existing categories later.
+ $categories = $this->catDAO->listCategories(false);
+ $categories_by_names = [];
+ foreach ($categories as $category) {
+ $categories_by_names[$category->name()] = $category;
+ }
+
+ // Get current numbers of categories and feeds, and the limits to
+ // verify the user can import its categories/feeds.
+ $nb_categories = count($categories);
$nb_feeds = count($this->feedDAO->listFeeds());
- $nb_cats = count($this->catDAO->listCategories(false));
$limits = FreshRSS_Context::$system_conf->limits;
- //Sort with categories first
- usort($opml_elements, static function ($a, $b) {
- return strcmp(
- (isset($a['xmlUrl']) ? 'Z' : 'A') . (isset($a['text']) ? $a['text'] : ''),
- (isset($b['xmlUrl']) ? 'Z' : 'A') . (isset($b['text']) ? $b['text'] : ''));
- });
-
- $categories = [];
-
- foreach ($opml_elements as $elt) {
- if (isset($elt['xmlUrl'])) {
- // If xmlUrl exists, it means it is a feed
- if (FreshRSS_Context::$isCli && $nb_feeds >= $limits['max_feeds']) {
- Minz_Log::warning(_t('feedback.sub.feed.over_max',
- $limits['max_feeds']));
- $this->lastStatus = false;
- continue;
- }
+ // Process the OPML outlines to get a list of categories and a list of
+ // feeds elements indexed by their categories names.
+ list (
+ $categories_elements,
+ $categories_to_feeds,
+ ) = $this->loadFromOutlines($opml_array['body'], '');
- if ($this->addFeedOpml($elt, $parent_cat, $dryRun)) {
- $nb_feeds++;
+ foreach ($categories_to_feeds as $category_name => $feeds_elements) {
+ $category_element = $categories_elements[$category_name] ?? null;
+
+ $category = null;
+ if ($forced_category) {
+ // If the category is forced, ignore the actual category name
+ $category = $forced_category;
+ } elseif (isset($categories_by_names[$category_name])) {
+ // If the category already exists, get it from $categories_by_names
+ $category = $categories_by_names[$category_name];
+ } elseif ($category_element) {
+ // Otherwise, create the category (if possible)
+ $limit_reached = $nb_categories >= $limits['max_categories'];
+ $can_create_category = FreshRSS_Context::$isCli || !$limit_reached;
+
+ if ($can_create_category) {
+ $category = $this->createCategory($category_element, $dry_run);
+ if ($category) {
+ $categories_by_names[$category->name()] = $category;
+ $nb_categories++;
+ }
} else {
- $this->lastStatus = false;
+ Minz_Log::warning(
+ _t('feedback.sub.category.over_max', $limits['max_categories'])
+ );
}
- } elseif (!empty($elt['text'])) {
- // No xmlUrl? It should be a category!
- $limit_reached = !$flatten && ($nb_cats >= $limits['max_categories']);
- if (!FreshRSS_Context::$isCli && $limit_reached) {
- Minz_Log::warning(_t('feedback.sub.category.over_max',
- $limits['max_categories']));
+ }
+
+ if (!$category) {
+ // Category can be null if the feeds weren't in a category
+ // outline, or if we weren't able to create the category.
+ $category = $default_category;
+ }
+
+ // Then, create the feeds one by one and attach them to the
+ // category we just got.
+ foreach ($feeds_elements as $feed_element) {
+ $limit_reached = $nb_feeds >= $limits['max_feeds'];
+ $can_create_feed = FreshRSS_Context::$isCli || !$limit_reached;
+ if (!$can_create_feed) {
+ Minz_Log::warning(
+ _t('feedback.sub.feed.over_max', $limits['max_feeds'])
+ );
$this->lastStatus = false;
- $flatten = true;
+ break;
}
- $category = $this->addCategoryOpml($elt, $parent_cat, $flatten, $dryRun);
-
- if ($category) {
- $nb_cats++;
- $categories[] = $category;
+ if ($this->createFeed($feed_element, $category, $dry_run)) {
+ // TODO what if the feed already exists in the database?
+ $nb_feeds++;
+ } else {
+ $this->lastStatus = false;
}
}
}
- return $categories;
+ return;
}
/**
- * This method imports an OPML feed element.
+ * Create a feed from a feed element (i.e. OPML outline).
*
- * @param array $feed_elt an OPML element (must be a feed element).
- * @param FreshRSS_Category|null $parent_cat the name of the parent category.
- * @return FreshRSS_Feed|null a feed.
+ * @param array<string, string> $feed_elt An OPML element (must be a feed element).
+ * @param FreshRSS_Category $category The category to associate to the feed.
+ * @param boolean $dry_run true to not create the feed in database.
+ *
+ * @return FreshRSS_Feed|null The created feed, or null if it failed.
*/
- private function addFeedOpml($feed_elt, $parent_cat, $dryRun = false) {
- if (empty($feed_elt['xmlUrl'])) {
- return null;
- }
- if ($parent_cat == null) {
- // This feed has no parent category so we get the default one
- $this->catDAO->checkDefault();
- $parent_cat = $this->catDAO->getDefault();
- if ($parent_cat == null) {
- $this->lastStatus = false;
- return null;
- }
- }
-
- // We get different useful information
+ private function createFeed($feed_elt, $category, $dry_run) {
$url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']);
- $name = Minz_Helper::htmlspecialchars_utf8($feed_elt['text'] ?? '');
+ $name = $feed_elt['text'] ?? $feed_elt['title'] ?? '';
+ $name = Minz_Helper::htmlspecialchars_utf8($name);
$website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? '');
$description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? '');
try {
// Create a Feed object and add it in DB
$feed = new FreshRSS_Feed($url);
- $feed->_categoryId($parent_cat->id());
- $parent_cat->addFeed($feed);
+ $feed->_categoryId($category->id());
+ $category->addFeed($feed);
$feed->_name($name);
$feed->_website($website);
$feed->_description($description);
switch ($feed_elt['type'] ?? '') {
- case FreshRSS_Export_Service::TYPE_HTML_XPATH:
+ case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH):
$feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH);
break;
- case FreshRSS_Export_Service::TYPE_RSS_ATOM:
+ case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM):
default:
$feed->_kind(FreshRSS_Feed::KIND_RSS);
break;
}
+ if (isset($feed_elt['frss:cssFullContent'])) {
+ $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent']));
+ }
+
+ if (isset($feed_elt['frss:cssFullContentFilter'])) {
+ $feed->_attributes('path_entries_filter', $feed_elt['frss:cssFullContentFilter']);
+ }
+
+ if (isset($feed_elt['frss:filtersActionRead'])) {
+ $feed->_filtersAction(
+ 'read',
+ preg_split('/[\n\r]+/', $feed_elt['frss:filtersActionRead'])
+ );
+ }
+
$xPathSettings = [];
- foreach ($feed_elt as $key => $value) {
- if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) {
- switch ($key) {
- case 'cssFullContent': $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($value['value'])); break;
- case 'cssFullContentFilter': $feed->_attributes('path_entries_filter', $value['value']); break;
- case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break;
- case 'xPathItem': $xPathSettings['item'] = $value['value']; break;
- case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break;
- case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break;
- case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break;
- case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break;
- case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break;
- case 'xPathItemTimeFormat': $xPathSettings['itemTimeFormat'] = $value['value']; break;
- case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break;
- case 'xPathItemCategories': $xPathSettings['itemCategories'] = $value['value']; break;
- case 'xPathItemUid': $xPathSettings['itemUid'] = $value['value']; break;
- }
- }
+ if (isset($feed_elt['frss:xPathItem'])) {
+ $xPathSettings['item'] = $feed_elt['frss:xPathItem'];
}
+ if (isset($feed_elt['frss:xPathItemTitle'])) {
+ $xPathSettings['itemTitle'] = $feed_elt['frss:xPathItemTitle'];
+ }
+ if (isset($feed_elt['frss:xPathItemContent'])) {
+ $xPathSettings['itemContent'] = $feed_elt['frss:xPathItemContent'];
+ }
+ if (isset($feed_elt['frss:xPathItemUri'])) {
+ $xPathSettings['itemUri'] = $feed_elt['frss:xPathItemUri'];
+ }
+ if (isset($feed_elt['frss:xPathItemAuthor'])) {
+ $xPathSettings['itemAuthor'] = $feed_elt['frss:xPathItemAuthor'];
+ }
+ if (isset($feed_elt['frss:xPathItemTimestamp'])) {
+ $xPathSettings['itemTimestamp'] = $feed_elt['frss:xPathItemTimestamp'];
+ }
+ if (isset($feed_elt['frss:xPathItemTimeFormat'])) {
+ $xPathSettings['itemTimeFormat'] = $feed_elt['frss:xPathItemTimeFormat'];
+ }
+ if (isset($feed_elt['frss:xPathItemThumbnail'])) {
+ $xPathSettings['itemThumbnail'] = $feed_elt['frss:xPathItemThumbnail'];
+ }
+ if (isset($feed_elt['frss:xPathItemCategories'])) {
+ $xPathSettings['itemCategories'] = $feed_elt['frss:xPathItemCategories'];
+ }
+ if (isset($feed_elt['frss:xPathItemUid'])) {
+ $xPathSettings['itemUid'] = $feed_elt['frss:xPathItemUid'];
+ }
+
if (!empty($xPathSettings)) {
$feed->_attributes('xpath', $xPathSettings);
}
@@ -188,9 +224,11 @@ class FreshRSS_Import_Service {
// Call the extension hook
/** @var FreshRSS_Feed|null */
$feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);
- if ($dryRun) {
+
+ if ($dry_run) {
return $feed;
}
+
if ($feed != null) {
// addFeedObject checks if feed is already in DB
$id = $this->feedDAO->addFeedObject($feed);
@@ -202,81 +240,163 @@ class FreshRSS_Import_Service {
}
}
} catch (FreshRSS_Feed_Exception $e) {
- if (FreshRSS_Context::$isCli) {
- fwrite(STDERR, 'FreshRSS error during OPML feed import: ' . $e->getMessage() . "\n");
- } else {
- Minz_Log::warning($e->getMessage());
- }
+ self::log($e->getMessage());
$this->lastStatus = false;
}
- if (FreshRSS_Context::$isCli) {
- fwrite(STDERR, 'FreshRSS error during OPML feed import from URL: ' .
- SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id() . "\n");
- } else {
- Minz_Log::warning('Error during OPML feed import from URL: ' .
- SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id());
- }
-
+ $clean_url = SimplePie_Misc::url_remove_credentials($url);
+ self::log("Cannot create {$clean_url} feed in category {$category->name()}");
return null;
}
/**
- * This method imports an OPML category element.
+ * Create and return a category.
+ *
+ * @param array<string, string> $category_element An OPML element (must be a category element).
+ * @param boolean $dry_run true to not create the category in database.
*
- * @param array $cat_elt an OPML element (must be a category element).
- * @param FreshRSS_Category|null $parent_cat the name of the parent category.
- * @param boolean $flatten true to disable categories, false otherwise.
- * @return FreshRSS_Category|null a new category containing some feeds, or null if no category was created, or false if an error occurred.
+ * @return FreshRSS_Category|null The created category, or null if it failed.
*/
- private function addCategoryOpml($cat_elt, $parent_cat, $flatten = false, $dryRun = false) {
- $error = false;
- $cat = null;
- if (!$flatten) {
- $catName = Minz_Helper::htmlspecialchars_utf8($cat_elt['text']);
- $cat = new FreshRSS_Category($catName);
-
- foreach ($cat_elt as $key => $value) {
- if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) {
- switch ($key) {
- case 'opmlUrl':
- $opml_url = checkUrl($value['value']);
- if ($opml_url != '') {
- $cat->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
- $cat->_attributes('opml_url', $opml_url);
- }
- break;
- }
- }
+ private function createCategory($category_element, $dry_run) {
+ $name = $category_element['text'] ?? $category_element['title'] ?? '';
+ $name = Minz_Helper::htmlspecialchars_utf8($name);
+ $category = new FreshRSS_Category($name);
+
+ if (isset($category_element['frss:opmlUrl'])) {
+ $opml_url = checkUrl($category_element['frss:opmlUrl']);
+ if ($opml_url != '') {
+ $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML);
+ $category->_attributes('opml_url', $opml_url);
}
+ }
- if (!$dryRun) {
- $id = $this->catDAO->addCategoryObject($cat);
- if ($id == false) {
- $this->lastStatus = false;
- $error = true;
- } else {
- $cat->_id($id);
+ if ($dry_run) {
+ return $category;
+ }
+
+ $id = $this->catDAO->addCategoryObject($category);
+ if ($id !== false) {
+ $category->_id($id);
+ return $category;
+ } else {
+ self::log("Cannot create category {$category->name()}");
+ $this->lastStatus = false;
+ return null;
+ }
+ }
+
+ /**
+ * Return the list of category and feed outlines by categories names.
+ *
+ * This method is applied to a list of outlines. It merges the different
+ * list of feeds from several outlines into one array.
+ *
+ * @param array $outlines
+ * The outlines from which to extract the outlines.
+ * @param string $parent_category_name
+ * The name of the parent category of the current outlines.
+ *
+ * @return array[]
+ */
+ private function loadFromOutlines($outlines, $parent_category_name) {
+ $categories_elements = [];
+ $categories_to_feeds = [];
+
+ foreach ($outlines as $outline) {
+ // Get the categories and feeds from the child outline (it may
+ // return several categories and feeds if the outline is a category).
+ list (
+ $outline_categories,
+ $outline_categories_to_feeds,
+ ) = $this->loadFromOutline($outline, $parent_category_name);
+
+ // Then, we merge the initial arrays with the arrays returned by
+ // the outline.
+ $categories_elements = array_merge($categories_elements, $outline_categories);
+
+ foreach ($outline_categories_to_feeds as $category_name => $feeds) {
+ if (!isset($categories_to_feeds[$category_name])) {
+ $categories_to_feeds[$category_name] = [];
}
+
+ $categories_to_feeds[$category_name] = array_merge(
+ $categories_to_feeds[$category_name],
+ $feeds
+ );
}
- if ($error) {
- if (FreshRSS_Context::$isCli) {
- fwrite(STDERR, 'FreshRSS error during OPML category import from URL: ' . $catName . "\n");
- } else {
- Minz_Log::warning('Error during OPML category import from URL: ' . $catName);
- }
+ }
+
+ return [$categories_elements, $categories_to_feeds];
+ }
+
+ /**
+ * Return the list of category and feed outlines by categories names.
+ *
+ * This method is applied to a specific outline. If the outline represents
+ * a category (i.e. @outlines key exists), it will reapply loadFromOutlines()
+ * to its children. If the outline represents a feed (i.e. xmlUrl key
+ * exists), it will add the outline to an array accessible by its category
+ * name.
+ *
+ * @param array $outline
+ * The outline from which to extract the categories and feeds outlines.
+ * @param string $parent_category_name
+ * The name of the parent category of the current outline.
+ *
+ * @return array[]
+ */
+ private function loadFromOutline($outline, $parent_category_name) {
+ $categories_elements = [];
+ $categories_to_feeds = [];
+
+ if ($parent_category_name === '' && isset($outline['category'])) {
+ // The outline has no parent category, but its OPML category
+ // attribute is set, so we use it as the category name.
+ // lib_opml parses this attribute as an array of strings, so we
+ // rebuild a string here.
+ $parent_category_name = implode(', ', $outline['category']);
+ $categories_elements[$parent_category_name] = [
+ 'text' => $parent_category_name,
+ ];
+ }
+
+ if (isset($outline['@outlines'])) {
+ // The outline has children, it's probably a category
+ if (!empty($outline['text'])) {
+ $category_name = $outline['text'];
+ } elseif (!empty($outline['title'])) {
+ $category_name = $outline['title'];
} else {
- $parent_cat = $cat;
+ $category_name = $parent_category_name;
}
+
+ list (
+ $categories_elements,
+ $categories_to_feeds,
+ ) = $this->loadFromOutlines($outline['@outlines'], $category_name);
+
+ unset($outline['@outlines']);
+ $categories_elements[$category_name] = $outline;
}
- if (isset($cat_elt['@outlines'])) {
- // Our cat_elt contains more categories or more feeds, so we
- // add them recursively.
- // Note: FreshRSS does not support yet category arborescence, so always flatten from here
- $this->addOpmlElements($cat_elt['@outlines'], $parent_cat, true, $dryRun);
+ // The xmlUrl means it's a feed URL: add the outline to the array if it
+ // exists.
+ if (isset($outline['xmlUrl'])) {
+ if (!isset($categories_to_feeds[$parent_category_name])) {
+ $categories_to_feeds[$parent_category_name] = [];
+ }
+
+ $categories_to_feeds[$parent_category_name][] = $outline;
}
- return $cat;
+ return [$categories_elements, $categories_to_feeds];
+ }
+
+ private static function log($message) {
+ if (FreshRSS_Context::$isCli) {
+ fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n");
+ } else {
+ Minz_Log::warning("Error during OPML import: {$message}");
+ }
}
}