diff options
| author | 2023-01-18 10:12:21 +0100 | |
|---|---|---|
| committer | 2023-01-18 10:12:21 +0100 | |
| commit | daaa391e33c5d92e3dd91bb0b81ac420abed7097 (patch) | |
| tree | a3263c26ac90fb3115627e156eba580826acfd4f | |
| parent | 216e39c3cc43061686981b96328796765d264d29 (diff) | |
tec: Update the lib_opml (#4403)
* fix: Fix undefined GLOB_BRACE on Alpine
The manual states that:
> Note: The GLOB_BRACE flag is not available on some non GNU systems,
> like Solaris or Alpine Linux.
This generated an error on Alpine.
Reference: https://www.php.net/manual/function.glob.php
* fix: List details of feeds for OPML exportation
The details are necessary to export the XPath information, the CSS full
content path and read actions filters.
* Update LibOpml to 0.4.0
* Refactor OPML importation to be more robust
First, it fixes two regressions introduced by the update of lib_opml:
- title attribute is used when text attribute is missing;
- the OPML category attribute is used as a fallback for feeds categories.
In a related way, it also fixes a problem when a feed had both a parent
category outline and a category attribute. Before, it only considered the
attribute as its category, but now it considers the parent outline.
Then, it counts category limit correctly by not increasing
`$nb_categories` if the category already exists.
* Exclude lib_opml from the CodeSniffer
* Fix variable names when logging some errors
* Fix catch of LibOpml Exception
* Make sure to declare the category
* Exclude lib_opml from PHPStan analysis
* Disable markdownlint for lib_opml
* Fix typos
* Use auto-loading and allow updates via Composer
* Fix broken links to lib_opml
* Bring back the ability to import the OPML frss:opmlUrl attribute
* Refactor the logs of OPML errors
* Update lib_opml to the version 0.5.0
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
| -rw-r--r-- | .markdownlintignore | 1 | ||||
| -rw-r--r-- | .typos.toml | 1 | ||||
| -rw-r--r-- | README.fr.md | 2 | ||||
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | app/Controllers/importExportController.php | 2 | ||||
| -rwxr-xr-x | app/Controllers/indexController.php | 2 | ||||
| -rw-r--r-- | app/Models/Category.php | 2 | ||||
| -rw-r--r-- | app/Services/ExportService.php | 2 | ||||
| -rw-r--r-- | app/Services/ImportService.php | 440 | ||||
| -rw-r--r-- | app/views/helpers/export/opml.phtml | 55 | ||||
| -rw-r--r-- | lib/.gitignore | 8 | ||||
| -rw-r--r-- | lib/composer.json | 1 | ||||
| -rw-r--r-- | lib/lib_opml.php | 353 | ||||
| -rw-r--r-- | lib/lib_rss.php | 5 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/.gitattributes | 8 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/.gitignore | 2 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/CHANGELOG.md | 63 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/LICENSE | 21 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/README.md | 338 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/composer.json | 35 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/src/LibOpml/Exception.php | 15 | ||||
| -rw-r--r-- | lib/marienfressinaud/lib_opml/src/LibOpml/LibOpml.php | 770 | ||||
| -rw-r--r-- | phpcs.xml | 1 | ||||
| -rw-r--r-- | phpstan.neon | 22 |
24 files changed, 1596 insertions, 555 deletions
diff --git a/.markdownlintignore b/.markdownlintignore index 6e1cfb9c4..fa771b056 100644 --- a/.markdownlintignore +++ b/.markdownlintignore @@ -1,4 +1,5 @@ .git/ +lib/marienfressinaud/ lib/phpgt/ lib/phpmailer/ node_modules/ diff --git a/.typos.toml b/.typos.toml index 38a2a1cee..2170f5e85 100644 --- a/.typos.toml +++ b/.typos.toml @@ -36,6 +36,7 @@ extend-exclude = [ "composer.lock", "data/", "docs/fr/", + "lib/marienfressinaud/", "lib/phpgt/", "lib/phpmailer/", "lib/SimplePie/", diff --git a/README.fr.md b/README.fr.md index ce36a6b34..99b5a1a2c 100644 --- a/README.fr.md +++ b/README.fr.md @@ -242,7 +242,7 @@ et [l’API Fever](https://freshrss.github.io/FreshRSS/fr/users/06_Fever_API.htm * [SimplePie](https://simplepie.org/) * [MINZ](https://framagit.org/marienfressinaud/MINZ) * [php-http-304](https://alexandre.alapetite.fr/doc-alex/php-http-304/) -* [lib_opml](https://github.com/marienfressinaud/lib_opml) +* [lib_opml](https://framagit.org/marienfressinaud/lib_opml) * [PhpGt/CssXPath](https://github.com/PhpGt/CssXPath) * [PHPMailer](https://github.com/PHPMailer/PHPMailer) * [Chart.js](https://www.chartjs.org) @@ -142,7 +142,7 @@ and [Fever API](https://freshrss.github.io/FreshRSS/en/users/06_Fever_API.html) * [SimplePie](https://simplepie.org/) * [MINZ](https://framagit.org/marienfressinaud/MINZ) * [php-http-304](https://alexandre.alapetite.fr/doc-alex/php-http-304/) -* [lib_opml](https://github.com/marienfressinaud/lib_opml) +* [lib_opml](https://framagit.org/marienfressinaud/lib_opml) * [PhpGt/CssXPath](https://github.com/PhpGt/CssXPath) * [PHPMailer](https://github.com/PHPMailer/PHPMailer) * [Chart.js](https://www.chartjs.org) diff --git a/app/Controllers/importExportController.php b/app/Controllers/importExportController.php index a1e1106c1..6c4b684e9 100644 --- a/app/Controllers/importExportController.php +++ b/app/Controllers/importExportController.php @@ -21,8 +21,6 @@ class FreshRSS_importExport_Controller extends FreshRSS_ActionController { 
Minz_Error::error(403); } - require_once(LIB_PATH . '/lib_opml.php'); - $this->entryDAO = FreshRSS_Factory::createEntryDao(); $this->feedDAO = FreshRSS_Factory::createFeedDao(); } diff --git a/app/Controllers/indexController.php b/app/Controllers/indexController.php index 7fced48af..968518e3f 100755 --- a/app/Controllers/indexController.php +++ b/app/Controllers/indexController.php @@ -237,8 +237,6 @@ class FreshRSS_index_Controller extends FreshRSS_ActionController { return; } - require_once(LIB_PATH . '/lib_opml.php'); - // No layout for OPML output. $this->view->_layout(false); header('Content-Type: application/xml; charset=utf-8'); diff --git a/app/Models/Category.php b/app/Models/Category.php index b33bec26e..b23e8da0a 100644 --- a/app/Models/Category.php +++ b/app/Models/Category.php @@ -195,7 +195,7 @@ class FreshRSS_Category extends Minz_Model { } else { $dryRunCategory = new FreshRSS_Category(); $importService = new FreshRSS_Import_Service(); - $importService->importOpml($opml, $dryRunCategory, true, true); + $importService->importOpml($opml, $dryRunCategory, true); if ($importService->lastStatus()) { $feedDAO = FreshRSS_Factory::createFeedDao(); diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php index ad0f5f5a8..2f35666a8 100644 --- a/app/Services/ExportService.php +++ b/app/Services/ExportService.php @@ -43,8 +43,6 @@ class FreshRSS_Export_Service { * @return array First item is the filename, second item is the content */ public function generateOpml() { - require_once(LIB_PATH . 
'/lib_opml.php'); - $view = new FreshRSS_View(); $day = date('Y-m-d'); $view->categories = $this->category_dao->listCategories(true, true); diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php index 28286a753..68aa6f741 100644 --- a/app/Services/ImportService.php +++ b/app/Services/ImportService.php @@ -19,8 +19,6 @@ class FreshRSS_Import_Service { * @param string $username */ public function __construct($username = null) { - require_once(LIB_PATH . '/lib_opml.php'); - $this->catDAO = FreshRSS_Factory::createCategoryDao($username); $this->feedDAO = FreshRSS_Factory::createFeedDao($username); } @@ -34,153 +32,191 @@ class FreshRSS_Import_Service { * This method parses and imports an OPML file. * * @param string $opml_file the OPML file content. - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return array<FreshRSS_Category>|false an array of categories containing some feeds, or false if an error occurred. + * @param FreshRSS_Category|null $forced_category force the feeds to be associated to this category. + * @param boolean $dry_run true to not create categories and feeds in database. */ - public function importOpml(string $opml_file, $parent_cat = null, $flatten = false, $dryRun = false) { + public function importOpml(string $opml_file, $forced_category = null, $dry_run = false) { $this->lastStatus = true; $opml_array = array(); try { - $opml_array = libopml_parse_string($opml_file, false); - } catch (LibOPML_Exception $e) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML parsing: ' . $e->getMessage() . 
"\n"); - } else { - Minz_Log::warning($e->getMessage()); - } + $libopml = new \marienfressinaud\LibOpml\LibOpml(false); + $opml_array = $libopml->parseString($opml_file); + } catch (\marienfressinaud\LibOpml\Exception $e) { + self::log($e->getMessage()); $this->lastStatus = false; - return false; + return; } - return $this->addOpmlElements($opml_array['body'], $parent_cat, $flatten, $dryRun); - } + $this->catDAO->checkDefault(); + $default_category = $this->catDAO->getDefault(); + if (!$default_category) { + self::log('Cannot get the default category'); + $this->lastStatus = false; + return; + } - /** - * This method imports an OPML file based on its body. - * - * @param array $opml_elements an OPML element (body or outline). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return array<FreshRSS_Category> an array of categories containing some feeds - */ - private function addOpmlElements($opml_elements, $parent_cat = null, $flatten = false, $dryRun = false) { + // Get the categories by names so we can use this array to retrieve + // existing categories later. + $categories = $this->catDAO->listCategories(false); + $categories_by_names = []; + foreach ($categories as $category) { + $categories_by_names[$category->name()] = $category; + } + + // Get current numbers of categories and feeds, and the limits to + // verify the user can import its categories/feeds. + $nb_categories = count($categories); $nb_feeds = count($this->feedDAO->listFeeds()); - $nb_cats = count($this->catDAO->listCategories(false)); $limits = FreshRSS_Context::$system_conf->limits; - //Sort with categories first - usort($opml_elements, static function ($a, $b) { - return strcmp( - (isset($a['xmlUrl']) ? 'Z' : 'A') . (isset($a['text']) ? $a['text'] : ''), - (isset($b['xmlUrl']) ? 'Z' : 'A') . (isset($b['text']) ? 
$b['text'] : '')); - }); - - $categories = []; - - foreach ($opml_elements as $elt) { - if (isset($elt['xmlUrl'])) { - // If xmlUrl exists, it means it is a feed - if (FreshRSS_Context::$isCli && $nb_feeds >= $limits['max_feeds']) { - Minz_Log::warning(_t('feedback.sub.feed.over_max', - $limits['max_feeds'])); - $this->lastStatus = false; - continue; - } + // Process the OPML outlines to get a list of categories and a list of + // feeds elements indexed by their categories names. + list ( + $categories_elements, + $categories_to_feeds, + ) = $this->loadFromOutlines($opml_array['body'], ''); - if ($this->addFeedOpml($elt, $parent_cat, $dryRun)) { - $nb_feeds++; + foreach ($categories_to_feeds as $category_name => $feeds_elements) { + $category_element = $categories_elements[$category_name] ?? null; + + $category = null; + if ($forced_category) { + // If the category is forced, ignore the actual category name + $category = $forced_category; + } elseif (isset($categories_by_names[$category_name])) { + // If the category already exists, get it from $categories_by_names + $category = $categories_by_names[$category_name]; + } elseif ($category_element) { + // Otherwise, create the category (if possible) + $limit_reached = $nb_categories >= $limits['max_categories']; + $can_create_category = FreshRSS_Context::$isCli || !$limit_reached; + + if ($can_create_category) { + $category = $this->createCategory($category_element, $dry_run); + if ($category) { + $categories_by_names[$category->name()] = $category; + $nb_categories++; + } } else { - $this->lastStatus = false; + Minz_Log::warning( + _t('feedback.sub.category.over_max', $limits['max_categories']) + ); } - } elseif (!empty($elt['text'])) { - // No xmlUrl? It should be a category! 
- $limit_reached = !$flatten && ($nb_cats >= $limits['max_categories']); - if (!FreshRSS_Context::$isCli && $limit_reached) { - Minz_Log::warning(_t('feedback.sub.category.over_max', - $limits['max_categories'])); + } + + if (!$category) { + // Category can be null if the feeds weren't in a category + // outline, or if we weren't able to create the category. + $category = $default_category; + } + + // Then, create the feeds one by one and attach them to the + // category we just got. + foreach ($feeds_elements as $feed_element) { + $limit_reached = $nb_feeds >= $limits['max_feeds']; + $can_create_feed = FreshRSS_Context::$isCli || !$limit_reached; + if (!$can_create_feed) { + Minz_Log::warning( + _t('feedback.sub.feed.over_max', $limits['max_feeds']) + ); $this->lastStatus = false; - $flatten = true; + break; } - $category = $this->addCategoryOpml($elt, $parent_cat, $flatten, $dryRun); - - if ($category) { - $nb_cats++; - $categories[] = $category; + if ($this->createFeed($feed_element, $category, $dry_run)) { + // TODO what if the feed already exists in the database? + $nb_feeds++; + } else { + $this->lastStatus = false; } } } - return $categories; + return; } /** - * This method imports an OPML feed element. + * Create a feed from a feed element (i.e. OPML outline). * - * @param array $feed_elt an OPML element (must be a feed element). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @return FreshRSS_Feed|null a feed. + * @param array<string, string> $feed_elt An OPML element (must be a feed element). + * @param FreshRSS_Category $category The category to associate to the feed. + * @param boolean $dry_run true to not create the feed in database. + * + * @return FreshRSS_Feed|null The created feed, or null if it failed. 
*/ - private function addFeedOpml($feed_elt, $parent_cat, $dryRun = false) { - if (empty($feed_elt['xmlUrl'])) { - return null; - } - if ($parent_cat == null) { - // This feed has no parent category so we get the default one - $this->catDAO->checkDefault(); - $parent_cat = $this->catDAO->getDefault(); - if ($parent_cat == null) { - $this->lastStatus = false; - return null; - } - } - - // We get different useful information + private function createFeed($feed_elt, $category, $dry_run) { $url = Minz_Helper::htmlspecialchars_utf8($feed_elt['xmlUrl']); - $name = Minz_Helper::htmlspecialchars_utf8($feed_elt['text'] ?? ''); + $name = $feed_elt['text'] ?? $feed_elt['title'] ?? ''; + $name = Minz_Helper::htmlspecialchars_utf8($name); $website = Minz_Helper::htmlspecialchars_utf8($feed_elt['htmlUrl'] ?? ''); $description = Minz_Helper::htmlspecialchars_utf8($feed_elt['description'] ?? ''); try { // Create a Feed object and add it in DB $feed = new FreshRSS_Feed($url); - $feed->_categoryId($parent_cat->id()); - $parent_cat->addFeed($feed); + $feed->_categoryId($category->id()); + $category->addFeed($feed); $feed->_name($name); $feed->_website($website); $feed->_description($description); switch ($feed_elt['type'] ?? 
'') { - case FreshRSS_Export_Service::TYPE_HTML_XPATH: + case strtolower(FreshRSS_Export_Service::TYPE_HTML_XPATH): $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH); break; - case FreshRSS_Export_Service::TYPE_RSS_ATOM: + case strtolower(FreshRSS_Export_Service::TYPE_RSS_ATOM): default: $feed->_kind(FreshRSS_Feed::KIND_RSS); break; } + if (isset($feed_elt['frss:cssFullContent'])) { + $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($feed_elt['frss:cssFullContent'])); + } + + if (isset($feed_elt['frss:cssFullContentFilter'])) { + $feed->_attributes('path_entries_filter', $feed_elt['frss:cssFullContentFilter']); + } + + if (isset($feed_elt['frss:filtersActionRead'])) { + $feed->_filtersAction( + 'read', + preg_split('/[\n\r]+/', $feed_elt['frss:filtersActionRead']) + ); + } + $xPathSettings = []; - foreach ($feed_elt as $key => $value) { - if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) { - switch ($key) { - case 'cssFullContent': $feed->_pathEntries(Minz_Helper::htmlspecialchars_utf8($value['value'])); break; - case 'cssFullContentFilter': $feed->_attributes('path_entries_filter', $value['value']); break; - case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break; - case 'xPathItem': $xPathSettings['item'] = $value['value']; break; - case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break; - case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break; - case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break; - case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break; - case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break; - case 'xPathItemTimeFormat': $xPathSettings['itemTimeFormat'] = $value['value']; break; - case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break; - case 'xPathItemCategories': 
$xPathSettings['itemCategories'] = $value['value']; break; - case 'xPathItemUid': $xPathSettings['itemUid'] = $value['value']; break; - } - } + if (isset($feed_elt['frss:xPathItem'])) { + $xPathSettings['item'] = $feed_elt['frss:xPathItem']; } + if (isset($feed_elt['frss:xPathItemTitle'])) { + $xPathSettings['itemTitle'] = $feed_elt['frss:xPathItemTitle']; + } + if (isset($feed_elt['frss:xPathItemContent'])) { + $xPathSettings['itemContent'] = $feed_elt['frss:xPathItemContent']; + } + if (isset($feed_elt['frss:xPathItemUri'])) { + $xPathSettings['itemUri'] = $feed_elt['frss:xPathItemUri']; + } + if (isset($feed_elt['frss:xPathItemAuthor'])) { + $xPathSettings['itemAuthor'] = $feed_elt['frss:xPathItemAuthor']; + } + if (isset($feed_elt['frss:xPathItemTimestamp'])) { + $xPathSettings['itemTimestamp'] = $feed_elt['frss:xPathItemTimestamp']; + } + if (isset($feed_elt['frss:xPathItemTimeFormat'])) { + $xPathSettings['itemTimeFormat'] = $feed_elt['frss:xPathItemTimeFormat']; + } + if (isset($feed_elt['frss:xPathItemThumbnail'])) { + $xPathSettings['itemThumbnail'] = $feed_elt['frss:xPathItemThumbnail']; + } + if (isset($feed_elt['frss:xPathItemCategories'])) { + $xPathSettings['itemCategories'] = $feed_elt['frss:xPathItemCategories']; + } + if (isset($feed_elt['frss:xPathItemUid'])) { + $xPathSettings['itemUid'] = $feed_elt['frss:xPathItemUid']; + } + if (!empty($xPathSettings)) { $feed->_attributes('xpath', $xPathSettings); } @@ -188,9 +224,11 @@ class FreshRSS_Import_Service { // Call the extension hook /** @var FreshRSS_Feed|null */ $feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed); - if ($dryRun) { + + if ($dry_run) { return $feed; } + if ($feed != null) { // addFeedObject checks if feed is already in DB $id = $this->feedDAO->addFeedObject($feed); @@ -202,81 +240,163 @@ class FreshRSS_Import_Service { } } } catch (FreshRSS_Feed_Exception $e) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML feed import: ' . 
$e->getMessage() . "\n"); - } else { - Minz_Log::warning($e->getMessage()); - } + self::log($e->getMessage()); $this->lastStatus = false; } - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML feed import from URL: ' . - SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id() . "\n"); - } else { - Minz_Log::warning('Error during OPML feed import from URL: ' . - SimplePie_Misc::url_remove_credentials($url) . ' in category ' . $parent_cat->id()); - } - + $clean_url = SimplePie_Misc::url_remove_credentials($url); + self::log("Cannot create {$clean_url} feed in category {$category->name()}"); return null; } /** - * This method imports an OPML category element. + * Create and return a category. + * + * @param array<string, string> $category_element An OPML element (must be a category element). + * @param boolean $dry_run true to not create the category in database. * - * @param array $cat_elt an OPML element (must be a category element). - * @param FreshRSS_Category|null $parent_cat the name of the parent category. - * @param boolean $flatten true to disable categories, false otherwise. - * @return FreshRSS_Category|null a new category containing some feeds, or null if no category was created, or false if an error occurred. + * @return FreshRSS_Category|null The created category, or null if it failed. */ - private function addCategoryOpml($cat_elt, $parent_cat, $flatten = false, $dryRun = false) { - $error = false; - $cat = null; - if (!$flatten) { - $catName = Minz_Helper::htmlspecialchars_utf8($cat_elt['text']); - $cat = new FreshRSS_Category($catName); - - foreach ($cat_elt as $key => $value) { - if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? 
'') === FreshRSS_Export_Service::FRSS_NAMESPACE) { - switch ($key) { - case 'opmlUrl': - $opml_url = checkUrl($value['value']); - if ($opml_url != '') { - $cat->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML); - $cat->_attributes('opml_url', $opml_url); - } - break; - } - } + private function createCategory($category_element, $dry_run) { + $name = $category_element['text'] ?? $category_element['title'] ?? ''; + $name = Minz_Helper::htmlspecialchars_utf8($name); + $category = new FreshRSS_Category($name); + + if (isset($category_element['frss:opmlUrl'])) { + $opml_url = checkUrl($category_element['frss:opmlUrl']); + if ($opml_url != '') { + $category->_kind(FreshRSS_Category::KIND_DYNAMIC_OPML); + $category->_attributes('opml_url', $opml_url); } + } - if (!$dryRun) { - $id = $this->catDAO->addCategoryObject($cat); - if ($id == false) { - $this->lastStatus = false; - $error = true; - } else { - $cat->_id($id); + if ($dry_run) { + return $category; + } + + $id = $this->catDAO->addCategoryObject($category); + if ($id !== false) { + $category->_id($id); + return $category; + } else { + self::log("Cannot create category {$category->name()}"); + $this->lastStatus = false; + return null; + } + } + + /** + * Return the list of category and feed outlines by categories names. + * + * This method is applied to a list of outlines. It merges the different + * list of feeds from several outlines into one array. + * + * @param array $outlines + * The outlines from which to extract the outlines. + * @param string $parent_category_name + * The name of the parent category of the current outlines. + * + * @return array[] + */ + private function loadFromOutlines($outlines, $parent_category_name) { + $categories_elements = []; + $categories_to_feeds = []; + + foreach ($outlines as $outline) { + // Get the categories and feeds from the child outline (it may + // return several categories and feeds if the outline is a category). 
+ list ( + $outline_categories, + $outline_categories_to_feeds, + ) = $this->loadFromOutline($outline, $parent_category_name); + + // Then, we merge the initial arrays with the arrays returned by + // the outline. + $categories_elements = array_merge($categories_elements, $outline_categories); + + foreach ($outline_categories_to_feeds as $category_name => $feeds) { + if (!isset($categories_to_feeds[$category_name])) { + $categories_to_feeds[$category_name] = []; } + + $categories_to_feeds[$category_name] = array_merge( + $categories_to_feeds[$category_name], + $feeds + ); } - if ($error) { - if (FreshRSS_Context::$isCli) { - fwrite(STDERR, 'FreshRSS error during OPML category import from URL: ' . $catName . "\n"); - } else { - Minz_Log::warning('Error during OPML category import from URL: ' . $catName); - } + } + + return [$categories_elements, $categories_to_feeds]; + } + + /** + * Return the list of category and feed outlines by categories names. + * + * This method is applied to a specific outline. If the outline represents + * a category (i.e. @outlines key exists), it will reapply loadFromOutlines() + * to its children. If the outline represents a feed (i.e. xmlUrl key + * exists), it will add the outline to an array accessible by its category + * name. + * + * @param array $outline + * The outline from which to extract the categories and feeds outlines. + * @param string $parent_category_name + * The name of the parent category of the current outline. + * + * @return array[] + */ + private function loadFromOutline($outline, $parent_category_name) { + $categories_elements = []; + $categories_to_feeds = []; + + if ($parent_category_name === '' && isset($outline['category'])) { + // The outline has no parent category, but its OPML category + // attribute is set, so we use it as the category name. + // lib_opml parses this attribute as an array of strings, so we + // rebuild a string here. 
+ $parent_category_name = implode(', ', $outline['category']); + $categories_elements[$parent_category_name] = [ + 'text' => $parent_category_name, + ]; + } + + if (isset($outline['@outlines'])) { + // The outline has children, it's probably a category + if (!empty($outline['text'])) { + $category_name = $outline['text']; + } elseif (!empty($outline['title'])) { + $category_name = $outline['title']; } else { - $parent_cat = $cat; + $category_name = $parent_category_name; } + + list ( + $categories_elements, + $categories_to_feeds, + ) = $this->loadFromOutlines($outline['@outlines'], $category_name); + + unset($outline['@outlines']); + $categories_elements[$category_name] = $outline; } - if (isset($cat_elt['@outlines'])) { - // Our cat_elt contains more categories or more feeds, so we - // add them recursively. - // Note: FreshRSS does not support yet category arborescence, so always flatten from here - $this->addOpmlElements($cat_elt['@outlines'], $parent_cat, true, $dryRun); + // The xmlUrl means it's a feed URL: add the outline to the array if it + // exists. 
+ if (isset($outline['xmlUrl'])) { + if (!isset($categories_to_feeds[$parent_category_name])) { + $categories_to_feeds[$parent_category_name] = []; + } + + $categories_to_feeds[$parent_category_name][] = $outline; } - return $cat; + return [$categories_elements, $categories_to_feeds]; + } + + private static function log($message) { + if (FreshRSS_Context::$isCli) { + fwrite(STDERR, "FreshRSS error during OPML import: {$message}\n"); + } else { + Minz_Log::warning("Error during OPML import: {$message}"); + } } } diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml index d97641fd2..eb6f7523b 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -9,6 +9,7 @@ function feedsToOutlines($feeds, $excludeMutedFeeds = false): array { if ($feed->mute() && $excludeMutedFeeds) { continue; } + $outline = [ 'text' => htmlspecialchars_decode($feed->name(), ENT_QUOTES), 'type' => FreshRSS_Export_Service::TYPE_RSS_ATOM, @@ -16,49 +17,58 @@ function feedsToOutlines($feeds, $excludeMutedFeeds = false): array { 'htmlUrl' => htmlspecialchars_decode($feed->website(), ENT_QUOTES), 'description' => htmlspecialchars_decode($feed->description(), ENT_QUOTES), ]; + if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) { $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH; /** @var array<string,string> */ $xPathSettings = $feed->attributes('xpath'); - $outline['frss:xPathItem'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['item'] ?? null]; - $outline['frss:xPathItemTitle'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTitle'] ?? null]; - $outline['frss:xPathItemContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemContent'] ?? null]; - $outline['frss:xPathItemUri'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemUri'] ?? 
null]; - $outline['frss:xPathItemAuthor'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemAuthor'] ?? null]; - $outline['frss:xPathItemTimestamp'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTimestamp'] ?? null]; - $outline['frss:xPathItemTimeformat'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTimeformat'] ?? null]; - $outline['frss:xPathItemThumbnail'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemThumbnail'] ?? null]; - $outline['frss:xPathItemCategories'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemCategories'] ?? null]; - $outline['frss:xPathItemUid'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemUid'] ?? null]; + $outline['frss:xPathItem'] = $xPathSettings['item'] ?? null; + $outline['frss:xPathItemTitle'] = $xPathSettings['itemTitle'] ?? null; + $outline['frss:xPathItemContent'] = $xPathSettings['itemContent'] ?? null; + $outline['frss:xPathItemUri'] = $xPathSettings['itemUri'] ?? null; + $outline['frss:xPathItemAuthor'] = $xPathSettings['itemAuthor'] ?? null; + $outline['frss:xPathItemTimestamp'] = $xPathSettings['itemTimestamp'] ?? null; + $outline['frss:xPathItemTimeformat'] = $xPathSettings['itemTimeformat'] ?? null; + $outline['frss:xPathItemThumbnail'] = $xPathSettings['itemThumbnail'] ?? null; + $outline['frss:xPathItemCategories'] = $xPathSettings['itemCategories'] ?? null; + $outline['frss:xPathItemUid'] = $xPathSettings['itemUid'] ?? null; } + if (!empty($feed->filtersAction('read'))) { $filters = ''; foreach ($feed->filtersAction('read') as $filterRead) { $filters .= $filterRead->getRawInput() . 
"\n"; } $filters = trim($filters); - $outline['frss:filtersActionRead'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $filters]; + $outline['frss:filtersActionRead'] = $filters; } + if ($feed->pathEntries() != '') { - $outline['frss:cssFullContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES)]; + $outline['frss:cssFullContent'] = htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES); } + if ($feed->attributes('path_entries_filter') != '') { - $outline['frss:cssFullContentFilter'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $feed->attributes('path_entries_filter')]; + $outline['frss:cssFullContentFilter'] = $feed->attributes('path_entries_filter'); } + $outlines[] = $outline; } + return $outlines; } /** @var FreshRSS_View $this */ -$opml_array = array( - 'head' => array( +$opml_array = [ + 'namespaces' => [ + 'frss' => FreshRSS_Export_Service::FRSS_NAMESPACE, + ], + 'head' => [ 'title' => FreshRSS_Context::$system_conf->title, - 'dateCreated' => date('D, d M Y H:i:s') - ), - 'body' => array() -); + 'dateCreated' => new DateTime(), + ], + 'body' => [], +]; if (!empty($this->categories)) { foreach ($this->categories as $key => $cat) { @@ -66,9 +76,11 @@ if (!empty($this->categories)) { 'text' => htmlspecialchars_decode($cat->name(), ENT_QUOTES), '@outlines' => feedsToOutlines($cat->feeds(), $this->excludeMutedFeeds), ]; + if ($cat->kind() === FreshRSS_Category::KIND_DYNAMIC_OPML) { - $outline['frss:opmlUrl'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $cat->attributes('opml_url')];; + $outline['frss:opmlUrl'] = $cat->attributes('opml_url'); } + $opml_array['body'][$key] = $outline; } } @@ -77,4 +89,5 @@ if (!empty($this->feeds)) { $opml_array['body'][] = feedsToOutlines($this->feeds, $this->excludeMutedFeeds); } -echo libopml_render($opml_array); +$libopml = new \marienfressinaud\LibOpml\LibOpml(true); 
+echo $libopml->render($opml_array); diff --git a/lib/.gitignore b/lib/.gitignore index 812bbfe76..a1df80381 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -1,6 +1,14 @@ autoload.php composer.lock composer/ +marienfressinaud/lib_opml/.git/ +marienfressinaud/lib_opml/.gitlab-ci.yml +marienfressinaud/lib_opml/.gitlab/ +marienfressinaud/lib_opml/ci/ +marienfressinaud/lib_opml/examples/ +marienfressinaud/lib_opml/Makefile +marienfressinaud/lib_opml/src/functions.php +marienfressinaud/lib_opml/tests/ phpgt/cssxpath/.* phpgt/cssxpath/composer.json phpgt/cssxpath/CONTRIBUTING.md diff --git a/lib/composer.json b/lib/composer.json index 4e4e1c051..6e9e0ee32 100644 --- a/lib/composer.json +++ b/lib/composer.json @@ -12,6 +12,7 @@ ], "require": { "php": ">=7.2.0", + "marienfressinaud/lib_opml": "0.5.0", "phpgt/cssxpath": "dev-master#4fbe420aba3d9e729940107ded4236a835a1a132", "phpmailer/phpmailer": "6.6.0" }, diff --git a/lib/lib_opml.php b/lib/lib_opml.php deleted file mode 100644 index f86d780b7..000000000 --- a/lib/lib_opml.php +++ /dev/null @@ -1,353 +0,0 @@ -<?php - -/** - * lib_opml is a free library to manage OPML format in PHP. - * - * By default, it takes in consideration version 2.0 but can be compatible with - * OPML 1.0. More information on http://dev.opml.org. - * Difference is "text" attribute is optional in version 1.0. It is highly - * recommended to use this attribute. 
- * - * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument) - * - * @author Marien Fressinaud <dev@marienfressinaud.fr> - * @link https://github.com/marienfressinaud/lib_opml - * @version 0.2-FreshRSS~1.20.0 - * @license public domain - * - * Usages: - * > include('lib_opml.php'); - * > $filename = 'my_opml_file.xml'; - * > $opml_array = libopml_parse_file($filename); - * > print_r($opml_array); - * - * > $opml_string = [...]; - * > $opml_array = libopml_parse_string($opml_string); - * > print_r($opml_array); - * - * > $opml_array = [...]; - * > $opml_string = libopml_render($opml_array); - * > $opml_object = libopml_render($opml_array, true); - * > echo $opml_string; - * > print_r($opml_object); - * - * You can set $strict argument to false if you want to bypass "text" attribute - * requirement. - * - * If parsing fails for any reason (e.g. not an XML string, does not match with - * the specifications), a LibOPML_Exception is raised. - * - * lib_opml array format is described here: - * $array = array( - * 'head' => array( // 'head' element is optional (but recommended) - * 'key' => 'value', // key must be a part of available OPML head elements - * ), - * 'body' => array( // body is required - * array( // this array represents an outline (at least one) - * 'text' => 'value', // 'text' element is required if $strict is true - * 'key' => 'value', // key and value are what you want (optional) - * '@outlines' = array( // @outlines is a special value and represents sub-outlines - * array( - * [...] // where [...] is a valid outline definition - * ), - * ), - * ), - * array( // other outline definitions - * [...] - * ), - * [...], - * ) - * ) - * - */ - -/** - * A simple Exception class which represents any kind of OPML problem. - * Message should precise the current problem. - */ -class LibOPML_Exception extends Exception {} - - -// Define the list of available head attributes. All of them are optional. 
-define('HEAD_ELEMENTS', serialize(array( - 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail', - 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop', - 'windowLeft', 'windowBottom', 'windowRight' -))); - - -/** - * Parse an XML object as an outline object and return corresponding array - * - * @param SimpleXMLElement $outline_xml the XML object we want to parse - * @param bool $strict true if "text" attribute is required, false else - * @return array corresponding to an outline and following format described above - * @throws LibOPML_Exception - * @access private - */ -function libopml_parse_outline($outline_xml, $strict = true) { - $outline = array(); - - // An outline may contain any kind of attributes but "text" attribute is - // required ! - $text_is_present = false; - - $elem = dom_import_simplexml($outline_xml); - /** @var DOMAttr $attr */ - foreach ($elem->attributes as $attr) { - $key = $attr->localName; - - if ($attr->namespaceURI == '') { - $outline[$key] = $attr->value; - } else { - $outline[$key] = [ - 'namespace' => $attr->namespaceURI, - 'value' => $attr->value, - ]; - } - - if ($key === 'text') { - $text_is_present = true; - } - } - - if (!$text_is_present && $strict) { - throw new LibOPML_Exception( - 'Outline does not contain any text attribute' - ); - } - - if (empty($outline['text']) && isset($outline['title'])) { - $outline['text'] = $outline['title']; - } - - foreach ($outline_xml->children() as $key => $value) { - // An outline may contain any number of outline children - if ($key === 'outline') { - $outline['@outlines'][] = libopml_parse_outline($value, $strict); - } - } - - return $outline; -} - -/** - * Reformat the XML document as a hierarchy when - * the OPML 2.0 category attribute is used - */ -function preprocessing_categories($doc) { - $outline_categories = array(); - $body = $doc->getElementsByTagName('body')->item(0); - $xpath = new DOMXpath($doc); - $outlines = 
$xpath->query('/opml/body/outline[@category]'); - foreach ($outlines as $outline) { - $category = trim($outline->getAttribute('category')); - if ($category != '') { - $outline_category = null; - if (!isset($outline_categories[$category])) { - $outline_category = $doc->createElement('outline'); - $outline_category->setAttribute('text', $category); - $body->insertBefore($outline_category, $body->firstChild); - $outline_categories[$category] = $outline_category; - } else { - $outline_category = $outline_categories[$category]; - } - $outline->parentNode->removeChild($outline); - $outline_category->appendChild($outline); - } - } -} - -/** - * Parse a string as a XML one and returns the corresponding array - * - * @param string $xml is the string we want to parse - * @param bool $strict true to perform some validation (e.g. require "text" attribute), false to relax - * @return array corresponding to the XML string and following format described above - * @throws LibOPML_Exception - * @access public - */ -function libopml_parse_string($xml, $strict = true) { - $dom = new DOMDocument(); - $dom->recover = true; - $dom->strictErrorChecking = false; - $dom->loadXML($xml); - $dom->encoding = 'UTF-8'; - - //Partial compatibility with the category attribute of OPML 2.0 - preprocessing_categories($dom); - - $opml = simplexml_import_dom($dom); - - if (!$opml) { - throw new LibOPML_Exception(); - } - - $array = array( - 'version' => (string)$opml['version'], - 'head' => array(), - 'body' => array() - ); - - if (isset($opml->head)) { - // We get all "head" elements. Head is required but its sub-elements are optional. - foreach ($opml->head->children() as $key => $value) { - if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { - $array['head'][$key] = (string)$value; - } elseif ($strict) { - throw new LibOPML_Exception($key . 
' is not part of the OPML 2.0 specification'); - } - } - } elseif ($strict) { - throw new LibOPML_Exception('Required OPML head element is missing!'); - } - - // Then, we get body oulines. Body must contain at least one outline - // element. - $at_least_one_outline = false; - foreach ($opml->body->children() as $key => $value) { - if ($key === 'outline') { - $at_least_one_outline = true; - $array['body'][] = libopml_parse_outline($value, $strict); - } - } - - if (!$at_least_one_outline) { - throw new LibOPML_Exception( - 'OPML body must contain at least one outline element' - ); - } - - return $array; -} - - -/** - * Parse a string contained into a file as a XML string and returns the corresponding array - * - * @param string $filename should indicates a valid XML file - * @param bool $strict true if "text" attribute is required, false else - * @return array corresponding to the file content and following format described above - * @throws LibOPML_Exception - * @access public - */ -function libopml_parse_file($filename, $strict = true) { - $file_content = file_get_contents($filename); - - if ($file_content === false) { - throw new LibOPML_Exception( - $filename . ' cannot be found' - ); - } - - return libopml_parse_string($file_content, $strict); -} - - -/** - * Create a XML outline object in a parent object. - * - * @param SimpleXMLElement $parent_elt is the parent object of current outline - * @param array $outline array representing an outline object - * @param bool $strict true if "text" attribute is required, false else - * @throws LibOPML_Exception - * @access private - */ -function libopml_render_outline($parent_elt, $outline, $strict) { - // Outline MUST be an array! 
- if (!is_array($outline)) { - throw new LibOPML_Exception( - 'Outline element must be defined as array' - ); - } - - $outline_elt = $parent_elt->addChild('outline'); - $text_is_present = false; - /** @var string|array<string,mixed> $value */ - foreach ($outline as $key => $value) { - // Only outlines can be an array and so we consider children are also - // outline elements. - if ($key === '@outlines' && is_array($value)) { - foreach ($value as $outline_child) { - libopml_render_outline($outline_elt, $outline_child, $strict); - } - } elseif (is_array($value) && !isset($value['namespace'])) { - throw new LibOPML_Exception( - 'Type of outline elements cannot be array (except for providing a namespace): ' . $key - ); - } else { - // Detect text attribute is present, that's good :) - if ($key === 'text') { - $text_is_present = true; - } - if (is_array($value)) { - if (!empty($value['namespace']) && !empty($value['value'])) { - $outline_elt->addAttribute($key, $value['value'], $value['namespace']); - } - } else { - $outline_elt->addAttribute($key, $value); - } - } - } - - if (!$text_is_present && $strict) { - throw new LibOPML_Exception( - 'You must define at least a text element for all outlines' - ); - } -} - - -/** - * Render an array as an OPML string or a XML object. - * - * @param array $array is the array we want to render and must follow structure defined above - * @param bool $as_xml_object false if function must return a string, true for a XML object - * @param bool $strict true if "text" attribute is required, false else - * @return string|SimpleXMLElement XML string corresponding to $array or XML object - * @throws LibOPML_Exception - * @access public - */ -function libopml_render($array, $as_xml_object = false, $strict = true) { - $opml = new SimpleXMLElement('<opml></opml>'); - $opml->addAttribute('version', $strict ? '2.0' : '1.0'); - - // Create head element. $array['head'] is optional but head element will - // exist in the final XML object. 
- $head = $opml->addChild('head'); - if (isset($array['head'])) { - foreach ($array['head'] as $key => $value) { - if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { - $head->addChild($key, $value); - } - } - } - - // Check body is set and contains at least one element - if (!isset($array['body'])) { - throw new LibOPML_Exception( - '$array must contain a body element' - ); - } - if (count($array['body']) <= 0) { - throw new LibOPML_Exception( - 'Body element must contain at least one element (array)' - ); - } - - // Create outline elements - $body = $opml->addChild('body'); - foreach ($array['body'] as $outline) { - libopml_render_outline($body, $outline, $strict); - } - - // And return the final result - if ($as_xml_object) { - return $opml; - } else { - $dom = dom_import_simplexml($opml)->ownerDocument; - $dom->formatOutput = true; - $dom->encoding = 'UTF-8'; - return $dom->saveXML(); - } -} diff --git a/lib/lib_rss.php b/lib/lib_rss.php index cbdfff773..e5362bc5c 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -57,6 +57,11 @@ function classAutoloader($class) { $base_dir = LIB_PATH . '/phpgt/cssxpath/src/'; $relative_class_name = substr($class, strlen($prefix)); require $base_dir . str_replace('\\', '/', $relative_class_name) . '.php'; + } elseif (str_starts_with($class, 'marienfressinaud\\LibOpml\\')) { + $prefix = 'marienfressinaud\\LibOpml\\'; + $base_dir = LIB_PATH . '/marienfressinaud/lib_opml/src/LibOpml/'; + $relative_class_name = substr($class, strlen($prefix)); + require $base_dir . str_replace('\\', '/', $relative_class_name) . '.php'; } elseif (str_starts_with($class, 'PHPMailer\\PHPMailer\\')) { $prefix = 'PHPMailer\\PHPMailer\\'; $base_dir = LIB_PATH . 
'/phpmailer/phpmailer/src/'; diff --git a/lib/marienfressinaud/lib_opml/.gitattributes b/lib/marienfressinaud/lib_opml/.gitattributes new file mode 100644 index 000000000..669ea8c8d --- /dev/null +++ b/lib/marienfressinaud/lib_opml/.gitattributes @@ -0,0 +1,8 @@ +/.* export-ignore + +/ci export-ignore +/examples export-ignore +/tests export-ignore + +/CHANGELOG.md export-ignore +/Makefile export-ignore diff --git a/lib/marienfressinaud/lib_opml/.gitignore b/lib/marienfressinaud/lib_opml/.gitignore new file mode 100644 index 000000000..ca9baaf91 --- /dev/null +++ b/lib/marienfressinaud/lib_opml/.gitignore @@ -0,0 +1,2 @@ +/coverage +/vendor diff --git a/lib/marienfressinaud/lib_opml/CHANGELOG.md b/lib/marienfressinaud/lib_opml/CHANGELOG.md new file mode 100644 index 000000000..ee9245e7e --- /dev/null +++ b/lib/marienfressinaud/lib_opml/CHANGELOG.md @@ -0,0 +1,63 @@ +# Changelog of lib\_opml + +## 2022-07-25 - v0.5.0 + +- BREAKING CHANGE: Reverse parameters in `libopml_render()` +- BREAKING CHANGE: Validate email and URL address elements +- Add support for PHP 7.2+ +- Add a .gitattributes file +- Improve the documentation about usage +- Add a note about stability in README +- Fix a PHPDoc annotation +- Homogenize tests with "Newspapers" examples + +## 2022-06-04 - v0.4.0 + +- Refactor the LibOpml class to be not static +- Parse or render attributes according to their types +- Add support for namespaces +- Don't require text attribute if OPML version is 1.0 +- Check that outline text attribute is not empty +- Verify that xmlUrl and url attributes are present according to the type + attribute +- Accept a version attribute in render method +- Handle OPML 1.1 as 1.0 +- Fail if version, head or body is missing +- Fail if OPML version is not supported +- Fail if head contains invalid elements +- Fail if sub-outlines are not arrays when rendering +- Make parsing less strict by default +- Don't raise most parsing errors when strict is false +- Force type attribute to 
lowercase +- Remove SimpleXML as a requirement +- Homogenize exception messages +- Close pre tags in the example file +- Improve documentation in the README +- Improve comments in the source code +- Add a MR checklist item about changes +- Update the description in composer.json +- Update dev dependencies + +## 2022-04-23 - v0.3.0 + +- Reorganize the architecture of code (using namespaces and classes) +- Change PHP minimum version to 7.4 +- Move to Framagit instead of GitHub +- Change the license to MIT +- Configure lib\_opml with Composer +- Add PHPUnit tests for all the methods and functions +- Add a linter to the project +- Provide a Makefile +- Configure Gitlab CI instead of Travis +- Add a merge request template +- Improve the comments, documentation and examples + +## 2014-03-31 - v0.2.0 + +- Allow to make optional the `text` attribute +- Improve and complete documentation +- Fix examples + +## 2014-03-29 - v0.1.0 + +First version diff --git a/lib/marienfressinaud/lib_opml/LICENSE b/lib/marienfressinaud/lib_opml/LICENSE new file mode 100644 index 000000000..2ad7f2db4 --- /dev/null +++ b/lib/marienfressinaud/lib_opml/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Marien Fressinaud + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/marienfressinaud/lib_opml/README.md b/lib/marienfressinaud/lib_opml/README.md new file mode 100644 index 000000000..34026bc14 --- /dev/null +++ b/lib/marienfressinaud/lib_opml/README.md @@ -0,0 +1,338 @@ +# lib\_opml + +lib\_opml is a library to read and write OPML in PHP. + +OPML is a standard designed to store and exchange outlines (i.e. a tree +structure arranged to show hierarchical relationships). It is mainly used to +exchange list of feeds between feed aggregators. The specification is +available at [opml.org](http://opml.org). + +lib\_opml has been tested with PHP 7.2+. It requires [DOMDocument](https://www.php.net/manual/book.dom.php) +to work. + +It supports versions 1.0 and 2.0 of OPML since these are the only published +versions. Version 1.1 is treated as version 1.0, as stated by the specification. + +It is licensed under the [MIT license](/LICENSE). + +## Installation + +lib\_opml is available on [Packagist](https://packagist.org/packages/marienfressinaud/lib_opml) +and it is recommended to install it with Composer: + +```console +$ composer require marienfressinaud/lib_opml +``` + +If you don’t use Composer, you can download [the ZIP archive](https://framagit.org/marienfressinaud/lib_opml/-/archive/main/lib_opml-main.zip) +and copy the content of the `src/` folder in your project. 
Then, load the files +manually: + +```php +<?php +require 'path/to/lib_opml/LibOpml/Exception.php'; +require 'path/to/lib_opml/LibOpml/LibOpml.php'; +require 'path/to/lib_opml/functions.php'; +``` + +## Usage + +### Parse OPML + +Let’s say that you have an OPML file named `my_opml_file.xml`: + +```xml +<?xml version="1.0" encoding="UTF-8" ?> +<opml version="2.0"> + <head> + <title>My OPML</title> + </head> + <body> + <outline text="Newspapers"> + <outline text="El País" /> + <outline text="Le Monde" /> + <outline text="The Guardian" /> + <outline text="The New York Times" /> + </outline> + </body> +</opml> +``` + +You can load it with: + +```php +$opml_array = libopml_parse_file('my_opml_file.xml'); +``` + +lib\_opml parses the file and returns an array: + +```php +[ + 'version' => '2.0', + 'namespaces' => [], + 'head' => [ + 'title' => 'My OPML' + ], + 'body' => [ // each entry of the body is an outline + [ + 'text' => 'Newspapers', + '@outlines' => [ // sub-outlines are accessible with the @outlines key + ['text' => 'El País'], + ['text' => 'Le Monde'], + ['text' => 'The Guardian'], + ['text' => 'The New York Times'] + ] + ] + ] +] +``` + +Since it's just an array, it's very simple to manipulate: + +```php +foreach ($opml_array['body'] as $outline) { + echo $outline['text']; +} +``` + +You also can load directly an OPML string: + +```php +$opml_string = '<opml>...</opml>'; +$opml_array = libopml_parse_string($opml_string); +``` + +### Render OPML + +lib\_opml is able to render an OPML string from an array. It checks that the +data is valid and respects the specification. 
+ +```php +$opml_array = [ + 'head' => [ + 'title' => 'My OPML', + ], + 'body' => [ + [ + 'text' => 'Newspapers', + '@outlines' => [ + ['text' => 'El País'], + ['text' => 'Le Monde'], + ['text' => 'The Guardian'], + ['text' => 'The New York Times'] + ] + ] + ] +]; + +$opml_string = libopml_render($opml_array); + +file_put_contents('my_opml_file.xml', $opml_string); +``` + +### Handle errors + +If rendering (or parsing) fails for any reason (e.g. empty `body`, missing +`text` attribute, wrong element type), a `\marienfressinaud\LibOpml\Exception` +is raised: + +```php +try { + $opml_array = libopml_render([ + 'body' => [] + ]); +} catch (\marienfressinaud\LibOpml\Exception $e) { + echo $e->getMessage(); +} +``` + +### Class style + +lib\_opml can also be used with a class style: + +```php +use marienfressinaud\LibOpml; + +$libopml = new LibOpml\LibOpml(); + +$opml_array = $libopml->parseFile($filename); +$opml_array = $libopml->parseString($opml_string); +$opml_string = $libopml->render($opml_array); +``` + +### Special elements and attributes + +Some elements have special meanings according to the specification, which means +they can be parsed to a specific type by lib\_opml. In the other way, when +rendering an OPML string, you must pass these elements with their correct +types. + +Head elements: + +- `dateCreated` is parsed to a `\DateTime`; +- `dateModified` is parsed to a `\DateTime`; +- `expansionState` is parsed to an array of integers; +- `vertScrollState` is parsed to an integer; +- `windowTop` is parsed to an integer; +- `windowLeft` is parsed to an integer; +- `windowBottom` is parsed to an integer; +- `windowRight` is parsed to an integer. + +Outline attributes: + +- `created` is parsed to a `\DateTime`; +- `category` is parsed to an array of strings; +- `isComment` is parsed to a boolean; +- `isBreakpoint` is parsed to a boolean. + +If one of these elements is not of the correct type, an Exception is raised. 
+ +Finally, there are additional checks based on the outline type attribute: + +- if `type="rss"`, then the `xmlUrl` attribute is required; +- if `type="link"`, then the `url` attribute is required; +- if `type="include"`, then the `url` attribute is required. + +Note that the `type` attribute is case-insensitive and will always be lowercased. + +### Namespaces + +OPML can be extended with namespaces: + +> An OPML file may contain elements and attributes not described on this page, +> only if those elements are defined in a namespace, as specified by the W3C. + +When rendering an OPML, you can include a `namespaces` key to specify +namespaces: + +```php +$opml_array = [ + 'namespaces' => [ + 'test' => 'https://example.com/test', + ], + 'body' => [ + ['text' => 'My outline', 'test:path' => '/some/example/path'], + ], +]; + +$opml_string = libopml_render($opml_array); +echo $opml_string; +``` + +This will output: + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<opml xmlns:test="https://example.com/test" version="2.0"> + <head/> + <body> + <outline text="My outline" test:path="/some/example/path"/> + </body> +</opml> +``` + +### Strictness + +You can tell lib\_opml to be less or more strict when parsing or rendering OPML. +This is done by passing an optional `$strict` argument to the functions. When +strict is `false`, most of the specification requirements are simply ignored +and lib\_opml will do its best to parse (or generate) an OPML. + +By default, parsing is not strict so you’ll be able to read most of the files +out there. If you want the parsing to be strict (to validate a file for +instance), pass `true` to `libopml_parse_file()` or `libopml_parse_string()`: + +```php +$opml_array = libopml_parse_file($filename, true); +$opml_array = libopml_parse_string($opml_string, true); +``` + +On the other hand, rendering is strict by default, so you are encouraged to +generate valid OPMLs. 
If you need to relax the strictness, pass `false` to +`libopml_render()`: + +```php +$opml_string = libopml_render($opml_array, false); +``` + +Please note that when using the class form, strict is passed during the object +instantiation: + +```php +use marienfressinaud\LibOpml; + +// lib_opml will be strict for both parsing and rendering! +$libopml = new LibOpml\LibOpml(true); + +$opml_array = $libopml->parseString($opml_string); +$opml_string = $libopml->render($opml_array); +``` + +## Examples and documented source code + +See the [`examples/`](/examples) folder for concrete examples. + +You are encouraged to read the source code to learn more about lib\_opml. Thus, +the full documentation is available as comments in the code: + +- [`src/LibOpml/LibOpml.php`](src/LibOpml/LibOpml.php) +- [`src/LibOpml/Exception.php`](src/LibOpml/Exception.php) +- [`src/functions.php`](src/functions.php) + +## Changelog + +See [CHANGELOG.md](/CHANGELOG.md). + +## Support and stability + +Today, lib\_opml covers all the aspects of the OPML specification. Since the +spec didn't change for more than 15 years, it is expected for the library to +not change a lot in the future. Thus, I plan to release the v1.0 in a near +future. I'm only waiting for more tests to be done on its latest version (in +particular in FreshRSS, see [FreshRSS/FreshRSS#4403](https://github.com/FreshRSS/FreshRSS/pull/4403)). +I would also wait for clarifications about the specification (see [scripting/opml.org#3](https://github.com/scripting/opml.org/issues/3)), +but it isn't a hard requirement. + +After the release of 1.0, lib\_opml will be considered as “finished”. This +means I will not add new features, nor break the existing code. However, I +commit myself to continue to support the library to fix security issues, bugs, +or to add support to new PHP versions. + +In consequence, you can expect lib\_opml to be stable. + +## Tests and linters + +This section is for developers of lib\_opml. 
+ +To run the tests, you’ll have to install Composer first (see [the official +documentation](https://getcomposer.org/doc/00-intro.md)). Then, install the +dependencies: + +```console +$ make install +``` + +You should now have a `vendor/` folder containing the development dependencies. + +Run the tests with: + +```console +$ make test +``` + +Run the linter with: + +```console +$ make lint +$ make lint-fix +``` + +## Contributing + +Please submit bug reports and merge requests to the [Framagit repository](https://framagit.org/marienfressinaud/lib_opml). + +There’s not a lot to do, but the documentation and examples could probably be +improved. + +Merge requests require that you fill a short checklist to save me time while +reviewing your changes. You also must make sure the test suite succeeds. diff --git a/lib/marienfressinaud/lib_opml/composer.json b/lib/marienfressinaud/lib_opml/composer.json new file mode 100644 index 000000000..ba48d16ed --- /dev/null +++ b/lib/marienfressinaud/lib_opml/composer.json @@ -0,0 +1,35 @@ +{ + "name": "marienfressinaud/lib_opml", + "description": "A library to read and write OPML in PHP.", + "license": "MIT", + "authors": [ + { + "name": "Marien Fressinaud", + "email": "dev@marienfressinaud.fr" + } + ], + "require": { + "php": ">=7.2.0", + "ext-dom": "*" + }, + "config": { + "platform": { + "php": "7.2.0" + } + }, + "support": { + "issues": "https://framagit.org/marienfressinaud/lib_opml/-/issues" + }, + "autoload": { + "files": [ + "src/functions.php" + ], + "psr-4": { + "marienfressinaud\\": "src/" + } + }, + "require-dev": { + "squizlabs/php_codesniffer": "^3.6", + "phpunit/phpunit": "^8" + } +} diff --git a/lib/marienfressinaud/lib_opml/src/LibOpml/Exception.php b/lib/marienfressinaud/lib_opml/src/LibOpml/Exception.php new file mode 100644 index 000000000..27c3287a2 --- /dev/null +++ b/lib/marienfressinaud/lib_opml/src/LibOpml/Exception.php @@ -0,0 +1,15 @@ +<?php + +namespace marienfressinaud\LibOpml; + +/** + * A simple 
Exception class which represents any kind of OPML problem. + * The message describes the current problem. + * + * @author Marien Fressinaud <dev@marienfressinaud.fr> + * @link https://framagit.org/marienfressinaud/lib_opml + * @license MIT + */ +class Exception extends \Exception +{ +} diff --git a/lib/marienfressinaud/lib_opml/src/LibOpml/LibOpml.php b/lib/marienfressinaud/lib_opml/src/LibOpml/LibOpml.php new file mode 100644 index 000000000..4ba0df821 --- /dev/null +++ b/lib/marienfressinaud/lib_opml/src/LibOpml/LibOpml.php @@ -0,0 +1,770 @@ +<?php + +namespace marienfressinaud\LibOpml; + +/** + * The LibOpml class provides the methods to read and write OPML files and + * strings. It transforms OPML files or strings to PHP arrays (or the reverse). + * + * How to read this file? + * + * The first methods are dedicated to the parsing, and the next ones to the + * rendering. The last three methods are helper methods, but you don't have to + * worry too much about them. + * + * The main methods are the public ones: parseFile, parseString and render. + * They call the other parse* and render* methods internally. + * + * These three main methods are available as functions (see the src/functions.php + * file). + * + * What's the array format? + * + * As said before, LibOpml transforms OPML to PHP arrays, or the reverse. The + * format is pretty simple. It contains four keys: + * + * - version: the version of the OPML; + * - namespaces: an array of namespaces used in the OPML, if any; + * - head: an array of OPML head elements, where keys are the names of the + * elements; + * - body: an array of arrays representing OPML outlines, where keys are the + * name of the attributes (the special @outlines key contains the sub-outlines). + * + * When rendering, only the body key is required (version will default to 2.0). 
+ * + * Example: + * + * [ + * version => '2.0', + * namespaces => [], + * head => [ + * title => 'An OPML file' + * ], + * body => [ + * [ + * text => 'Newspapers', + * @outlines => [ + * [text => 'El País'], + * [text => 'Le Monde'], + * [text => 'The Guardian'], + * [text => 'The New York Times'], + * ] + * ] + * ] + * ] + * + * @see http://opml.org/spec2.opml + * + * @author Marien Fressinaud <dev@marienfressinaud.fr> + * @link https://framagit.org/marienfressinaud/lib_opml + * @license MIT + */ +class LibOpml +{ + /** + * The list of valid head elements. + */ + public const HEAD_ELEMENTS = [ + 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail', + 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop', + 'windowLeft', 'windowBottom', 'windowRight' + ]; + + /** + * The list of numeric head elements. + */ + public const NUMERIC_HEAD_ELEMENTS = [ + 'vertScrollState', + 'windowTop', + 'windowLeft', + 'windowBottom', + 'windowRight', + ]; + + /** @var boolean */ + private $strict = true; + + /** @var string */ + private $version = '2.0'; + + /** @var string[] */ + private $namespaces = []; + + /** + * @param bool $strict + * Set to true (default) to check for violations of the specification, + * false otherwise. + */ + public function __construct($strict = true) + { + $this->strict = $strict; + } + + /** + * Parse a XML file and return the corresponding array. + * + * @param string $filename + * The XML file to parse. + * + * @throws \marienfressinaud\LibOpml\Exception + * Raised if the file cannot be read. See also exceptions raised by the + * parseString method. + * + * @return array + * An array reflecting the OPML (the structure is described above). 
*/
    public function parseFile($filename)
    {
        // Warnings are silenced on purpose: a missing or unreadable file is
        // reported through the exception below instead.
        $file_content = @file_get_contents($filename);

        if ($file_content === false) {
            throw new Exception("OPML file {$filename} cannot be found or read");
        }

        return $this->parseString($file_content);
    }

    /**
     * Parse a XML string and return the corresponding array.
     *
     * @param string $xml
     *     The XML string to parse.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the XML cannot be parsed (or has no root element), if
     *     version is missing or invalid, if head is missing or contains
     *     invalid (or not parsable) elements, or if body is missing, empty or
     *     contain non outline elements. The exceptions (except XML parsing
     *     errors) are not raised if strict is false. See also exceptions
     *     raised by the parseOutline method.
     *
     * @return array
     *     An array reflecting the OPML (the structure is described above).
     */
    public function parseString($xml)
    {
        $dom = new \DOMDocument();
        $dom->recover = true;
        $dom->encoding = 'UTF-8';

        // loadXML() may either emit a warning and return false, or throw;
        // both outcomes are normalized to a false result.
        try {
            $result = @$dom->loadXML($xml);
        } catch (\Exception | \Error $e) {
            $result = false;
        }

        // The parser runs in recover mode, so also make sure a root element
        // exists before dereferencing it below.
        if (!$result || $dom->documentElement === null) {
            throw new Exception('OPML string is not valid XML');
        }

        $opml_element = $dom->documentElement;

        // Load the custom namespaces of the document
        $xpath = new \DOMXPath($dom);
        $this->namespaces = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            if ($node->prefix === 'xml') {
                // This is the base namespace, we don't need to store it
                continue;
            }

            $this->namespaces[$node->prefix] = $node->namespaceURI;
        }

        // Get the version of the document
        $version = $opml_element->getAttribute('version');
        if (!$version) {
            $this->throwExceptionIfStrict('OPML version attribute is required');
        }

        // Version 1.1 is handled exactly like 1.0
        $version = trim($version);
        if ($version === '1.1') {
            $version = '1.0';
        }

        if ($version !== '1.0' && $version !== '2.0') {
            $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
        }

        $this->version = $version;

        // Get head and body child elements
        $head_elements = $opml_element->getElementsByTagName('head');
        $child_head_elements = [];
        if ($head_elements->length === 1) {
            $child_head_elements = $head_elements->item(0)->childNodes;
        } else {
            $this->throwExceptionIfStrict('OPML must contain one and only one head element');
        }

        $body_elements = $opml_element->getElementsByTagName('body');
        $child_body_elements = [];
        if ($body_elements->length === 1) {
            $child_body_elements = $body_elements->item(0)->childNodes;
        } else {
            $this->throwExceptionIfStrict('OPML must contain one and only one body element');
        }

        $array = [
            'version' => $this->version,
            'namespaces' => $this->namespaces,
            'head' => [],
            'body' => [],
        ];

        // Load the child head elements in the head array
        foreach ($child_head_elements as $child_head_element) {
            if ($child_head_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $name = $child_head_element->nodeName;
            $value = $child_head_element->nodeValue;
            $namespaced = $child_head_element->namespaceURI !== null;

            if (!in_array($name, self::HEAD_ELEMENTS, true) && !$namespaced) {
                $this->throwExceptionIfStrict(
                    "OPML head {$name} element is not part of the specification"
                );
            }

            if ($name === 'dateCreated' || $name === 'dateModified') {
                try {
                    $value = $this->parseDate($value);
                } catch (\DomainException $e) {
                    // When not strict, the raw string value is kept as is.
                    $this->throwExceptionIfStrict(
                        "OPML head {$name} element must be a valid RFC822 or RFC1123 date"
                    );
                }
            } elseif ($name === 'ownerEmail') {
                // Testing email validity is hard. PHP filter_var() function is
                // too strict compared to the RFC 822, so we can't use it.
                if (strpos($value, '@') === false) {
                    $this->throwExceptionIfStrict(
                        'OPML head ownerEmail element must be an email address'
                    );
                }
            } elseif ($name === 'ownerId' || $name === 'docs') {
                if (!$this->checkHttpAddress($value)) {
                    $this->throwExceptionIfStrict(
                        "OPML head {$name} element must be a HTTP address"
                    );
                }
            } elseif ($name === 'expansionState') {
                // Comma-separated list of numbers; non-numeric items are kept
                // as strings when not strict.
                $numbers = explode(',', $value);
                $value = array_map(function ($str_number) {
                    if (is_numeric($str_number)) {
                        return intval($str_number);
                    } else {
                        $this->throwExceptionIfStrict(
                            'OPML head expansionState element must be a list of numbers'
                        );
                        return $str_number;
                    }
                }, $numbers);
            } elseif (in_array($name, self::NUMERIC_HEAD_ELEMENTS, true)) {
                if (is_numeric($value)) {
                    $value = intval($value);
                } else {
                    $this->throwExceptionIfStrict("OPML head {$name} element must be a number");
                }
            }

            $array['head'][$name] = $value;
        }

        // Load the child body elements in the body array
        foreach ($child_body_elements as $child_body_element) {
            if ($child_body_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if ($child_body_element->nodeName === 'outline') {
                $array['body'][] = $this->parseOutline($child_body_element);
            } else {
                $this->throwExceptionIfStrict(
                    'OPML body element can only contain outline elements'
                );
            }
        }

        if (empty($array['body'])) {
            $this->throwExceptionIfStrict(
                'OPML body element must contain at least one outline element'
            );
        }

        return $array;
    }

    /**
     * Parse a XML element as an outline element and return the corresponding array.
     *
     * @param \DOMElement $outline_element
     *     The element to parse.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the outline contains non-outline elements, if it doesn't
     *     contain a text attribute (or if empty), if a special attribute is
     *     not parsable, or if type attribute requirements are not met. The
     *     exceptions are not raised if strict is false. The exception about
     *     missing text attribute is not raised if version is 1.0.
     *
     * @return array
     *     An array reflecting the OPML outline (the structure is described above).
     */
    private function parseOutline($outline_element)
    {
        $outline = [];

        // Load the element attributes in the outline array
        foreach ($outline_element->attributes as $outline_attribute) {
            $name = $outline_attribute->nodeName;
            $value = $outline_attribute->nodeValue;

            if ($name === 'created') {
                try {
                    $value = $this->parseDate($value);
                } catch (\DomainException $e) {
                    $this->throwExceptionIfStrict(
                        'OPML outline created attribute must be a valid RFC822 or RFC1123 date'
                    );
                }
            } elseif ($name === 'category') {
                // Comma-separated list, exposed as an array of trimmed strings
                $value = array_map('trim', explode(',', $value));
            } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
                if ($value === 'true' || $value === 'false') {
                    $value = $value === 'true';
                } else {
                    $this->throwExceptionIfStrict(
                        "OPML outline {$name} attribute must be a boolean (true or false)"
                    );
                }
            } elseif ($name === 'type') {
                // type attribute is case-insensitive
                $value = strtolower($value);
            }

            $outline[$name] = $value;
        }

        if (empty($outline['text']) && $this->version !== '1.0') {
            $this->throwExceptionIfStrict(
                'OPML outline text attribute is required'
            );
        }

        // Perform additional check based on the type of the outline
        $type = $outline['type'] ?? '';
        if ($type === 'rss') {
            if (empty($outline['xmlUrl'])) {
                $this->throwExceptionIfStrict(
                    'OPML outline xmlUrl attribute is required when type is "rss"'
                );
            } elseif (!$this->checkHttpAddress($outline['xmlUrl'])) {
                $this->throwExceptionIfStrict(
                    'OPML outline xmlUrl attribute must be a HTTP address when type is "rss"'
                );
            }
        } elseif ($type === 'link' || $type === 'include') {
            if (empty($outline['url'])) {
                $this->throwExceptionIfStrict(
                    "OPML outline url attribute is required when type is \"{$type}\""
                );
            } elseif (!$this->checkHttpAddress($outline['url'])) {
                $this->throwExceptionIfStrict(
                    "OPML outline url attribute must be a HTTP address when type is \"{$type}\""
                );
            }
        }

        // Load the sub-outlines in a @outlines array
        foreach ($outline_element->childNodes as $child_outline_element) {
            if ($child_outline_element->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if ($child_outline_element->nodeName === 'outline') {
                $outline['@outlines'][] = $this->parseOutline($child_outline_element);
            } else {
                $this->throwExceptionIfStrict(
                    'OPML body element can only contain outline elements'
                );
            }
        }

        return $outline;
    }

    /**
     * Parse a value as a date.
     *
     * The RFC822 format is tried first, then RFC1123.
     *
     * @param string $value
     *
     * @throws \DomainException
     *     Raised if the value cannot be parsed.
     *
     * @return \DateTime
     */
    private function parseDate($value)
    {
        $formats = [
            \DateTimeInterface::RFC822,
            \DateTimeInterface::RFC1123,
        ];

        foreach ($formats as $format) {
            $date = date_create_from_format($format, $value);
            if ($date !== false) {
                return $date;
            }
        }

        throw new \DomainException('The argument cannot be parsed as a date');
    }

    /**
     * Render an OPML array as a string or a \DOMDocument.
     *
     * @param array $array
     *     The array to render, it must follow the structure defined above.
     * @param bool $as_dom_document
     *     Set to false (default) to return the array as a string, true to
     *     return as a \DOMDocument.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the `head` array contains unknown or invalid elements
     *     (i.e. not of correct type), or if the `body` array is missing or
     *     empty. The exceptions are not raised if strict is false. An
     *     undeclared namespace prefix always raises an exception. See also
     *     exceptions raised by the renderOutline method.
     *
     * @return string|\DOMDocument
     *     The XML string or DOM document corresponding to the given array.
     */
    public function render($array, $as_dom_document = false)
    {
        $dom = new \DOMDocument('1.0', 'UTF-8');
        $opml_element = new \DOMElement('opml');
        $dom->appendChild($opml_element);

        // Set the version attribute of the OPML document
        $version = $array['version'] ?? '2.0';

        // Version 1.1 is handled exactly like 1.0
        if ($version === '1.1') {
            $version = '1.0';
        }

        if ($version !== '1.0' && $version !== '2.0') {
            $this->throwExceptionIfStrict('OPML supported versions are 1.0 and 2.0');
        }

        $this->version = $version;
        $opml_element->setAttribute('version', $this->version);

        // Declare the namespace on the opml element
        $this->namespaces = $array['namespaces'] ?? [];
        foreach ($this->namespaces as $prefix => $namespace) {
            $opml_element->setAttributeNS(
                'http://www.w3.org/2000/xmlns/',
                "xmlns:{$prefix}",
                $namespace
            );
        }

        // Add the head element to the OPML document. $array['head'] is
        // optional but head tag will always exist in the final XML.
        $head_element = new \DOMElement('head');
        $opml_element->appendChild($head_element);
        if (isset($array['head'])) {
            foreach ($array['head'] as $name => $value) {
                $namespace = $this->getNamespace($name);

                if (!in_array($name, self::HEAD_ELEMENTS, true) && !$namespace) {
                    $this->throwExceptionIfStrict(
                        "OPML head {$name} element is not part of the specification"
                    );
                }

                if ($name === 'dateCreated' || $name === 'dateModified') {
                    if ($value instanceof \DateTimeInterface) {
                        $value = $value->format(\DateTimeInterface::RFC1123);
                    } else {
                        $this->throwExceptionIfStrict(
                            "OPML head {$name} element must be a DateTime"
                        );
                    }
                } elseif ($name === 'ownerEmail') {
                    // Testing email validity is hard. PHP filter_var() function is
                    // too strict compared to the RFC 822, so we can't use it.
                    if (strpos($value, '@') === false) {
                        $this->throwExceptionIfStrict(
                            'OPML head ownerEmail element must be an email address'
                        );
                    }
                } elseif ($name === 'ownerId' || $name === 'docs') {
                    if (!$this->checkHttpAddress($value)) {
                        $this->throwExceptionIfStrict(
                            "OPML head {$name} element must be a HTTP address"
                        );
                    }
                } elseif ($name === 'expansionState') {
                    if (is_array($value)) {
                        foreach ($value as $number) {
                            if (!is_int($number)) {
                                $this->throwExceptionIfStrict(
                                    'OPML head expansionState element must be an array of integers'
                                );
                            }
                        }

                        $value = implode(', ', $value);
                    } else {
                        $this->throwExceptionIfStrict(
                            'OPML head expansionState element must be an array of integers'
                        );
                    }
                } elseif (in_array($name, self::NUMERIC_HEAD_ELEMENTS, true)) {
                    if (!is_int($value)) {
                        $this->throwExceptionIfStrict(
                            "OPML head {$name} element must be an integer"
                        );
                    }
                }

                $child_head_element = new \DOMElement($name, $value, $namespace);
                $head_element->appendChild($child_head_element);
            }
        }

        // Check body is set and contains at least one element
        if (!isset($array['body'])) {
            $this->throwExceptionIfStrict('OPML array must contain a body key');
        }

        $array_body = $array['body'] ?? [];
        if (count($array_body) <= 0) {
            $this->throwExceptionIfStrict(
                'OPML body element must contain at least one outline array'
            );
        }

        // Create outline elements in the body element
        $body_element = new \DOMElement('body');
        $opml_element->appendChild($body_element);
        foreach ($array_body as $outline) {
            $this->renderOutline($body_element, $outline);
        }

        // And return the final result
        if ($as_dom_document) {
            return $dom;
        } else {
            $dom->formatOutput = true;
            return $dom->saveXML();
        }
    }

    /**
     * Transform an outline array to a \DOMElement and add it to a parent element.
     *
     * @param \DOMElement $parent_element
     *     The DOM parent element of the current outline.
     * @param array $outline
     *     The outline array to transform in a \DOMElement, it must follow the
     *     structure defined above.
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the outline is not an array, if it doesn't contain a text
     *     attribute (or if empty), if the `@outlines` key is not an array, if
     *     a special attribute does not match its corresponding type, or if
     *     `type` key requirements are not met. The exceptions (except errors
     *     about outline or suboutlines not being arrays, and about undeclared
     *     namespace prefixes) are not raised if strict is false. The exception
     *     about missing text attribute is not raised if version is 1.0.
     */
    private function renderOutline($parent_element, $outline)
    {
        // Perform initial checks to verify the outline is correctly declared
        if (!is_array($outline)) {
            throw new Exception(
                'OPML outline element must be defined as an array'
            );
        }

        if (empty($outline['text']) && $this->version !== '1.0') {
            $this->throwExceptionIfStrict(
                'OPML outline text attribute is required'
            );
        }

        if (isset($outline['type'])) {
            // type attribute is case-insensitive
            $type = strtolower($outline['type']);

            if ($type === 'rss') {
                if (empty($outline['xmlUrl'])) {
                    $this->throwExceptionIfStrict(
                        'OPML outline xmlUrl attribute is required when type is "rss"'
                    );
                } elseif (!$this->checkHttpAddress($outline['xmlUrl'])) {
                    $this->throwExceptionIfStrict(
                        'OPML outline xmlUrl attribute must be a HTTP address when type is "rss"'
                    );
                }
            } elseif ($type === 'link' || $type === 'include') {
                if (empty($outline['url'])) {
                    $this->throwExceptionIfStrict(
                        "OPML outline url attribute is required when type is \"{$type}\""
                    );
                } elseif (!$this->checkHttpAddress($outline['url'])) {
                    $this->throwExceptionIfStrict(
                        "OPML outline url attribute must be a HTTP address when type is \"{$type}\""
                    );
                }
            }
        }

        // Create the outline element and add it to the parent
        $outline_element = new \DOMElement('outline');
        $parent_element->appendChild($outline_element);

        // Load the sub-outlines as child elements
        if (isset($outline['@outlines'])) {
            $outline_children = $outline['@outlines'];

            if (!is_array($outline_children)) {
                throw new Exception(
                    'OPML outline element must be defined as an array'
                );
            }

            foreach ($outline_children as $outline_child) {
                $this->renderOutline($outline_element, $outline_child);
            }

            // We don't want the sub-outlines to be loaded as attributes, so we
            // remove the key from the array.
            unset($outline['@outlines']);
        }

        // Load the other elements of the array as attributes
        foreach ($outline as $name => $value) {
            $namespace = $this->getNamespace($name);

            if ($name === 'created') {
                if ($value instanceof \DateTimeInterface) {
                    $value = $value->format(\DateTimeInterface::RFC1123);
                } else {
                    $this->throwExceptionIfStrict(
                        'OPML outline created attribute must be a DateTime'
                    );
                }
            } elseif ($name === 'isComment' || $name === 'isBreakpoint') {
                if (is_bool($value)) {
                    $value = $value ? 'true' : 'false';
                } else {
                    $this->throwExceptionIfStrict(
                        "OPML outline {$name} attribute must be a boolean"
                    );
                }
            } elseif (is_array($value)) {
                // Array values (e.g. a list of categories) are flattened to a
                // comma-separated string.
                $value = implode(', ', $value);
            }

            $outline_element->setAttributeNS($namespace, $name, $value);
        }
    }

    /**
     * Return whether a value is a valid HTTP address or not.
     *
     * HTTP address is not strictly defined by the OPML spec, so it is assumed:
     *
     * - it can be parsed by parse_url
     * - it has a host part
     * - scheme is http or https (compared case-insensitively)
     *
     * filter_var is not used because it would reject internationalized URLs
     * (i.e. with non ASCII chars). An alternative would be to punycode such
     * URLs, but it's more work to do it properly, and lib_opml needs to stay
     * simple.
     *
     * @param string $value
     *     The value to check. Values that are not strings (nor objects
     *     convertible to a string) are never valid.
     *
     * @return boolean
     *     Return true if the value is a valid HTTP address, false otherwise.
     */
    public function checkHttpAddress($value)
    {
        if (is_object($value) && method_exists($value, '__toString')) {
            $value = (string) $value;
        }

        // Reject non-strings up front instead of letting trim() fail on them
        if (!is_string($value)) {
            return false;
        }

        $parsed_url = parse_url(trim($value));
        if (!$parsed_url) {
            return false;
        }

        if (!isset($parsed_url['scheme'], $parsed_url['host'])) {
            return false;
        }

        // parse_url() keeps the scheme as written, so normalize its case
        // before comparing.
        $scheme = strtolower($parsed_url['scheme']);

        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * Return the namespace of a qualified name. An empty string is returned if
     * the name is not namespaced.
     *
     * @param string $qualified_name
     *
     * @throws \marienfressinaud\LibOpml\Exception
     *     Raised if the namespace prefix isn't declared (whatever the value
     *     of strict).
     *
     * @return string
     */
    private function getNamespace($qualified_name)
    {
        $split_name = explode(':', $qualified_name, 2);
        // count will always be 1 or 2.
        if (count($split_name) === 1) {
            // If 1, there's no prefix, thus no namespace
            return '';
        } else {
            // If 2, it means it has a namespace prefix, so we get the
            // namespace from the declared ones.
            $namespace_prefix = $split_name[0];
            if (!isset($this->namespaces[$namespace_prefix])) {
                throw new Exception(
                    "OPML namespace {$namespace_prefix} is not declared"
                );
            }

            return $this->namespaces[$namespace_prefix];
        }
    }

    /**
     * Raise an exception only if strict is true.
     *
     * @param string $message
     *
     * @throws \marienfressinaud\LibOpml\Exception
     */
    private function throwExceptionIfStrict($message)
    {
        if ($this->strict) {
            throw new Exception($message);
        }
    }
}
@@ -5,6 +5,7 @@ <arg name="tab-width" value="4"/> <exclude-pattern>./.git/</exclude-pattern> <exclude-pattern>./lib/SimplePie/</exclude-pattern> + <exclude-pattern>./lib/marienfressinaud/</exclude-pattern> <exclude-pattern>./lib/phpgt/</exclude-pattern> <exclude-pattern>./lib/phpmailer/</exclude-pattern> <exclude-pattern>./lib/http-conditional.php</exclude-pattern> diff --git a/phpstan.neon b/phpstan.neon index 91509245f..846731c70 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -7,18 +7,16 @@ parameters: paths: - . 
excludePaths: - - .git/* - - lib/phpmailer/* - - lib/SimplePie/* - - node_modules/* - # TODO: include tests - - tests/* - - vendor/* - scanDirectories: - - lib/phpmailer/ - - lib/SimplePie/ + analyse: + - lib/marienfressinaud/* + - lib/phpmailer/* + - lib/SimplePie/* + analyseAndScan: + - .git/* + - node_modules/* + # TODO: include tests + - tests/* + - vendor/* bootstrapFiles: - cli/_cli.php - lib/favicons.php - - lib/SimplePie/SimplePie.php - - app/SQL/install.sql.sqlite.php |
