diff options
| author | 2014-03-29 20:18:57 +0100 | |
|---|---|---|
| committer | 2014-03-29 20:18:57 +0100 | |
| commit | 9ea3819402746d8425d4a608f2d5f3c0f5bc29fb (patch) | |
| tree | a4836c792d14e5489ebabe6e5d9f46e9b098ce93 | |
| parent | 7676a197a4767f735dabae6ad9cf40ef65e91aa7 (diff) | |
Better OPML import / export
- use a new OPML library (https://github.com/marienfressinaud/lib_opml)
- import has been completely rewritten (far better!)
- introduce addFeedObject and addCategoryObject (in DAO for the moment).
Permit to add easily feeds and categories (check if they already exist in DB)
- introduce html_chars_utf8 (wrap htmlspecialchars for UTF-8)
| -rw-r--r-- | app/Controllers/importExportController.php | 124 | ||||
| -rw-r--r-- | app/Exceptions/OpmlException.php | 6 | ||||
| -rw-r--r-- | app/Models/CategoryDAO.php | 12 | ||||
| -rw-r--r-- | app/Models/FeedDAO.php | 29 | ||||
| -rw-r--r-- | app/views/helpers/export/opml.phtml | 43 | ||||
| -rw-r--r-- | lib/lib_opml.php | 277 | ||||
| -rw-r--r-- | lib/lib_rss.php | 4 |
7 files changed, 345 insertions, 150 deletions
diff --git a/app/Controllers/importExportController.php b/app/Controllers/importExportController.php index cbadeb6ca..b6b4d0fed 100644 --- a/app/Controllers/importExportController.php +++ b/app/Controllers/importExportController.php @@ -129,71 +129,101 @@ class FreshRSS_importExport_Controller extends Minz_ActionController { } private function import_opml($opml_file) { - $categories = array(); - $feeds = array(); + $opml_array = array(); try { - list($categories, $feeds) = opml_import($opml_file); - } catch (FreshRSS_Opml_Exception $e) { + $opml_array = libopml_parse_string($opml_file); + } catch (LibOPML_Exception $e) { Minz_Log::warning($e->getMessage()); return true; } $this->catDAO->checkDefault(); - // on ajoute les catégories en masse dans une fonction à part - $this->addCategories($categories); - - // on calcule la date des articles les plus anciens qu'on accepte - $nb_month_old = $this->view->conf->old_entries; - $date_min = time() - (3600 * 24 * 30 * $nb_month_old); + return $this->addOpmlElements($opml_array['body']); + } - // la variable $error permet de savoir si une erreur est survenue - // Le but est de ne pas arrêter l'import même en cas d'erreur - // L'utilisateur sera mis au courant s'il y a eu des erreurs, mais - // ne connaîtra pas les détails. Ceux-ci seront toutefois logguées + private function addOpmlElements($opml_elements, $parent_cat = null) { $error = false; - foreach ($feeds as $feed) { - try { - $values = array( - 'id' => $feed->id(), - 'url' => $feed->url(), - 'category' => $feed->category(), - 'name' => $feed->name(), - 'website' => $feed->website(), - 'description' => $feed->description(), - 'lastUpdate' => 0, - 'httpAuth' => $feed->httpAuth() - ); + foreach ($opml_elements as $elt) { + $res = false; + if (isset($elt['xmlUrl'])) { + $res = $this->addFeedOpml($elt, $parent_cat); + } else { + $res = $this->addCategoryOpml($elt, $parent_cat); + } - // ajout du flux que s'il n'est pas déjà en BDD - if (!$this->feedDAO->searchByUrl($values['url'])) { - $id = $this->feedDAO->addFeed($values); - if ($id) { - $feed->_id($id); - $feed->faviconPrepare(); - } else { - $error = true; - } - } - } catch (FreshRSS_Feed_Exception $e) { - $error = true; - Minz_Log::record($e->getMessage(), Minz_Log::WARNING); + if (!$error && $res) { + // oops: there is at least one error! + $error = $res; } } return $error; } - private function addCategories($categories) { - foreach ($categories as $cat) { - if (!$this->catDAO->searchByName($cat->name())) { - $values = array( - 'id' => $cat->id(), - 'name' => $cat->name(), - ); - $this->catDAO->addCategory($values); + private function addFeedOpml($feed_elt, $parent_cat) { + if (is_null($parent_cat)) { + // This feed has no parent category so we get the default one + $parent_cat = $catDAO->getDefault()->name(); + } + + $cat = $this->catDAO->searchByName($parent_cat); + + if (!$cat) { + return true; + } + + // We get different useful information + $url = html_chars_utf8($feed_elt['xmlUrl']); + $name = html_chars_utf8($feed_elt['text']); + $website = ''; + if (isset($feed_elt['htmlUrl'])) { + $website = html_chars_utf8($feed_elt['htmlUrl']); + } + $description = ''; + if (isset($feed_elt['description'])) { + $description = html_chars_utf8($feed_elt['description']); + } + + $error = false; + try { + // Create a Feed object and add it in DB + $feed = new FreshRSS_Feed($url); + $feed->_category($cat->id()); + $feed->_name($name); + $feed->_website($website); + $feed->_description($description); + + // addFeedObject checks if feed is already in DB so nothing else to + // check here + $id = $this->feedDAO->addFeedObject($feed); + $error = ($id === false); + } catch (FreshRSS_Feed_Exception $e) { + Minz_Log::record($e->getMessage(), Minz_Log::WARNING); + $error = true; + } + + return $error; + } + + private function addCategoryOpml($cat_elt, $parent_cat) { + // Create a new Category object + $cat = new FreshRSS_Category(html_chars_utf8($cat_elt['text'])); + + $id = $this->catDAO->addCategoryObject($cat); + $error = ($id === false); + + if (isset($cat_elt['@outlines'])) { + // Our cat_elt contains more categories or more feeds, so we + // add them recursively. + // Note: FreshRSS does not support yet category arborescence + $res = $this->addOpmlElements($cat_elt['@outlines'], $cat->name()); + if (!$error && $res) { + $error = true; } } + + return $error; } private function import_articles($article_file, $starred = false) { diff --git a/app/Exceptions/OpmlException.php b/app/Exceptions/OpmlException.php deleted file mode 100644 index e0ea3e493..000000000 --- a/app/Exceptions/OpmlException.php +++ /dev/null @@ -1,6 +0,0 @@ -<?php -class FreshRSS_Opml_Exception extends FreshRSS_Feed_Exception { - public function __construct ($name_file) { - parent::__construct ('OPML file is invalid'); - } -} diff --git a/app/Models/CategoryDAO.php b/app/Models/CategoryDAO.php index f3c02e3e4..8be732b98 100644 --- a/app/Models/CategoryDAO.php +++ b/app/Models/CategoryDAO.php @@ -18,6 +18,18 @@ class FreshRSS_CategoryDAO extends Minz_ModelPdo { } } + public function addCategoryObject($category) { + if (!$this->searchByName($category->name())) { + // Category does not exist yet in DB so we add it before continue + $values = array( + 'name' => $category->name(), + ); + return $this->addCategory($values); + } + + return false; + } + public function updateCategory ($id, $valuesTmp) { $sql = 'UPDATE `' . $this->prefix . 'category` SET name=? WHERE id=?'; $stm = $this->bd->prepare ($sql); diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php index ca25c3aeb..eac21df7e 100644 --- a/app/Models/FeedDAO.php +++ b/app/Models/FeedDAO.php @@ -24,6 +24,35 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo { } } + public function addFeedObject($feed) { + // TODO: not sure if we should write this method in DAO since DAO + // should not be aware about feed class + + // Add feed only if we don't find it in DB + if (!$this->searchByUrl($feed->url())) { + $values = array( + 'id' => $feed->id(), + 'url' => $feed->url(), + 'category' => $feed->category(), + 'name' => $feed->name(), + 'website' => $feed->website(), + 'description' => $feed->description(), + 'lastUpdate' => 0, + 'httpAuth' => $feed->httpAuth() + ); + + $id = $this->addFeed($values); + if ($id) { + $feed->_id($id); + $feed->faviconPrepare(); + } + + return $id; + } + + return false; + } + public function updateFeed ($id, $valuesTmp) { $set = ''; foreach ($valuesTmp as $key => $v) { diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml index 2e66e5054..adbac904d 100644 --- a/app/views/helpers/export/opml.phtml +++ b/app/views/helpers/export/opml.phtml @@ -1,15 +1,30 @@ <?php -require_once(LIB_PATH . '/lib_opml.php'); - -echo '<?xml version="1.0" encoding="UTF-8" ?>'; -?> -<!-- Generated by <?php echo Minz_Configuration::title (); ?> --> -<opml version="2.0"> - <head> - <title><?php echo Minz_Configuration::title (); ?> OPML Feed</title> - <dateCreated><?php echo date('D, d M Y H:i:s'); ?></dateCreated> - </head> - <body> -<?php echo opml_export ($this->categories); ?> - </body> -</opml> + +$opml_array = array( + 'head' => array( + 'title' => Minz_Configuration::title(), + 'dateCreated' => date('D, d M Y H:i:s') + ), + 'body' => array() +); + +foreach ($this->categories as $key => $cat) { + $opml_array['body'][$key] = array( + 'text' => $cat['name'], + '@outlines' => array() + ); + + foreach ($cat['feeds'] as $feed) { + $opml_array['body'][$key]['@outlines'][] = array( + 'text' => $feed->name(), + 'type' => 'rss', + 'xmlUrl' => $feed->url(), + 'htmlUrl' => $feed->website(), + 'description' => htmlspecialchars( + $feed->description(), ENT_COMPAT, 'UTF-8' + ) + ); + } +} + +echo libopml_render($opml_array); diff --git a/lib/lib_opml.php b/lib/lib_opml.php index 05e54d85e..16a9921ea 100644 --- a/lib/lib_opml.php +++ b/lib/lib_opml.php @@ -1,23 +1,86 @@ <?php -function opml_export ($cats) { - $txt = ''; - foreach ($cats as $cat) { - $txt .= '<outline text="' . $cat['name'] . '">' . "\n"; - - foreach ($cat['feeds'] as $feed) { - $txt .= "\t" . '<outline text="' . $feed->name () . '" type="rss" xmlUrl="' . $feed->url () . '" htmlUrl="' . $feed->website () . '" description="' . htmlspecialchars($feed->description(), ENT_COMPAT, 'UTF-8') . '" />' . "\n"; +/* * + * lib_opml is a free library to manage OPML format in PHP. + * It takes in consideration only version 2.0 (http://dev.opml.org/spec2.html). + * Basically it means "text" attribute for outline elements is required. + * + * lib_opml requires SimpleXML (http://php.net/manual/en/book.simplexml.php) + * + * Usages: + * > include('lib_opml.php'); + * > $filename = 'my_opml_file.xml'; + * > $opml_array = libopml_parse_file($filename); + * > print_r($opml_array); + * + * > $opml_string = [...]; + * > $opml_array = libopml_parse_string($opml_string); + * > print_r($opml_array); + * + * > $opml_array = [...]; + * > $opml_string = libopml_render($opml_array); + * > $opml_object = libopml_render($opml_array, true); + * > echo $opml_string; + * > print_r($opml_object); + * + * If parsing fails for any reason (e.g. not an XML string, does not match with + * the specifications), a LibOPML_Exception is raised. + * + * Author: Marien Fressinaud <dev@marienfressinaud.fr> + * Url: https://github.com/marienfressinaud/lib_opml + * Version: 0.1 + * Date: 2014-03-29 + * License: public domain + * + * */ + +class LibOPML_Exception extends Exception {} + + +// These elements are optional +define('HEAD_ELEMENTS', serialize(array( + 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail', + 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop', + 'windowLeft', 'windowBottom', 'windowRight' +))); + + +function libopml_parse_outline($outline_xml) { + $outline = array(); + + // An outline may contain any kind of attributes but "text" attribute is + // required ! + $text_is_present = false; + foreach ($outline_xml->attributes() as $key => $value) { + $outline[$key] = (string)$value; + + if ($key === 'text') { + $text_is_present = true; } + } - $txt .= '</outline>' . "\n"; + if (!$text_is_present) { + throw new LibOPML_Exception( + 'Outline does not contain any text attribute' + ); } - return $txt; + foreach ($outline_xml->children() as $key => $value) { + // An outline may contain any number of outline children + if ($key === 'outline') { + $outline['@outlines'][] = libopml_parse_outline($value); + } else { + throw new LibOPML_Exception( + 'Body can contain only outline elements' + ); + } + } + + return $outline; } -function opml_import ($xml) { - $xml = html_only_entity_decode($xml); //!\ Assume UTF-8 +function libopml_parse_string($xml) { $dom = new DOMDocument(); $dom->recover = true; $dom->strictErrorChecking = false; @@ -27,94 +90,142 @@ function opml_import ($xml) { $opml = simplexml_import_dom($dom); if (!$opml) { - throw new FreshRSS_Opml_Exception (); + throw new LibOPML_Exception(); } - $catDAO = new FreshRSS_CategoryDAO(); - $catDAO->checkDefault(); - $defCat = $catDAO->getDefault(); + $array = array( + 'version' => (string)$opml['version'], + 'head' => array(), + 'body' => array() + ); + + // First, we get all "head" elements. Head is required but its sub-elements + // are optional. + foreach ($opml->head->children() as $key => $value) { + if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { + $array['head'][$key] = (string)$value; + } else { + throw new LibOPML_Exception( + $key . 'is not part of OPML format' + ); + } + } - $categories = array (); - $feeds = array (); + // Then, we get body oulines. Body must contain at least one outline + // element. + $at_least_one_outline = false; + foreach ($opml->body->children() as $key => $value) { + if ($key === 'outline') { + $at_least_one_outline = true; + $array['body'][] = libopml_parse_outline($value); + } else { + throw new LibOPML_Exception( + 'Body can contain only outline elements' + ); + } + } + + if (!$at_least_one_outline) { + throw new LibOPML_Exception( + 'Body must contain at least one outline element' + ); + } - foreach ($opml->body->outline as $outline) { - if (!isset ($outline['xmlUrl'])) { - // Catégorie - $title = ''; + return $array; +} - if (isset ($outline['text'])) { - $title = (string) $outline['text']; - } elseif (isset ($outline['title'])) { - $title = (string) $outline['title']; - } - if ($title) { - // Permet d'éviter les soucis au niveau des id : - // ceux-ci sont générés en fonction de la date, - // un flux pourrait être dans une catégorie X avec l'id Y - // alors qu'il existe déjà la catégorie X mais avec l'id Z - // Y ne sera pas ajouté et le flux non plus vu que l'id - // de sa catégorie n'exisera pas - $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8'); - $catDAO = new FreshRSS_CategoryDAO (); - $cat = $catDAO->searchByName ($title); - if ($cat == null) { - $cat = new FreshRSS_Category ($title); - $values = array ( - 'name' => $cat->name () - ); - $cat->_id ($catDAO->addCategory ($values)); - } - - $feeds = array_merge ($feeds, getFeedsOutline ($outline, $cat->id ())); +function libopml_parse_file($filename) { + $file_content = file_get_contents($filename); + + if ($file_content === false) { + throw new LibOPML_Exception( + $filename . ' cannot be found' + ); + } + + return libopml_parse_string($file_content); +} + + +function libopml_render_outline($parent_elt, $outline) { + // Outline MUST be an array! + if (!is_array($outline)) { + throw new LibOPML_Exception( + 'Outline element must be defined as array' + ); + } + + $outline_elt = $parent_elt->addChild('outline'); + $text_is_present = false; + foreach ($outline as $key => $value) { + // Only outlines can be an array and so we consider children are also + // outline elements. + if ($key === '@outlines' && is_array($value)) { + foreach ($value as $outline_child) { + libopml_render_outline($outline_elt, $outline_child); } + } elseif (is_array($value)) { + throw new LibOPML_Exception( + 'Type of outline elements cannot be array: ' . $key + ); } else { - // Flux rss sans catégorie, on récupère l'ajoute dans la catégorie par défaut - $feeds[] = getFeed ($outline, $defCat->id()); + // Detect text attribute is present, that's good :) + if ($key === 'text') { + $text_is_present = true; + } + + $outline_elt->addAttribute($key, $value); } } - return array ($categories, $feeds); + if (!$text_is_present) { + throw new LibOPML_Exception( + 'You must define at least a text element for all outlines' + ); + } } -/** - * import all feeds of a given outline tag - */ -function getFeedsOutline ($outline, $cat_id) { - $feeds = array (); - foreach ($outline->children () as $child) { - if (isset ($child['xmlUrl'])) { - $feeds[] = getFeed ($child, $cat_id); - } else { - $feeds = array_merge( - $feeds, - getFeedsOutline ($child, $cat_id) - ); +function libopml_render($array, $as_xml_object = false) { + $opml = new SimpleXMLElement('<opml version="2.0"></opml>'); + + // Create head element. $array['head'] is optional but head element will + // exist in the final XML object. + $head = $opml->addChild('head'); + if (isset($array['head'])) { + foreach ($array['head'] as $key => $value) { + if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { + $head->addChild($key, $value); + } } } - return $feeds; -} + // Check body is set and contains at least one element + if (!isset($array['body'])) { + throw new LibOPML_Exception( + '$array must contain a body element' + ); + } + if (count($array['body']) <= 0) { + throw new LibOPML_Exception( + 'Body element must contain at least one element (array)' + ); + } -function getFeed ($outline, $cat_id) { - $url = (string) $outline['xmlUrl']; - $url = htmlspecialchars($url, ENT_COMPAT, 'UTF-8'); - $title = ''; - if (isset ($outline['text'])) { - $title = (string) $outline['text']; - } elseif (isset ($outline['title'])) { - $title = (string) $outline['title']; - } - $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8'); - $feed = new FreshRSS_Feed ($url); - $feed->_category ($cat_id); - $feed->_name ($title); - if (isset($outline['htmlUrl'])) { - $feed->_website(htmlspecialchars((string)$outline['htmlUrl'], ENT_COMPAT, 'UTF-8')); - } - if (isset($outline['description'])) { - $feed->_description(sanitizeHTML((string)$outline['description'])); - } - return $feed; + // Create outline elements + $body = $opml->addChild('body'); + foreach ($array['body'] as $outline) { + libopml_render_outline($body, $outline); + } + + // And return the final result + if ($as_xml_object) { + return $opml; + } else { + $dom = dom_import_simplexml($opml)->ownerDocument; + $dom->formatOutput = true; + $dom->encoding = 'UTF-8'; + return $dom->saveXML(); + } } diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 2077fe63f..0f8161129 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -244,3 +244,7 @@ function cryptAvailable() { } return false; } + +function html_chars_utf8($str) { + return htmlspecialchars($str, ENT_COMPAT, 'UTF-8'); +} |
