aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Marien Fressinaud <dev@marienfressinaud.fr> 2014-03-29 20:18:57 +0100
committerGravatar Marien Fressinaud <dev@marienfressinaud.fr> 2014-03-29 20:18:57 +0100
commit9ea3819402746d8425d4a608f2d5f3c0f5bc29fb (patch)
treea4836c792d14e5489ebabe6e5d9f46e9b098ce93
parent7676a197a4767f735dabae6ad9cf40ef65e91aa7 (diff)
Better OPML import / export
- use a new OPML library (https://github.com/marienfressinaud/lib_opml) - import has been completely rewritten (far better!) - introduce addFeedObject and addCategoryObject (in the DAOs for the moment), which make it easy to add feeds and categories (they check whether the item already exists in the DB) - introduce html_chars_utf8 (wraps htmlspecialchars for UTF-8)
-rw-r--r--app/Controllers/importExportController.php124
-rw-r--r--app/Exceptions/OpmlException.php6
-rw-r--r--app/Models/CategoryDAO.php12
-rw-r--r--app/Models/FeedDAO.php29
-rw-r--r--app/views/helpers/export/opml.phtml43
-rw-r--r--lib/lib_opml.php277
-rw-r--r--lib/lib_rss.php4
7 files changed, 345 insertions, 150 deletions
diff --git a/app/Controllers/importExportController.php b/app/Controllers/importExportController.php
index cbadeb6ca..b6b4d0fed 100644
--- a/app/Controllers/importExportController.php
+++ b/app/Controllers/importExportController.php
@@ -129,71 +129,101 @@ class FreshRSS_importExport_Controller extends Minz_ActionController {
}
private function import_opml($opml_file) {
- $categories = array();
- $feeds = array();
+ $opml_array = array();
try {
- list($categories, $feeds) = opml_import($opml_file);
- } catch (FreshRSS_Opml_Exception $e) {
+ $opml_array = libopml_parse_string($opml_file);
+ } catch (LibOPML_Exception $e) {
Minz_Log::warning($e->getMessage());
return true;
}
$this->catDAO->checkDefault();
- // on ajoute les catégories en masse dans une fonction à part
- $this->addCategories($categories);
-
- // on calcule la date des articles les plus anciens qu'on accepte
- $nb_month_old = $this->view->conf->old_entries;
- $date_min = time() - (3600 * 24 * 30 * $nb_month_old);
+ return $this->addOpmlElements($opml_array['body']);
+ }
- // la variable $error permet de savoir si une erreur est survenue
- // Le but est de ne pas arrêter l'import même en cas d'erreur
- // L'utilisateur sera mis au courant s'il y a eu des erreurs, mais
- // ne connaîtra pas les détails. Ceux-ci seront toutefois logguées
+ private function addOpmlElements($opml_elements, $parent_cat = null) {
$error = false;
- foreach ($feeds as $feed) {
- try {
- $values = array(
- 'id' => $feed->id(),
- 'url' => $feed->url(),
- 'category' => $feed->category(),
- 'name' => $feed->name(),
- 'website' => $feed->website(),
- 'description' => $feed->description(),
- 'lastUpdate' => 0,
- 'httpAuth' => $feed->httpAuth()
- );
+ foreach ($opml_elements as $elt) {
+ $res = false;
+ if (isset($elt['xmlUrl'])) {
+ $res = $this->addFeedOpml($elt, $parent_cat);
+ } else {
+ $res = $this->addCategoryOpml($elt, $parent_cat);
+ }
- // ajout du flux que s'il n'est pas déjà en BDD
- if (!$this->feedDAO->searchByUrl($values['url'])) {
- $id = $this->feedDAO->addFeed($values);
- if ($id) {
- $feed->_id($id);
- $feed->faviconPrepare();
- } else {
- $error = true;
- }
- }
- } catch (FreshRSS_Feed_Exception $e) {
- $error = true;
- Minz_Log::record($e->getMessage(), Minz_Log::WARNING);
+ if (!$error && $res) {
+ // oops: there is at least one error!
+ $error = $res;
}
}
return $error;
}
- private function addCategories($categories) {
- foreach ($categories as $cat) {
- if (!$this->catDAO->searchByName($cat->name())) {
- $values = array(
- 'id' => $cat->id(),
- 'name' => $cat->name(),
- );
- $this->catDAO->addCategory($values);
+ private function addFeedOpml($feed_elt, $parent_cat) {
+ if (is_null($parent_cat)) {
+ // This feed has no parent category so we get the default one
+ $parent_cat = $catDAO->getDefault()->name();
+ }
+
+ $cat = $this->catDAO->searchByName($parent_cat);
+
+ if (!$cat) {
+ return true;
+ }
+
+ // We get different useful information
+ $url = html_chars_utf8($feed_elt['xmlUrl']);
+ $name = html_chars_utf8($feed_elt['text']);
+ $website = '';
+ if (isset($feed_elt['htmlUrl'])) {
+ $website = html_chars_utf8($feed_elt['htmlUrl']);
+ }
+ $description = '';
+ if (isset($feed_elt['description'])) {
+ $description = html_chars_utf8($feed_elt['description']);
+ }
+
+ $error = false;
+ try {
+ // Create a Feed object and add it in DB
+ $feed = new FreshRSS_Feed($url);
+ $feed->_category($cat->id());
+ $feed->_name($name);
+ $feed->_website($website);
+ $feed->_description($description);
+
+ // addFeedObject checks if feed is already in DB so nothing else to
+ // check here
+ $id = $this->feedDAO->addFeedObject($feed);
+ $error = ($id === false);
+ } catch (FreshRSS_Feed_Exception $e) {
+ Minz_Log::record($e->getMessage(), Minz_Log::WARNING);
+ $error = true;
+ }
+
+ return $error;
+ }
+
+ private function addCategoryOpml($cat_elt, $parent_cat) {
+ // Create a new Category object
+ $cat = new FreshRSS_Category(html_chars_utf8($cat_elt['text']));
+
+ $id = $this->catDAO->addCategoryObject($cat);
+ $error = ($id === false);
+
+ if (isset($cat_elt['@outlines'])) {
+ // Our cat_elt contains more categories or more feeds, so we
+ // add them recursively.
+ // Note: FreshRSS does not yet support nested categories
+ $res = $this->addOpmlElements($cat_elt['@outlines'], $cat->name());
+ if (!$error && $res) {
+ $error = true;
}
}
+
+ return $error;
}
private function import_articles($article_file, $starred = false) {
diff --git a/app/Exceptions/OpmlException.php b/app/Exceptions/OpmlException.php
deleted file mode 100644
index e0ea3e493..000000000
--- a/app/Exceptions/OpmlException.php
+++ /dev/null
@@ -1,6 +0,0 @@
-<?php
-class FreshRSS_Opml_Exception extends FreshRSS_Feed_Exception {
- public function __construct ($name_file) {
- parent::__construct ('OPML file is invalid');
- }
-}
diff --git a/app/Models/CategoryDAO.php b/app/Models/CategoryDAO.php
index f3c02e3e4..8be732b98 100644
--- a/app/Models/CategoryDAO.php
+++ b/app/Models/CategoryDAO.php
@@ -18,6 +18,18 @@ class FreshRSS_CategoryDAO extends Minz_ModelPdo {
}
}
+ public function addCategoryObject($category) {
+ if (!$this->searchByName($category->name())) {
+ // Category does not exist yet in DB so we add it before continuing
+ $values = array(
+ 'name' => $category->name(),
+ );
+ return $this->addCategory($values);
+ }
+
+ return false;
+ }
+
public function updateCategory ($id, $valuesTmp) {
$sql = 'UPDATE `' . $this->prefix . 'category` SET name=? WHERE id=?';
$stm = $this->bd->prepare ($sql);
diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php
index ca25c3aeb..eac21df7e 100644
--- a/app/Models/FeedDAO.php
+++ b/app/Models/FeedDAO.php
@@ -24,6 +24,35 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo {
}
}
+ public function addFeedObject($feed) {
+ // TODO: not sure if we should write this method in DAO since DAO
+ // should not be aware about feed class
+
+ // Add feed only if we don't find it in DB
+ if (!$this->searchByUrl($feed->url())) {
+ $values = array(
+ 'id' => $feed->id(),
+ 'url' => $feed->url(),
+ 'category' => $feed->category(),
+ 'name' => $feed->name(),
+ 'website' => $feed->website(),
+ 'description' => $feed->description(),
+ 'lastUpdate' => 0,
+ 'httpAuth' => $feed->httpAuth()
+ );
+
+ $id = $this->addFeed($values);
+ if ($id) {
+ $feed->_id($id);
+ $feed->faviconPrepare();
+ }
+
+ return $id;
+ }
+
+ return false;
+ }
+
public function updateFeed ($id, $valuesTmp) {
$set = '';
foreach ($valuesTmp as $key => $v) {
diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml
index 2e66e5054..adbac904d 100644
--- a/app/views/helpers/export/opml.phtml
+++ b/app/views/helpers/export/opml.phtml
@@ -1,15 +1,30 @@
<?php
-require_once(LIB_PATH . '/lib_opml.php');
-
-echo '<?xml version="1.0" encoding="UTF-8" ?>';
-?>
-<!-- Generated by <?php echo Minz_Configuration::title (); ?> -->
-<opml version="2.0">
- <head>
- <title><?php echo Minz_Configuration::title (); ?> OPML Feed</title>
- <dateCreated><?php echo date('D, d M Y H:i:s'); ?></dateCreated>
- </head>
- <body>
-<?php echo opml_export ($this->categories); ?>
- </body>
-</opml>
+
+$opml_array = array(
+ 'head' => array(
+ 'title' => Minz_Configuration::title(),
+ 'dateCreated' => date('D, d M Y H:i:s')
+ ),
+ 'body' => array()
+);
+
+foreach ($this->categories as $key => $cat) {
+ $opml_array['body'][$key] = array(
+ 'text' => $cat['name'],
+ '@outlines' => array()
+ );
+
+ foreach ($cat['feeds'] as $feed) {
+ $opml_array['body'][$key]['@outlines'][] = array(
+ 'text' => $feed->name(),
+ 'type' => 'rss',
+ 'xmlUrl' => $feed->url(),
+ 'htmlUrl' => $feed->website(),
+ 'description' => htmlspecialchars(
+ $feed->description(), ENT_COMPAT, 'UTF-8'
+ )
+ );
+ }
+}
+
+echo libopml_render($opml_array);
diff --git a/lib/lib_opml.php b/lib/lib_opml.php
index 05e54d85e..16a9921ea 100644
--- a/lib/lib_opml.php
+++ b/lib/lib_opml.php
@@ -1,23 +1,86 @@
<?php
-function opml_export ($cats) {
- $txt = '';
- foreach ($cats as $cat) {
- $txt .= '<outline text="' . $cat['name'] . '">' . "\n";
-
- foreach ($cat['feeds'] as $feed) {
- $txt .= "\t" . '<outline text="' . $feed->name () . '" type="rss" xmlUrl="' . $feed->url () . '" htmlUrl="' . $feed->website () . '" description="' . htmlspecialchars($feed->description(), ENT_COMPAT, 'UTF-8') . '" />' . "\n";
+/* *
+ * lib_opml is a free library to manage OPML format in PHP.
+ * It takes in consideration only version 2.0 (http://dev.opml.org/spec2.html).
+ * Basically it means "text" attribute for outline elements is required.
+ *
+ * lib_opml requires SimpleXML (http://php.net/manual/en/book.simplexml.php)
+ *
+ * Usages:
+ * > include('lib_opml.php');
+ * > $filename = 'my_opml_file.xml';
+ * > $opml_array = libopml_parse_file($filename);
+ * > print_r($opml_array);
+ *
+ * > $opml_string = [...];
+ * > $opml_array = libopml_parse_string($opml_string);
+ * > print_r($opml_array);
+ *
+ * > $opml_array = [...];
+ * > $opml_string = libopml_render($opml_array);
+ * > $opml_object = libopml_render($opml_array, true);
+ * > echo $opml_string;
+ * > print_r($opml_object);
+ *
+ * If parsing fails for any reason (e.g. not an XML string, does not match the
+ * specification), a LibOPML_Exception is raised.
+ *
+ * Author: Marien Fressinaud <dev@marienfressinaud.fr>
+ * Url: https://github.com/marienfressinaud/lib_opml
+ * Version: 0.1
+ * Date: 2014-03-29
+ * License: public domain
+ *
+ * */
+
+class LibOPML_Exception extends Exception {}
+
+
+// These elements are optional
+define('HEAD_ELEMENTS', serialize(array(
+ 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
+ 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
+ 'windowLeft', 'windowBottom', 'windowRight'
+)));
+
+
+function libopml_parse_outline($outline_xml) {
+ $outline = array();
+
+ // An outline may contain any kind of attributes but "text" attribute is
+ // required !
+ $text_is_present = false;
+ foreach ($outline_xml->attributes() as $key => $value) {
+ $outline[$key] = (string)$value;
+
+ if ($key === 'text') {
+ $text_is_present = true;
}
+ }
- $txt .= '</outline>' . "\n";
+ if (!$text_is_present) {
+ throw new LibOPML_Exception(
+ 'Outline does not contain any text attribute'
+ );
}
- return $txt;
+ foreach ($outline_xml->children() as $key => $value) {
+ // An outline may contain any number of outline children
+ if ($key === 'outline') {
+ $outline['@outlines'][] = libopml_parse_outline($value);
+ } else {
+ throw new LibOPML_Exception(
+ 'Body can contain only outline elements'
+ );
+ }
+ }
+
+ return $outline;
}
-function opml_import ($xml) {
- $xml = html_only_entity_decode($xml); //!\ Assume UTF-8
+function libopml_parse_string($xml) {
$dom = new DOMDocument();
$dom->recover = true;
$dom->strictErrorChecking = false;
@@ -27,94 +90,142 @@ function opml_import ($xml) {
$opml = simplexml_import_dom($dom);
if (!$opml) {
- throw new FreshRSS_Opml_Exception ();
+ throw new LibOPML_Exception();
}
- $catDAO = new FreshRSS_CategoryDAO();
- $catDAO->checkDefault();
- $defCat = $catDAO->getDefault();
+ $array = array(
+ 'version' => (string)$opml['version'],
+ 'head' => array(),
+ 'body' => array()
+ );
+
+ // First, we get all "head" elements. Head is required but its sub-elements
+ // are optional.
+ foreach ($opml->head->children() as $key => $value) {
+ if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+ $array['head'][$key] = (string)$value;
+ } else {
+ throw new LibOPML_Exception(
+ $key . 'is not part of OPML format'
+ );
+ }
+ }
- $categories = array ();
- $feeds = array ();
+ // Then, we get body outlines. Body must contain at least one outline
+ // element.
+ $at_least_one_outline = false;
+ foreach ($opml->body->children() as $key => $value) {
+ if ($key === 'outline') {
+ $at_least_one_outline = true;
+ $array['body'][] = libopml_parse_outline($value);
+ } else {
+ throw new LibOPML_Exception(
+ 'Body can contain only outline elements'
+ );
+ }
+ }
+
+ if (!$at_least_one_outline) {
+ throw new LibOPML_Exception(
+ 'Body must contain at least one outline element'
+ );
+ }
- foreach ($opml->body->outline as $outline) {
- if (!isset ($outline['xmlUrl'])) {
- // Catégorie
- $title = '';
+ return $array;
+}
- if (isset ($outline['text'])) {
- $title = (string) $outline['text'];
- } elseif (isset ($outline['title'])) {
- $title = (string) $outline['title'];
- }
- if ($title) {
- // Permet d'éviter les soucis au niveau des id :
- // ceux-ci sont générés en fonction de la date,
- // un flux pourrait être dans une catégorie X avec l'id Y
- // alors qu'il existe déjà la catégorie X mais avec l'id Z
- // Y ne sera pas ajouté et le flux non plus vu que l'id
- // de sa catégorie n'exisera pas
- $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
- $catDAO = new FreshRSS_CategoryDAO ();
- $cat = $catDAO->searchByName ($title);
- if ($cat == null) {
- $cat = new FreshRSS_Category ($title);
- $values = array (
- 'name' => $cat->name ()
- );
- $cat->_id ($catDAO->addCategory ($values));
- }
-
- $feeds = array_merge ($feeds, getFeedsOutline ($outline, $cat->id ()));
+function libopml_parse_file($filename) {
+ $file_content = file_get_contents($filename);
+
+ if ($file_content === false) {
+ throw new LibOPML_Exception(
+ $filename . ' cannot be found'
+ );
+ }
+
+ return libopml_parse_string($file_content);
+}
+
+
+function libopml_render_outline($parent_elt, $outline) {
+ // Outline MUST be an array!
+ if (!is_array($outline)) {
+ throw new LibOPML_Exception(
+ 'Outline element must be defined as array'
+ );
+ }
+
+ $outline_elt = $parent_elt->addChild('outline');
+ $text_is_present = false;
+ foreach ($outline as $key => $value) {
+ // Only outlines can be an array and so we consider children are also
+ // outline elements.
+ if ($key === '@outlines' && is_array($value)) {
+ foreach ($value as $outline_child) {
+ libopml_render_outline($outline_elt, $outline_child);
}
+ } elseif (is_array($value)) {
+ throw new LibOPML_Exception(
+ 'Type of outline elements cannot be array: ' . $key
+ );
} else {
- // Flux rss sans catégorie, on récupère l'ajoute dans la catégorie par défaut
- $feeds[] = getFeed ($outline, $defCat->id());
+ // Detect text attribute is present, that's good :)
+ if ($key === 'text') {
+ $text_is_present = true;
+ }
+
+ $outline_elt->addAttribute($key, $value);
}
}
- return array ($categories, $feeds);
+ if (!$text_is_present) {
+ throw new LibOPML_Exception(
+ 'You must define at least a text element for all outlines'
+ );
+ }
}
-/**
- * import all feeds of a given outline tag
- */
-function getFeedsOutline ($outline, $cat_id) {
- $feeds = array ();
- foreach ($outline->children () as $child) {
- if (isset ($child['xmlUrl'])) {
- $feeds[] = getFeed ($child, $cat_id);
- } else {
- $feeds = array_merge(
- $feeds,
- getFeedsOutline ($child, $cat_id)
- );
+function libopml_render($array, $as_xml_object = false) {
+ $opml = new SimpleXMLElement('<opml version="2.0"></opml>');
+
+ // Create head element. $array['head'] is optional but head element will
+ // exist in the final XML object.
+ $head = $opml->addChild('head');
+ if (isset($array['head'])) {
+ foreach ($array['head'] as $key => $value) {
+ if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+ $head->addChild($key, $value);
+ }
}
}
- return $feeds;
-}
+ // Check body is set and contains at least one element
+ if (!isset($array['body'])) {
+ throw new LibOPML_Exception(
+ '$array must contain a body element'
+ );
+ }
+ if (count($array['body']) <= 0) {
+ throw new LibOPML_Exception(
+ 'Body element must contain at least one element (array)'
+ );
+ }
-function getFeed ($outline, $cat_id) {
- $url = (string) $outline['xmlUrl'];
- $url = htmlspecialchars($url, ENT_COMPAT, 'UTF-8');
- $title = '';
- if (isset ($outline['text'])) {
- $title = (string) $outline['text'];
- } elseif (isset ($outline['title'])) {
- $title = (string) $outline['title'];
- }
- $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
- $feed = new FreshRSS_Feed ($url);
- $feed->_category ($cat_id);
- $feed->_name ($title);
- if (isset($outline['htmlUrl'])) {
- $feed->_website(htmlspecialchars((string)$outline['htmlUrl'], ENT_COMPAT, 'UTF-8'));
- }
- if (isset($outline['description'])) {
- $feed->_description(sanitizeHTML((string)$outline['description']));
- }
- return $feed;
+ // Create outline elements
+ $body = $opml->addChild('body');
+ foreach ($array['body'] as $outline) {
+ libopml_render_outline($body, $outline);
+ }
+
+ // And return the final result
+ if ($as_xml_object) {
+ return $opml;
+ } else {
+ $dom = dom_import_simplexml($opml)->ownerDocument;
+ $dom->formatOutput = true;
+ $dom->encoding = 'UTF-8';
+ return $dom->saveXML();
+ }
}
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 2077fe63f..0f8161129 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -244,3 +244,7 @@ function cryptAvailable() {
}
return false;
}
+
+function html_chars_utf8($str) {
+ return htmlspecialchars($str, ENT_COMPAT, 'UTF-8');
+}