aboutsummaryrefslogtreecommitdiff
path: root/lib/lib_opml.php
diff options
context:
space:
mode:
authorGravatar Clément <clement@selfhost.fr> 2017-02-15 14:14:03 +0100
committerGravatar Clément <clement@selfhost.fr> 2017-02-15 14:14:03 +0100
commit5a1bb1393b4496eb35a2ffb3cc63d41c9dc1e2e5 (patch)
tree67028e45792c575c25c92616633f64cc7a4a13eb /lib/lib_opml.php
parent7e949d50320317b5c3b5a2da2bdaf324e794b2f7 (diff)
parent5f637bd816b7323885bfe1751a1724ee59a822f6 (diff)
Merge remote-tracking branch 'FreshRSS/master'
Diffstat (limited to 'lib/lib_opml.php')
-rw-r--r--lib/lib_opml.php382
1 files changed, 302 insertions, 80 deletions
diff --git a/lib/lib_opml.php b/lib/lib_opml.php
index 9feb12ae0..b89e92977 100644
--- a/lib/lib_opml.php
+++ b/lib/lib_opml.php
@@ -1,120 +1,342 @@
<?php
-function opml_export ($cats) {
- $txt = '';
- foreach ($cats as $cat) {
- $txt .= '<outline text="' . $cat['name'] . '">' . "\n";
+/**
+ * lib_opml is a free library to manage OPML format in PHP.
+ *
+ * By default, it takes in consideration version 2.0 but can be compatible with
+ * OPML 1.0. More information on http://dev.opml.org.
+ * Difference is "text" attribute is optional in version 1.0. It is highly
+ * recommended to use this attribute.
+ *
+ * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument)
+ *
+ * @author Marien Fressinaud <dev@marienfressinaud.fr>
+ * @link https://github.com/marienfressinaud/lib_opml
+ * @version 0.2-FreshRSS~1.5.1
+ * @license public domain
+ *
+ * Usages:
+ * > include('lib_opml.php');
+ * > $filename = 'my_opml_file.xml';
+ * > $opml_array = libopml_parse_file($filename);
+ * > print_r($opml_array);
+ *
+ * > $opml_string = [...];
+ * > $opml_array = libopml_parse_string($opml_string);
+ * > print_r($opml_array);
+ *
+ * > $opml_array = [...];
+ * > $opml_string = libopml_render($opml_array);
+ * > $opml_object = libopml_render($opml_array, true);
+ * > echo $opml_string;
+ * > print_r($opml_object);
+ *
+ * You can set $strict argument to false if you want to bypass "text" attribute
+ * requirement.
+ *
+ * If parsing fails for any reason (e.g. not an XML string, does not match with
+ * the specifications), a LibOPML_Exception is raised.
+ *
+ * lib_opml array format is described here:
+ * $array = array(
+ * 'head' => array( // 'head' element is optional (but recommended)
+ * 'key' => 'value', // key must be a part of available OPML head elements
+ * ),
+ * 'body' => array( // body is required
+ * array( // this array represents an outline (at least one)
+ * 'text' => 'value', // 'text' element is required if $strict is true
+ * 'key' => 'value', // key and value are what you want (optional)
+ * '@outlines' = array( // @outlines is a special value and represents sub-outlines
+ * array(
+ * [...] // where [...] is a valid outline definition
+ * ),
+ * ),
+ * ),
+ * array( // other outline definitions
+ * [...]
+ * ),
+ * [...],
+ * )
+ * )
+ *
+ */
+
+/**
+ * A simple Exception class which represents any kind of OPML problem.
+ * Message should precise the current problem.
+ */
+class LibOPML_Exception extends Exception {}
- foreach ($cat['feeds'] as $feed) {
- $txt .= "\t" . '<outline text="' . $feed->name () . '" type="rss" xmlUrl="' . $feed->url () . '" htmlUrl="' . $feed->website () . '" description="' . htmlspecialchars($feed->description(), ENT_COMPAT, 'UTF-8') . '" />' . "\n";
+
+// Define the list of available head attributes. All of them are optional.
+define('HEAD_ELEMENTS', serialize(array(
+ 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail',
+ 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop',
+ 'windowLeft', 'windowBottom', 'windowRight'
+)));
+
+
+/**
+ * Parse an XML object as an outline object and return corresponding array
+ *
+ * @param SimpleXMLElement $outline_xml the XML object we want to parse
+ * @param bool $strict true if "text" attribute is required, false else
+ * @return array corresponding to an outline and following format described above
+ * @throws LibOPML_Exception
+ * @access private
+ */
+function libopml_parse_outline($outline_xml, $strict = true) {
+ $outline = array();
+
+ // An outline may contain any kind of attributes but "text" attribute is
+ // required !
+ $text_is_present = false;
+ foreach ($outline_xml->attributes() as $key => $value) {
+ $outline[$key] = (string)$value;
+
+ if ($key === 'text') {
+ $text_is_present = true;
}
+ }
+
+ if (!$text_is_present && $strict) {
+ throw new LibOPML_Exception(
+ 'Outline does not contain any text attribute'
+ );
+ }
+
+ if (empty($outline['text']) && isset($outline['title'])) {
+ $outline['text'] = $outline['title'];
+ }
- $txt .= '</outline>' . "\n";
+ foreach ($outline_xml->children() as $key => $value) {
+ // An outline may contain any number of outline children
+ if ($key === 'outline') {
+ $outline['@outlines'][] = libopml_parse_outline($value, $strict);
+ } else {
+ throw new LibOPML_Exception(
+ 'Body can contain only outline elements'
+ );
+ }
}
- return $txt;
+ return $outline;
}
-function opml_import ($xml) {
- $xml = html_only_entity_decode($xml); //!\ Assume UTF-8
+/**
+ * Reformat the XML document as a hierarchy when
+ * the OPML 2.0 category attribute is used
+ */
+function preprocessing_categories($doc) {
+ $outline_categories = array();
+ $body = $doc->getElementsByTagName('body')->item(0);
+ $xpath = new DOMXpath($doc);
+ $outlines = $xpath->query('/opml/body/outline[@category]');
+ foreach ($outlines as $outline) {
+ $category = trim($outline->getAttribute('category'));
+ if ($category != '') {
+ $outline_categorie = null;
+ if (!isset($outline_categories[$category])) {
+ $outline_categorie = $doc->createElement('outline');
+ $outline_categorie->setAttribute('text', $category);
+ $body->insertBefore($outline_categorie, $body->firstChild);
+ $outline_categories[$category] = $outline_categorie;
+ } else {
+ $outline_categorie = $outline_categories[$category];
+ }
+ $outline->parentNode->removeChild($outline);
+ $outline_categorie->appendChild($outline);
+ }
+ }
+}
+/**
+ * Parse a string as a XML one and returns the corresponding array
+ *
+ * @param string $xml is the string we want to parse
+ * @param bool $strict true if "text" attribute is required, false else
+ * @return array corresponding to the XML string and following format described above
+ * @throws LibOPML_Exception
+ * @access public
+ */
+function libopml_parse_string($xml, $strict = true) {
$dom = new DOMDocument();
$dom->recover = true;
$dom->strictErrorChecking = false;
$dom->loadXML($xml);
$dom->encoding = 'UTF-8';
+ //Partial compatibility with the category attribute of OPML 2.0
+ preprocessing_categories($dom);
+
$opml = simplexml_import_dom($dom);
if (!$opml) {
- throw new FreshRSS_Opml_Exception ();
+ throw new LibOPML_Exception();
+ }
+
+ $array = array(
+ 'version' => (string)$opml['version'],
+ 'head' => array(),
+ 'body' => array()
+ );
+
+ // First, we get all "head" elements. Head is required but its sub-elements
+ // are optional.
+ foreach ($opml->head->children() as $key => $value) {
+ if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+ $array['head'][$key] = (string)$value;
+ } else {
+ throw new LibOPML_Exception(
+ $key . 'is not part of OPML format'
+ );
+ }
}
- $catDAO = new FreshRSS_CategoryDAO();
- $catDAO->checkDefault();
- $defCat = $catDAO->getDefault();
+ // Then, we get body oulines. Body must contain at least one outline
+ // element.
+ $at_least_one_outline = false;
+ foreach ($opml->body->children() as $key => $value) {
+ if ($key === 'outline') {
+ $at_least_one_outline = true;
+ $array['body'][] = libopml_parse_outline($value, $strict);
+ } else {
+ throw new LibOPML_Exception(
+ 'Body can contain only outline elements'
+ );
+ }
+ }
- $categories = array ();
- $feeds = array ();
+ if (!$at_least_one_outline) {
+ throw new LibOPML_Exception(
+ 'Body must contain at least one outline element'
+ );
+ }
+
+ return $array;
+}
- foreach ($opml->body->outline as $outline) {
- if (!isset ($outline['xmlUrl'])) {
- // Catégorie
- $title = '';
- if (isset ($outline['text'])) {
- $title = (string) $outline['text'];
- } elseif (isset ($outline['title'])) {
- $title = (string) $outline['title'];
- }
+/**
+ * Parse a string contained into a file as a XML string and returns the corresponding array
+ *
+ * @param string $filename should indicates a valid XML file
+ * @param bool $strict true if "text" attribute is required, false else
+ * @return array corresponding to the file content and following format described above
+ * @throws LibOPML_Exception
+ * @access public
+ */
+function libopml_parse_file($filename, $strict = true) {
+ $file_content = file_get_contents($filename);
+
+ if ($file_content === false) {
+ throw new LibOPML_Exception(
+ $filename . ' cannot be found'
+ );
+ }
+
+ return libopml_parse_string($file_content, $strict);
+}
+
+
+/**
+ * Create a XML outline object in a parent object.
+ *
+ * @param SimpleXMLElement $parent_elt is the parent object of current outline
+ * @param array $outline array representing an outline object
+ * @param bool $strict true if "text" attribute is required, false else
+ * @throws LibOPML_Exception
+ * @access private
+ */
+function libopml_render_outline($parent_elt, $outline, $strict) {
+ // Outline MUST be an array!
+ if (!is_array($outline)) {
+ throw new LibOPML_Exception(
+ 'Outline element must be defined as array'
+ );
+ }
- if ($title) {
- // Permet d'éviter les soucis au niveau des id :
- // ceux-ci sont générés en fonction de la date,
- // un flux pourrait être dans une catégorie X avec l'id Y
- // alors qu'il existe déjà la catégorie X mais avec l'id Z
- // Y ne sera pas ajouté et le flux non plus vu que l'id
- // de sa catégorie n'exisera pas
- $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
- $catDAO = new FreshRSS_CategoryDAO ();
- $cat = $catDAO->searchByName ($title);
- if ($cat === false) {
- $cat = new FreshRSS_Category ($title);
- $values = array (
- 'name' => $cat->name ()
- );
- $cat->_id ($catDAO->addCategory ($values));
- }
-
- $feeds = array_merge ($feeds, getFeedsOutline ($outline, $cat->id ()));
+ $outline_elt = $parent_elt->addChild('outline');
+ $text_is_present = false;
+ foreach ($outline as $key => $value) {
+ // Only outlines can be an array and so we consider children are also
+ // outline elements.
+ if ($key === '@outlines' && is_array($value)) {
+ foreach ($value as $outline_child) {
+ libopml_render_outline($outline_elt, $outline_child, $strict);
}
+ } elseif (is_array($value)) {
+ throw new LibOPML_Exception(
+ 'Type of outline elements cannot be array: ' . $key
+ );
} else {
- // Flux rss sans catégorie, on récupère l'ajoute dans la catégorie par défaut
- $feeds[] = getFeed ($outline, $defCat->id());
+ // Detect text attribute is present, that's good :)
+ if ($key === 'text') {
+ $text_is_present = true;
+ }
+
+ $outline_elt->addAttribute($key, $value);
}
}
- return array ($categories, $feeds);
+ if (!$text_is_present && $strict) {
+ throw new LibOPML_Exception(
+ 'You must define at least a text element for all outlines'
+ );
+ }
}
+
/**
- * import all feeds of a given outline tag
+ * Render an array as an OPML string or a XML object.
+ *
+ * @param array $array is the array we want to render and must follow structure defined above
+ * @param bool $as_xml_object false if function must return a string, true for a XML object
+ * @param bool $strict true if "text" attribute is required, false else
+ * @return string|SimpleXMLElement XML string corresponding to $array or XML object
+ * @throws LibOPML_Exception
+ * @access public
*/
-function getFeedsOutline ($outline, $cat_id) {
- $feeds = array ();
+function libopml_render($array, $as_xml_object = false, $strict = true) {
+ $opml = new SimpleXMLElement('<opml></opml>');
+ $opml->addAttribute('version', $strict ? '2.0' : '1.0');
- foreach ($outline->children () as $child) {
- if (isset ($child['xmlUrl'])) {
- $feeds[] = getFeed ($child, $cat_id);
- } else {
- $feeds = array_merge(
- $feeds,
- getFeedsOutline ($child, $cat_id)
- );
+ // Create head element. $array['head'] is optional but head element will
+ // exist in the final XML object.
+ $head = $opml->addChild('head');
+ if (isset($array['head'])) {
+ foreach ($array['head'] as $key => $value) {
+ if (in_array($key, unserialize(HEAD_ELEMENTS), true)) {
+ $head->addChild($key, $value);
+ }
}
}
- return $feeds;
-}
+ // Check body is set and contains at least one element
+ if (!isset($array['body'])) {
+ throw new LibOPML_Exception(
+ '$array must contain a body element'
+ );
+ }
+ if (count($array['body']) <= 0) {
+ throw new LibOPML_Exception(
+ 'Body element must contain at least one element (array)'
+ );
+ }
+
+ // Create outline elements
+ $body = $opml->addChild('body');
+ foreach ($array['body'] as $outline) {
+ libopml_render_outline($body, $outline, $strict);
+ }
-function getFeed ($outline, $cat_id) {
- $url = (string) $outline['xmlUrl'];
- $url = htmlspecialchars($url, ENT_COMPAT, 'UTF-8');
- $title = '';
- if (isset ($outline['text'])) {
- $title = (string) $outline['text'];
- } elseif (isset ($outline['title'])) {
- $title = (string) $outline['title'];
- }
- $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8');
- $feed = new FreshRSS_Feed ($url);
- $feed->_category ($cat_id);
- $feed->_name ($title);
- if (isset($outline['htmlUrl'])) {
- $feed->_website(htmlspecialchars((string)$outline['htmlUrl'], ENT_COMPAT, 'UTF-8'));
- }
- if (isset($outline['description'])) {
- $feed->_description(sanitizeHTML((string)$outline['description']));
- }
- return $feed;
+ // And return the final result
+ if ($as_xml_object) {
+ return $opml;
+ } else {
+ $dom = dom_import_simplexml($opml)->ownerDocument;
+ $dom->formatOutput = true;
+ $dom->encoding = 'UTF-8';
+ return $dom->saveXML();
+ }
}