diff options
Diffstat (limited to 'lib/lib_opml.php')
| -rw-r--r-- | lib/lib_opml.php | 382 |
1 files changed, 302 insertions, 80 deletions
diff --git a/lib/lib_opml.php b/lib/lib_opml.php index 9feb12ae0..b89e92977 100644 --- a/lib/lib_opml.php +++ b/lib/lib_opml.php @@ -1,120 +1,342 @@ <?php -function opml_export ($cats) { - $txt = ''; - foreach ($cats as $cat) { - $txt .= '<outline text="' . $cat['name'] . '">' . "\n"; +/** + * lib_opml is a free library to manage OPML format in PHP. + * + * By default, it takes in consideration version 2.0 but can be compatible with + * OPML 1.0. More information on http://dev.opml.org. + * Difference is "text" attribute is optional in version 1.0. It is highly + * recommended to use this attribute. + * + * lib_opml requires SimpleXML (php.net/simplexml) and DOMDocument (php.net/domdocument) + * + * @author Marien Fressinaud <dev@marienfressinaud.fr> + * @link https://github.com/marienfressinaud/lib_opml + * @version 0.2-FreshRSS~1.5.1 + * @license public domain + * + * Usages: + * > include('lib_opml.php'); + * > $filename = 'my_opml_file.xml'; + * > $opml_array = libopml_parse_file($filename); + * > print_r($opml_array); + * + * > $opml_string = [...]; + * > $opml_array = libopml_parse_string($opml_string); + * > print_r($opml_array); + * + * > $opml_array = [...]; + * > $opml_string = libopml_render($opml_array); + * > $opml_object = libopml_render($opml_array, true); + * > echo $opml_string; + * > print_r($opml_object); + * + * You can set $strict argument to false if you want to bypass "text" attribute + * requirement. + * + * If parsing fails for any reason (e.g. not an XML string, does not match with + * the specifications), a LibOPML_Exception is raised. + * + * lib_opml array format is described here: + * $array = array( + * 'head' => array( // 'head' element is optional (but recommended) + * 'key' => 'value', // key must be a part of available OPML head elements + * ), + * 'body' => array( // body is required + * array( // this array represents an outline (at least one) + * 'text' => 'value', // 'text' element is required if $strict is true + * 'key' => 'value', // key and value are what you want (optional) + * '@outlines' = array( // @outlines is a special value and represents sub-outlines + * array( + * [...] // where [...] is a valid outline definition + * ), + * ), + * ), + * array( // other outline definitions + * [...] + * ), + * [...], + * ) + * ) + * + */ + +/** + * A simple Exception class which represents any kind of OPML problem. + * Message should precise the current problem. + */ +class LibOPML_Exception extends Exception {} - foreach ($cat['feeds'] as $feed) { - $txt .= "\t" . '<outline text="' . $feed->name () . '" type="rss" xmlUrl="' . $feed->url () . '" htmlUrl="' . $feed->website () . '" description="' . htmlspecialchars($feed->description(), ENT_COMPAT, 'UTF-8') . '" />' . "\n"; + +// Define the list of available head attributes. All of them are optional. +define('HEAD_ELEMENTS', serialize(array( + 'title', 'dateCreated', 'dateModified', 'ownerName', 'ownerEmail', + 'ownerId', 'docs', 'expansionState', 'vertScrollState', 'windowTop', + 'windowLeft', 'windowBottom', 'windowRight' +))); + + +/** + * Parse an XML object as an outline object and return corresponding array + * + * @param SimpleXMLElement $outline_xml the XML object we want to parse + * @param bool $strict true if "text" attribute is required, false else + * @return array corresponding to an outline and following format described above + * @throws LibOPML_Exception + * @access private + */ +function libopml_parse_outline($outline_xml, $strict = true) { + $outline = array(); + + // An outline may contain any kind of attributes but "text" attribute is + // required ! + $text_is_present = false; + foreach ($outline_xml->attributes() as $key => $value) { + $outline[$key] = (string)$value; + + if ($key === 'text') { + $text_is_present = true; } + } + + if (!$text_is_present && $strict) { + throw new LibOPML_Exception( + 'Outline does not contain any text attribute' + ); + } + + if (empty($outline['text']) && isset($outline['title'])) { + $outline['text'] = $outline['title']; + } - $txt .= '</outline>' . "\n"; + foreach ($outline_xml->children() as $key => $value) { + // An outline may contain any number of outline children + if ($key === 'outline') { + $outline['@outlines'][] = libopml_parse_outline($value, $strict); + } else { + throw new LibOPML_Exception( + 'Body can contain only outline elements' + ); + } } - return $txt; + return $outline; } -function opml_import ($xml) { - $xml = html_only_entity_decode($xml); //!\ Assume UTF-8 +/** + * Reformat the XML document as a hierarchy when + * the OPML 2.0 category attribute is used + */ +function preprocessing_categories($doc) { + $outline_categories = array(); + $body = $doc->getElementsByTagName('body')->item(0); + $xpath = new DOMXpath($doc); + $outlines = $xpath->query('/opml/body/outline[@category]'); + foreach ($outlines as $outline) { + $category = trim($outline->getAttribute('category')); + if ($category != '') { + $outline_categorie = null; + if (!isset($outline_categories[$category])) { + $outline_categorie = $doc->createElement('outline'); + $outline_categorie->setAttribute('text', $category); + $body->insertBefore($outline_categorie, $body->firstChild); + $outline_categories[$category] = $outline_categorie; + } else { + $outline_categorie = $outline_categories[$category]; + } + $outline->parentNode->removeChild($outline); + $outline_categorie->appendChild($outline); + } + } +} +/** + * Parse a string as a XML one and returns the corresponding array + * + * @param string $xml is the string we want to parse + * @param bool $strict true if "text" attribute is required, false else + * @return array corresponding to the XML string and following format described above + * @throws LibOPML_Exception + * @access public + */ +function libopml_parse_string($xml, $strict = true) { $dom = new DOMDocument(); $dom->recover = true; $dom->strictErrorChecking = false; $dom->loadXML($xml); $dom->encoding = 'UTF-8'; + //Partial compatibility with the category attribute of OPML 2.0 + preprocessing_categories($dom); + $opml = simplexml_import_dom($dom); if (!$opml) { - throw new FreshRSS_Opml_Exception (); + throw new LibOPML_Exception(); + } + + $array = array( + 'version' => (string)$opml['version'], + 'head' => array(), + 'body' => array() + ); + + // First, we get all "head" elements. Head is required but its sub-elements + // are optional. + foreach ($opml->head->children() as $key => $value) { + if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { + $array['head'][$key] = (string)$value; + } else { + throw new LibOPML_Exception( + $key . 'is not part of OPML format' + ); + } } - $catDAO = new FreshRSS_CategoryDAO(); - $catDAO->checkDefault(); - $defCat = $catDAO->getDefault(); + // Then, we get body oulines. Body must contain at least one outline + // element. + $at_least_one_outline = false; + foreach ($opml->body->children() as $key => $value) { + if ($key === 'outline') { + $at_least_one_outline = true; + $array['body'][] = libopml_parse_outline($value, $strict); + } else { + throw new LibOPML_Exception( + 'Body can contain only outline elements' + ); + } + } - $categories = array (); - $feeds = array (); + if (!$at_least_one_outline) { + throw new LibOPML_Exception( + 'Body must contain at least one outline element' + ); + } + + return $array; +} - foreach ($opml->body->outline as $outline) { - if (!isset ($outline['xmlUrl'])) { - // Catégorie - $title = ''; - if (isset ($outline['text'])) { - $title = (string) $outline['text']; - } elseif (isset ($outline['title'])) { - $title = (string) $outline['title']; - } +/** + * Parse a string contained into a file as a XML string and returns the corresponding array + * + * @param string $filename should indicates a valid XML file + * @param bool $strict true if "text" attribute is required, false else + * @return array corresponding to the file content and following format described above + * @throws LibOPML_Exception + * @access public + */ +function libopml_parse_file($filename, $strict = true) { + $file_content = file_get_contents($filename); + + if ($file_content === false) { + throw new LibOPML_Exception( + $filename . ' cannot be found' + ); + } + + return libopml_parse_string($file_content, $strict); +} + + +/** + * Create a XML outline object in a parent object. + * + * @param SimpleXMLElement $parent_elt is the parent object of current outline + * @param array $outline array representing an outline object + * @param bool $strict true if "text" attribute is required, false else + * @throws LibOPML_Exception + * @access private + */ +function libopml_render_outline($parent_elt, $outline, $strict) { + // Outline MUST be an array! + if (!is_array($outline)) { + throw new LibOPML_Exception( + 'Outline element must be defined as array' + ); + } - if ($title) { - // Permet d'éviter les soucis au niveau des id : - // ceux-ci sont générés en fonction de la date, - // un flux pourrait être dans une catégorie X avec l'id Y - // alors qu'il existe déjà la catégorie X mais avec l'id Z - // Y ne sera pas ajouté et le flux non plus vu que l'id - // de sa catégorie n'exisera pas - $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8'); - $catDAO = new FreshRSS_CategoryDAO (); - $cat = $catDAO->searchByName ($title); - if ($cat === false) { - $cat = new FreshRSS_Category ($title); - $values = array ( - 'name' => $cat->name () - ); - $cat->_id ($catDAO->addCategory ($values)); - } - - $feeds = array_merge ($feeds, getFeedsOutline ($outline, $cat->id ())); + $outline_elt = $parent_elt->addChild('outline'); + $text_is_present = false; + foreach ($outline as $key => $value) { + // Only outlines can be an array and so we consider children are also + // outline elements. + if ($key === '@outlines' && is_array($value)) { + foreach ($value as $outline_child) { + libopml_render_outline($outline_elt, $outline_child, $strict); } + } elseif (is_array($value)) { + throw new LibOPML_Exception( + 'Type of outline elements cannot be array: ' . $key + ); } else { - // Flux rss sans catégorie, on récupère l'ajoute dans la catégorie par défaut - $feeds[] = getFeed ($outline, $defCat->id()); + // Detect text attribute is present, that's good :) + if ($key === 'text') { + $text_is_present = true; + } + + $outline_elt->addAttribute($key, $value); } } - return array ($categories, $feeds); + if (!$text_is_present && $strict) { + throw new LibOPML_Exception( + 'You must define at least a text element for all outlines' + ); + } } + /** - * import all feeds of a given outline tag + * Render an array as an OPML string or a XML object. + * + * @param array $array is the array we want to render and must follow structure defined above + * @param bool $as_xml_object false if function must return a string, true for a XML object + * @param bool $strict true if "text" attribute is required, false else + * @return string|SimpleXMLElement XML string corresponding to $array or XML object + * @throws LibOPML_Exception + * @access public */ -function getFeedsOutline ($outline, $cat_id) { - $feeds = array (); +function libopml_render($array, $as_xml_object = false, $strict = true) { + $opml = new SimpleXMLElement('<opml></opml>'); + $opml->addAttribute('version', $strict ? '2.0' : '1.0'); - foreach ($outline->children () as $child) { - if (isset ($child['xmlUrl'])) { - $feeds[] = getFeed ($child, $cat_id); - } else { - $feeds = array_merge( - $feeds, - getFeedsOutline ($child, $cat_id) - ); + // Create head element. $array['head'] is optional but head element will + // exist in the final XML object. + $head = $opml->addChild('head'); + if (isset($array['head'])) { + foreach ($array['head'] as $key => $value) { + if (in_array($key, unserialize(HEAD_ELEMENTS), true)) { + $head->addChild($key, $value); + } } } - return $feeds; -} + // Check body is set and contains at least one element + if (!isset($array['body'])) { + throw new LibOPML_Exception( + '$array must contain a body element' + ); + } + if (count($array['body']) <= 0) { + throw new LibOPML_Exception( + 'Body element must contain at least one element (array)' + ); + } + + // Create outline elements + $body = $opml->addChild('body'); + foreach ($array['body'] as $outline) { + libopml_render_outline($body, $outline, $strict); + } -function getFeed ($outline, $cat_id) { - $url = (string) $outline['xmlUrl']; - $url = htmlspecialchars($url, ENT_COMPAT, 'UTF-8'); - $title = ''; - if (isset ($outline['text'])) { - $title = (string) $outline['text']; - } elseif (isset ($outline['title'])) { - $title = (string) $outline['title']; - } - $title = htmlspecialchars($title, ENT_COMPAT, 'UTF-8'); - $feed = new FreshRSS_Feed ($url); - $feed->_category ($cat_id); - $feed->_name ($title); - if (isset($outline['htmlUrl'])) { - $feed->_website(htmlspecialchars((string)$outline['htmlUrl'], ENT_COMPAT, 'UTF-8')); - } - if (isset($outline['description'])) { - $feed->_description(sanitizeHTML((string)$outline['description'])); - } - return $feed; + // And return the final result + if ($as_xml_object) { + return $opml; + } else { + $dom = dom_import_simplexml($opml)->ownerDocument; + $dom->formatOutput = true; + $dom->encoding = 'UTF-8'; + return $dom->saveXML(); + } } |
