diff options
| author | 2013-11-07 19:18:52 +0100 | |
|---|---|---|
| committer | 2013-11-07 19:18:52 +0100 | |
| commit | 6464666075170b006501c4f12d6a2f470300af46 (patch) | |
| tree | 28239a454392f3ceb5ea79063ef8cecaf1a3820c | |
| parent | ec052432c081cd0db4717cd3ee3c3f9f47785acc (diff) | |
Plus de tolérance pour les flux
Utilise une méthode moins stricte qui tolère des erreurs XML dans les
flux.
Le choix de l'encodage en entrée a dû être légèrement changé car
SimplePie a pour stratégie d'essayer plusieurs encodages jusqu'à en
trouver un qui marche.
En étant moins strict sur les erreurs, ça marche plus souvent, et du
coup les encodages n'étaient plus bons.
À essayer avec plein de flux.
Tous mes flux passent (~150).
Devrait permettre de fermer
https://github.com/marienfressinaud/FreshRSS/issues/233
Fonctionne aussi avec des flux à encodage invalide comme
http://travaux.ovh.net/rss.php qui se déclare en "text/xml" (du coup
ASCII) mais dont le flux contient un en-tête XML déclarant de l'UTF-8.
| -rw-r--r-- | app/i18n/en.php | 2 | ||||
| -rw-r--r-- | app/i18n/fr.php | 2 | ||||
| -rw-r--r-- | app/models/Feed.php | 2 | ||||
| -rw-r--r-- | lib/SimplePie/SimplePie/Parser.php | 105 |
4 files changed, 64 insertions, 47 deletions
diff --git a/app/i18n/en.php b/app/i18n/en.php index bf4d3f550..9366287c3 100644 --- a/app/i18n/en.php +++ b/app/i18n/en.php @@ -76,7 +76,7 @@ return array ( 'already_subscribed' => 'You have already subscribed to <em>%s</em>', 'feed_added' => 'RSS feed <em>%s</em> has been added', 'feed_not_added' => '<em>%s</em> could not be added', - 'internal_problem_feed' => 'An internal problem occurred, RSS feed could not be added', + 'internal_problem_feed' => 'The RSS feed could not be added. Check FressRSS logs for details.', 'invalid_url' => 'URL <em>%s</em> is invalid', 'feed_actualized' => '<em>%s</em> has been updated', 'n_feeds_actualized' => '%d feeds have been updated', diff --git a/app/i18n/fr.php b/app/i18n/fr.php index 3af8fef7c..0cb40fd82 100644 --- a/app/i18n/fr.php +++ b/app/i18n/fr.php @@ -76,7 +76,7 @@ return array ( 'already_subscribed' => 'Vous êtes déjà abonné à <em>%s</em>', 'feed_added' => 'Le flux <em>%s</em> a bien été ajouté', 'feed_not_added' => '<em>%s</em> n’ a pas pu être ajouté', - 'internal_problem_feed' => 'Un problème interne a été rencontré, le flux n’a pas pu être ajouté', + 'internal_problem_feed' => 'Le flux n’a pas pu être ajouté. Consulter les logs de FreshRSS pour plus de détails.', 'invalid_url' => 'L’url <em>%s</em> est invalide', 'feed_actualized' => '<em>%s</em> a été mis à jour', 'n_feeds_actualized' => '%d flux ont été mis à jour', diff --git a/app/models/Feed.php b/app/models/Feed.php index 97fa7aabc..3877c5b33 100644 --- a/app/models/Feed.php +++ b/app/models/Feed.php @@ -117,7 +117,7 @@ class Feed extends Model { if (empty ($value)) { throw new BadUrlException ($value); } - if (!preg_match ('#^https?://#', $value)) { + if (!preg_match ('#^https?://#i', $value)) { $value = 'http://' . 
$value; } diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php index d698552ca..72878c25a 100644 --- a/lib/SimplePie/SimplePie/Parser.php +++ b/lib/SimplePie/SimplePie/Parser.php @@ -77,56 +77,73 @@ class SimplePie_Parser public function parse(&$data, $encoding) { - // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character - if (strtoupper($encoding) === 'US-ASCII') + if (!empty($encoding)) { - $this->encoding = 'UTF-8'; - } - else - { - $this->encoding = $encoding; - } - - // Strip BOM: - // UTF-32 Big Endian BOM - if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") - { - $data = substr($data, 4); - } - // UTF-32 Little Endian BOM - elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") - { - $data = substr($data, 4); - } - // UTF-16 Big Endian BOM - elseif (substr($data, 0, 2) === "\xFE\xFF") - { - $data = substr($data, 2); - } - // UTF-16 Little Endian BOM - elseif (substr($data, 0, 2) === "\xFF\xFE") - { - $data = substr($data, 2); - } - // UTF-8 BOM - elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") - { - $data = substr($data, 3); - } - - if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) - { - $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); - if ($declaration->parse()) + // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character + if (strtoupper($encoding) === 'US-ASCII') { - $data = substr($data, $pos + 2); - $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + $this->encoding = 'UTF-8'; } else { - $this->error_string = 'SimplePie bug! 
Please report this!'; - return false; + $this->encoding = $encoding; + } + + // Strip BOM: + // UTF-32 Big Endian BOM + if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") + { + $data = substr($data, 4); } + // UTF-32 Little Endian BOM + elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") + { + $data = substr($data, 4); + } + // UTF-16 Big Endian BOM + elseif (substr($data, 0, 2) === "\xFE\xFF") + { + $data = substr($data, 2); + } + // UTF-16 Little Endian BOM + elseif (substr($data, 0, 2) === "\xFF\xFE") + { + $data = substr($data, 2); + } + // UTF-8 BOM + elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") + { + $data = substr($data, 3); + } + + if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) + { + $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); + if ($declaration->parse()) + { + $data = substr($data, $pos + 2); + $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + } + else + { + $this->error_string = 'SimplePie bug! Please report this!'; + return false; + } + } + } + + try + { + $dom = new DOMDocument(); + $dom->recover = true; + $dom->strictErrorChecking = false; + $dom->loadXML($data); + $this->encoding = $encoding = $dom->encoding = 'UTF-8'; + $data = $dom->saveXML(); + //file_put_contents('/home/alex/public_html/alexandre.alapetite.fr/prive/FreshRSS/log/parser.log', date('c') . ' ' . 'OK' . "\n", FILE_APPEND); + } + catch (Exception $e) + { } $return = true; |
