diff options
| author | 2013-11-07 19:18:52 +0100 | |
|---|---|---|
| committer | 2013-11-07 19:18:52 +0100 | |
| commit | 6464666075170b006501c4f12d6a2f470300af46 (patch) | |
| tree | 28239a454392f3ceb5ea79063ef8cecaf1a3820c /lib/SimplePie | |
| parent | ec052432c081cd0db4717cd3ee3c3f9f47785acc (diff) | |
Plus de tolérance pour les flux
Utilise une méthode moins stricte qui tolère des erreurs XML dans les
flux.
Le choix de l'encodage en entrée a dû être légèrement changé car
SimplePie a une stratégie d'essayer plusieurs encodages jusqu'à en
trouver un qui marche.
En étant moins strict sur les erreurs, ça marche plus souvent, et du
coup les encodages n'étaient plus bon.
À essayer avec plein de flux.
Tous mes flux passent (~150).
Devrait permettre de fermer
https://github.com/marienfressinaud/FreshRSS/issues/233
Fonctionne aussi avec des flux à encodage invalide comme
http://travaux.ovh.net/rss.php qui se déclare en "text/xml" (du coup
ASCII) mais dans le flux avec un entête XML déclarant de l'UTF-8
Diffstat (limited to 'lib/SimplePie')
| -rw-r--r-- | lib/SimplePie/SimplePie/Parser.php | 105 |
1 files changed, 61 insertions, 44 deletions
diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php index d698552ca..72878c25a 100644 --- a/lib/SimplePie/SimplePie/Parser.php +++ b/lib/SimplePie/SimplePie/Parser.php @@ -77,56 +77,73 @@ class SimplePie_Parser public function parse(&$data, $encoding) { - // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character - if (strtoupper($encoding) === 'US-ASCII') + if (!empty($encoding)) { - $this->encoding = 'UTF-8'; - } - else - { - $this->encoding = $encoding; - } - - // Strip BOM: - // UTF-32 Big Endian BOM - if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") - { - $data = substr($data, 4); - } - // UTF-32 Little Endian BOM - elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") - { - $data = substr($data, 4); - } - // UTF-16 Big Endian BOM - elseif (substr($data, 0, 2) === "\xFE\xFF") - { - $data = substr($data, 2); - } - // UTF-16 Little Endian BOM - elseif (substr($data, 0, 2) === "\xFF\xFE") - { - $data = substr($data, 2); - } - // UTF-8 BOM - elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") - { - $data = substr($data, 3); - } - - if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) - { - $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); - if ($declaration->parse()) + // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character + if (strtoupper($encoding) === 'US-ASCII') { - $data = substr($data, $pos + 2); - $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + $this->encoding = 'UTF-8'; } else { - $this->error_string = 'SimplePie bug! Please report this!'; - return false; + $this->encoding = $encoding; + } + + // Strip BOM: + // UTF-32 Big Endian BOM + if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") + { + $data = substr($data, 4); } + // UTF-32 Little Endian BOM + elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") + { + $data = substr($data, 4); + } + // UTF-16 Big Endian BOM + elseif (substr($data, 0, 2) === "\xFE\xFF") + { + $data = substr($data, 2); + } + // UTF-16 Little Endian BOM + elseif (substr($data, 0, 2) === "\xFF\xFE") + { + $data = substr($data, 2); + } + // UTF-8 BOM + elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") + { + $data = substr($data, 3); + } + + if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) + { + $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); + if ($declaration->parse()) + { + $data = substr($data, $pos + 2); + $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + } + else + { + $this->error_string = 'SimplePie bug! Please report this!'; + return false; + } + } + } + + try + { + $dom = new DOMDocument(); + $dom->recover = true; + $dom->strictErrorChecking = false; + $dom->loadXML($data); + $this->encoding = $encoding = $dom->encoding = 'UTF-8'; + $data = $dom->saveXML(); + //file_put_contents('/home/alex/public_html/alexandre.alapetite.fr/prive/FreshRSS/log/parser.log', date('c') . ' ' . 'OK' . "\n", FILE_APPEND); + } + catch (Exception $e) + { } $return = true; |
