aboutsummaryrefslogtreecommitdiff
path: root/lib/SimplePie
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2013-11-07 19:18:52 +0100
committerGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2013-11-07 19:18:52 +0100
commit6464666075170b006501c4f12d6a2f470300af46 (patch)
tree28239a454392f3ceb5ea79063ef8cecaf1a3820c /lib/SimplePie
parentec052432c081cd0db4717cd3ee3c3f9f47785acc (diff)
Plus de tolérance pour les flux
Utilise une méthode moins stricte qui tolère des erreurs XML dans les flux. Le choix de l'encodage en entrée a dû être légèrement changé car SimplePie a une stratégie d'essayer plusieurs encodages jusqu'à en trouver un qui marche. En étant moins strict sur les erreurs, ça marche plus souvent, et du coup les encodages n'étaient plus bon. À essayer avec plein de flux. Tous mes flux passent (~150). Devrait permettre de fermer https://github.com/marienfressinaud/FreshRSS/issues/233 Fonctionne aussi avec des flux à encodage invalide comme http://travaux.ovh.net/rss.php qui se déclare en "text/xml" (du coup ASCII) mais dans le flux avec un entête XML déclarant de l'UTF-8
Diffstat (limited to 'lib/SimplePie')
-rw-r--r--lib/SimplePie/SimplePie/Parser.php105
1 files changed, 61 insertions, 44 deletions
diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php
index d698552ca..72878c25a 100644
--- a/lib/SimplePie/SimplePie/Parser.php
+++ b/lib/SimplePie/SimplePie/Parser.php
@@ -77,56 +77,73 @@ class SimplePie_Parser
public function parse(&$data, $encoding)
{
- // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
- if (strtoupper($encoding) === 'US-ASCII')
+ if (!empty($encoding))
{
- $this->encoding = 'UTF-8';
- }
- else
- {
- $this->encoding = $encoding;
- }
-
- // Strip BOM:
- // UTF-32 Big Endian BOM
- if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
- {
- $data = substr($data, 4);
- }
- // UTF-32 Little Endian BOM
- elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
- {
- $data = substr($data, 4);
- }
- // UTF-16 Big Endian BOM
- elseif (substr($data, 0, 2) === "\xFE\xFF")
- {
- $data = substr($data, 2);
- }
- // UTF-16 Little Endian BOM
- elseif (substr($data, 0, 2) === "\xFF\xFE")
- {
- $data = substr($data, 2);
- }
- // UTF-8 BOM
- elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
- {
- $data = substr($data, 3);
- }
-
- if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
- {
- $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
- if ($declaration->parse())
+ // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
+ if (strtoupper($encoding) === 'US-ASCII')
{
- $data = substr($data, $pos + 2);
- $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
+ $this->encoding = 'UTF-8';
}
else
{
- $this->error_string = 'SimplePie bug! Please report this!';
- return false;
+ $this->encoding = $encoding;
+ }
+
+ // Strip BOM:
+ // UTF-32 Big Endian BOM
+ if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
+ {
+ $data = substr($data, 4);
}
+ // UTF-32 Little Endian BOM
+ elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
+ {
+ $data = substr($data, 4);
+ }
+ // UTF-16 Big Endian BOM
+ elseif (substr($data, 0, 2) === "\xFE\xFF")
+ {
+ $data = substr($data, 2);
+ }
+ // UTF-16 Little Endian BOM
+ elseif (substr($data, 0, 2) === "\xFF\xFE")
+ {
+ $data = substr($data, 2);
+ }
+ // UTF-8 BOM
+ elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
+ {
+ $data = substr($data, 3);
+ }
+
+ if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
+ {
+ $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
+ if ($declaration->parse())
+ {
+ $data = substr($data, $pos + 2);
+ $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
+ }
+ else
+ {
+ $this->error_string = 'SimplePie bug! Please report this!';
+ return false;
+ }
+ }
+ }
+
+ try
+ {
+ $dom = new DOMDocument();
+ $dom->recover = true;
+ $dom->strictErrorChecking = false;
+ $dom->loadXML($data);
+ $this->encoding = $encoding = $dom->encoding = 'UTF-8';
+ $data = $dom->saveXML();
+ //file_put_contents('/home/alex/public_html/alexandre.alapetite.fr/prive/FreshRSS/log/parser.log', date('c') . ' ' . 'OK' . "\n", FILE_APPEND);
+ }
+ catch (Exception $e)
+ {
}
$return = true;