summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexandre Alapetite <alexandre@alapetite.fr> 2013-12-25 14:21:29 +0100
committer: Alexandre Alapetite <alexandre@alapetite.fr> 2013-12-25 14:21:29 +0100
commit: 7e6d2eb6f4236b4f04bfb7c976f135a1f33cc107 (patch)
tree: 0eb4753d99e20bd60c6af91cd20e1f10e56eb83b
parent: f0c8cd8847335c0fa1060f6b58e9bb829a8fdd81 (diff)
Encore plus de flux tolérés avec leurs erreurs
Corrige https://github.com/marienfressinaud/FreshRSS/issues/332
-rw-r--r-- CHANGELOG | 1
-rw-r--r-- lib/SimplePie/SimplePie.php | 13
-rw-r--r-- lib/SimplePie/SimplePie/Parser.php | 30
3 files changed, 28 insertions, 16 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0c816dbd7..05d3a50ec 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -43,6 +43,7 @@
* PHP :
* Meilleure gestion des caractères spéciaux dans différents cas
* Amélioration des performances
+ * Encore plus tolérant pour les flux comportant des erreurs
* Chargement automatique des classes
* Alternative dans le cas d’absence de librairie JSON
* Pour le développement, le cache HTTP peut être désactivé en créant un fichier “./no-cache.txt”
diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php
index d20ab5430..f02037c10 100644
--- a/lib/SimplePie/SimplePie.php
+++ b/lib/SimplePie/SimplePie.php
@@ -1313,7 +1313,7 @@ class SimplePie
// First check to see if input has been overridden.
if ($this->input_encoding !== false)
{
- $encodings[] = $this->input_encoding;
+ $encodings[] = strtoupper($this->input_encoding);
}
$application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity');
@@ -1330,18 +1330,18 @@ class SimplePie
}
else
{
- $encodings[] = ''; //Let the DOM parser decide first
+ $encodings[] = ''; //FreshRSS: Let the DOM parser decide first
}
}
elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml')
{
if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset))
{
- $encodings[] = $charset[1];
+ $encodings[] = strtoupper($charset[1]);
}
else
{
- $encodings[] = '';
+ $encodings[] = ''; //FreshRSS: Let the DOM parser decide first
}
$encodings[] = 'US-ASCII';
}
@@ -1364,13 +1364,14 @@ class SimplePie
foreach ($encodings as $encoding)
{
// Change the encoding to UTF-8 (as we always use UTF-8 internally)
- if ($utf8_data = (empty($encoding) || $encoding === 'UTF-8') ? $this->raw_data : $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8')))
+ if ($utf8_data = (empty($encoding) || $encoding === 'UTF-8') ? $this->raw_data : //FreshRSS
+ $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8')))
{
// Create new parser
$parser = $this->registry->create('Parser');
// If it's parsed fine
- if ($parser->parse($utf8_data, 'UTF-8'))
+ if ($parser->parse($utf8_data, empty($encoding) ? '' : 'UTF-8')) //FreshRSS
{
$this->data = $parser->get_data();
if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE))
diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php
index c4c732787..bd6c4efd8 100644
--- a/lib/SimplePie/SimplePie/Parser.php
+++ b/lib/SimplePie/SimplePie/Parser.php
@@ -77,6 +77,8 @@ class SimplePie_Parser
public function parse(&$data, $encoding)
{
+ $xmlEncoding = '';
+
if (!empty($encoding))
{
// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
@@ -121,6 +123,7 @@ class SimplePie_Parser
$declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
if ($declaration->parse())
{
+ $xmlEncoding = strtoupper($declaration->encoding); //FreshRSS
$data = substr($data, $pos + 2);
$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
}
@@ -132,17 +135,24 @@ class SimplePie_Parser
}
}
- try //FreshRSS
- {
- $dom = new DOMDocument();
- $dom->recover = true;
- $dom->strictErrorChecking = false;
- $dom->loadXML($data);
- $this->encoding = $encoding = $dom->encoding = 'UTF-8';
- $data = $dom->saveXML();
- }
- catch (Exception $e)
+ if ($xmlEncoding === '' || $xmlEncoding === 'UTF-8') //FreshRSS: case of no explicit HTTP encoding, and lax UTF-8
{
+ try
+ {
+ $dom = new DOMDocument();
+ $dom->recover = true;
+ $dom->strictErrorChecking = false;
+ $dom->loadXML($data);
+ $this->encoding = $encoding = $dom->encoding = 'UTF-8';
+ $data2 = $dom->saveXML();
+ if (strlen($data2) > (strlen($data) / 2.0))
+ {
+ $data = $data2;
+ }
+ }
+ catch (Exception $e)
+ {
+ }
}
$return = true;