summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2013-11-07 19:18:52 +0100
committerGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2013-11-07 19:18:52 +0100
commit6464666075170b006501c4f12d6a2f470300af46 (patch)
tree28239a454392f3ceb5ea79063ef8cecaf1a3820c
parentec052432c081cd0db4717cd3ee3c3f9f47785acc (diff)
Plus de tolérance pour les flux
Utilise une méthode moins stricte qui tolère des erreurs XML dans les flux. Le choix de l'encodage en entrée a dû être légèrement changé car SimplePie a pour stratégie d'essayer plusieurs encodages jusqu'à en trouver un qui marche. En étant moins strict sur les erreurs, ça marche plus souvent, et du coup les encodages n'étaient plus bons. À essayer avec plein de flux. Tous mes flux passent (~150). Devrait permettre de fermer https://github.com/marienfressinaud/FreshRSS/issues/233. Fonctionne aussi avec des flux à encodage invalide comme http://travaux.ovh.net/rss.php, qui se déclare en "text/xml" (du coup ASCII) mais dont le flux contient un en-tête XML déclarant de l'UTF-8.
-rw-r--r--app/i18n/en.php2
-rw-r--r--app/i18n/fr.php2
-rw-r--r--app/models/Feed.php2
-rw-r--r--lib/SimplePie/SimplePie/Parser.php105
4 files changed, 64 insertions, 47 deletions
diff --git a/app/i18n/en.php b/app/i18n/en.php
index bf4d3f550..9366287c3 100644
--- a/app/i18n/en.php
+++ b/app/i18n/en.php
@@ -76,7 +76,7 @@ return array (
'already_subscribed' => 'You have already subscribed to <em>%s</em>',
'feed_added' => 'RSS feed <em>%s</em> has been added',
'feed_not_added' => '<em>%s</em> could not be added',
- 'internal_problem_feed' => 'An internal problem occurred, RSS feed could not be added',
+ 'internal_problem_feed' => 'The RSS feed could not be added. Check FressRSS logs for details.',
'invalid_url' => 'URL <em>%s</em> is invalid',
'feed_actualized' => '<em>%s</em> has been updated',
'n_feeds_actualized' => '%d feeds have been updated',
diff --git a/app/i18n/fr.php b/app/i18n/fr.php
index 3af8fef7c..0cb40fd82 100644
--- a/app/i18n/fr.php
+++ b/app/i18n/fr.php
@@ -76,7 +76,7 @@ return array (
'already_subscribed' => 'Vous êtes déjà abonné à <em>%s</em>',
'feed_added' => 'Le flux <em>%s</em> a bien été ajouté',
'feed_not_added' => '<em>%s</em> n’ a pas pu être ajouté',
- 'internal_problem_feed' => 'Un problème interne a été rencontré, le flux n’a pas pu être ajouté',
+ 'internal_problem_feed' => 'Le flux n’a pas pu être ajouté. Consulter les logs de FreshRSS pour plus de détails.',
'invalid_url' => 'L’url <em>%s</em> est invalide',
'feed_actualized' => '<em>%s</em> a été mis à jour',
'n_feeds_actualized' => '%d flux ont été mis à jour',
diff --git a/app/models/Feed.php b/app/models/Feed.php
index 97fa7aabc..3877c5b33 100644
--- a/app/models/Feed.php
+++ b/app/models/Feed.php
@@ -117,7 +117,7 @@ class Feed extends Model {
if (empty ($value)) {
throw new BadUrlException ($value);
}
- if (!preg_match ('#^https?://#', $value)) {
+ if (!preg_match ('#^https?://#i', $value)) {
$value = 'http://' . $value;
}
diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php
index d698552ca..72878c25a 100644
--- a/lib/SimplePie/SimplePie/Parser.php
+++ b/lib/SimplePie/SimplePie/Parser.php
@@ -77,56 +77,73 @@ class SimplePie_Parser
public function parse(&$data, $encoding)
{
- // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
- if (strtoupper($encoding) === 'US-ASCII')
+ if (!empty($encoding))
{
- $this->encoding = 'UTF-8';
- }
- else
- {
- $this->encoding = $encoding;
- }
-
- // Strip BOM:
- // UTF-32 Big Endian BOM
- if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
- {
- $data = substr($data, 4);
- }
- // UTF-32 Little Endian BOM
- elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
- {
- $data = substr($data, 4);
- }
- // UTF-16 Big Endian BOM
- elseif (substr($data, 0, 2) === "\xFE\xFF")
- {
- $data = substr($data, 2);
- }
- // UTF-16 Little Endian BOM
- elseif (substr($data, 0, 2) === "\xFF\xFE")
- {
- $data = substr($data, 2);
- }
- // UTF-8 BOM
- elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
- {
- $data = substr($data, 3);
- }
-
- if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
- {
- $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
- if ($declaration->parse())
+ // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
+ if (strtoupper($encoding) === 'US-ASCII')
{
- $data = substr($data, $pos + 2);
- $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
+ $this->encoding = 'UTF-8';
}
else
{
- $this->error_string = 'SimplePie bug! Please report this!';
- return false;
+ $this->encoding = $encoding;
+ }
+
+ // Strip BOM:
+ // UTF-32 Big Endian BOM
+ if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
+ {
+ $data = substr($data, 4);
}
+ // UTF-32 Little Endian BOM
+ elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
+ {
+ $data = substr($data, 4);
+ }
+ // UTF-16 Big Endian BOM
+ elseif (substr($data, 0, 2) === "\xFE\xFF")
+ {
+ $data = substr($data, 2);
+ }
+ // UTF-16 Little Endian BOM
+ elseif (substr($data, 0, 2) === "\xFF\xFE")
+ {
+ $data = substr($data, 2);
+ }
+ // UTF-8 BOM
+ elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
+ {
+ $data = substr($data, 3);
+ }
+
+ if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
+ {
+ $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
+ if ($declaration->parse())
+ {
+ $data = substr($data, $pos + 2);
+ $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
+ }
+ else
+ {
+ $this->error_string = 'SimplePie bug! Please report this!';
+ return false;
+ }
+ }
+ }
+
+ try
+ {
+ $dom = new DOMDocument();
+ $dom->recover = true;
+ $dom->strictErrorChecking = false;
+ $dom->loadXML($data);
+ $this->encoding = $encoding = $dom->encoding = 'UTF-8';
+ $data = $dom->saveXML();
+ //file_put_contents('/home/alex/public_html/alexandre.alapetite.fr/prive/FreshRSS/log/parser.log', date('c') . ' ' . 'OK' . "\n", FILE_APPEND);
+ }
+ catch (Exception $e)
+ {
}
$return = true;