From 6464666075170b006501c4f12d6a2f470300af46 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Thu, 7 Nov 2013 19:18:52 +0100 Subject: Plus de tolérance pour les flux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Utilise une méthode moins stricte qui tolère des erreurs XML dans les flux. Le choix de l'encodage en entrée a dû être légèrement changé car SimplePie a une stratégie d'essayer plusieurs encodages jusqu'à en trouver un qui marche. En étant moins strict sur les erreurs, ça marche plus souvent, et du coup les encodages n'étaient plus bon. À essayer avec plein de flux. Tous mes flux passent (~150). Devrait permettre de fermer https://github.com/marienfressinaud/FreshRSS/issues/233 Fonctionne aussi avec des flux à encodage invalide comme http://travaux.ovh.net/rss.php qui se déclare en "text/xml" (du coup ASCII) mais dans le flux avec un entête XML déclarant de l'UTF-8 --- lib/SimplePie/SimplePie/Parser.php | 105 +++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 44 deletions(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php index d698552ca..72878c25a 100644 --- a/lib/SimplePie/SimplePie/Parser.php +++ b/lib/SimplePie/SimplePie/Parser.php @@ -77,56 +77,73 @@ class SimplePie_Parser public function parse(&$data, $encoding) { - // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character - if (strtoupper($encoding) === 'US-ASCII') + if (!empty($encoding)) { - $this->encoding = 'UTF-8'; - } - else - { - $this->encoding = $encoding; - } - - // Strip BOM: - // UTF-32 Big Endian BOM - if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") - { - $data = substr($data, 4); - } - // UTF-32 Little Endian BOM - elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") - { - $data = substr($data, 4); - } - // UTF-16 Big Endian BOM - elseif (substr($data, 0, 2) === "\xFE\xFF") - { - $data = substr($data, 2); - } - // UTF-16 Little Endian BOM - elseif (substr($data, 0, 2) === "\xFF\xFE") - { - $data = substr($data, 2); - } - // UTF-8 BOM - elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") - { - $data = substr($data, 3); - } - - if (substr($data, 0, 5) === '')) !== false) - { - $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); - if ($declaration->parse()) + // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character + if (strtoupper($encoding) === 'US-ASCII') { - $data = substr($data, $pos + 2); - $data = 'version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + $this->encoding = 'UTF-8'; } else { - $this->error_string = 'SimplePie bug! Please report this!'; - return false; + $this->encoding = $encoding; + } + + // Strip BOM: + // UTF-32 Big Endian BOM + if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") + { + $data = substr($data, 4); } + // UTF-32 Little Endian BOM + elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") + { + $data = substr($data, 4); + } + // UTF-16 Big Endian BOM + elseif (substr($data, 0, 2) === "\xFE\xFF") + { + $data = substr($data, 2); + } + // UTF-16 Little Endian BOM + elseif (substr($data, 0, 2) === "\xFF\xFE") + { + $data = substr($data, 2); + } + // UTF-8 BOM + elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") + { + $data = substr($data, 3); + } + + if (substr($data, 0, 5) === '')) !== false) + { + $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); + if ($declaration->parse()) + { + $data = substr($data, $pos + 2); + $data = 'version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + } + else + { + $this->error_string = 'SimplePie bug! Please report this!'; + return false; + } + } + } + + try + { + $dom = new DOMDocument(); + $dom->recover = true; + $dom->strictErrorChecking = false; + $dom->loadXML($data); + $this->encoding = $encoding = $dom->encoding = 'UTF-8'; + $data = $dom->saveXML(); + //file_put_contents('/home/alex/public_html/alexandre.alapetite.fr/prive/FreshRSS/log/parser.log', date('c') . ' ' . 'OK' . "\n", FILE_APPEND); + } + catch (Exception $e) + { } $return = true; -- cgit v1.2.3 From 316778ef47394b27047abd9c9a739c34d2fd3829 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Thu, 7 Nov 2013 19:27:52 +0100 Subject: Un morceau de tolérance XML oublié MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/SimplePie/SimplePie.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php index b33c635f1..9e532023a 100644 --- a/lib/SimplePie/SimplePie.php +++ b/lib/SimplePie/SimplePie.php @@ -1311,8 +1311,10 @@ class SimplePie { $encodings[] = strtoupper($charset[1]); } - $encodings = array_merge($encodings, $this->registry->call('Misc', 'xml_encoding', array($this->raw_data, &$this->registry))); - $encodings[] = 'UTF-8'; + else + { + $encodings[] = ''; //Let the DOM parser decide first + } } elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml') { @@ -1320,6 +1322,10 @@ class SimplePie { $encodings[] = $charset[1]; } + else + { + $encodings[] = ''; + } $encodings[] = 'US-ASCII'; } // Text MIME-type default @@ -1341,7 +1347,7 @@ class SimplePie foreach ($encodings as $encoding) { // Change the encoding to UTF-8 (as we always use UTF-8 internally) - if ($utf8_data = $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8'))) + if ($utf8_data = (empty($encoding) || $encoding === 'UTF-8') ? $this->raw_data : $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8'))) { // Create new parser $parser = $this->registry->create('Parser'); -- cgit v1.2.3 From e2d4f1a7214591a47a46272a7a62e320eea029ce Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 18 Nov 2013 23:04:43 +0100 Subject: SQL : identifiant entier automatique pour les catégories et les flux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implémentation de https://github.com/marienfressinaud/FreshRSS/issues/262 La catégorie par défaut à le numéro 1. Les numéros de catégories et de flux sont automatiques (1, 2, 3...) L'installeur semble marcher. --- app/controllers/feedController.php | 2 +- app/layout/aside_feed.phtml | 2 +- app/models/Category.php | 17 ++++++----------- app/models/Entry.php | 9 ++++++--- app/models/Feed.php | 13 ++++--------- lib/SimplePie/SimplePie/Parser.php | 3 +-- public/install.php | 8 ++++---- 7 files changed, 23 insertions(+), 31 deletions(-) (limited to 'lib/SimplePie') diff --git a/app/controllers/feedController.php b/app/controllers/feedController.php index 5c905e6da..73d13063d 100755 --- a/app/controllers/feedController.php +++ b/app/controllers/feedController.php @@ -175,7 +175,7 @@ class feedController extends ActionController { $entries = $feed->entries (); //For this feed, check last n entry IDs already in database - $existingIds = array_fill_keys ($entryDAO->listLastIdsByFeed ($feed->id (), count($entries) + 2), 1); + $existingIds = array_fill_keys ($entryDAO->listLastIdsByFeed ($feed->id (), count($entries) + 10), 1); // ajout des articles en masse sans se soucier des erreurs // On ne vérifie pas que l'article n'est pas déjà en BDD diff --git a/app/layout/aside_feed.phtml b/app/layout/aside_feed.phtml index 49767740b..f737d1e31 100644 --- a/app/layout/aside_feed.phtml +++ b/app/layout/aside_feed.phtml @@ -16,7 +16,7 @@