From 6f117abfb6010150f95d2d0830a9715cd6f270fc Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 18 Jan 2014 22:54:53 +0100 Subject: Bugs chargement des articles complets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corrige https://github.com/marienfressinaud/FreshRSS/issues/365 Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage des caractères HTML dans l'URL, et enfin un message dans le syslog pour chaque article téléchargé --- app/Models/Entry.php | 2 +- lib/lib_rss.php | 14 +++----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/app/Models/Entry.php b/app/Models/Entry.php index 83f68ce78..a6c67221b 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -159,7 +159,7 @@ class FreshRSS_Entry extends Minz_Model { try { // l'article n'est pas en BDD, on va le chercher sur le site $this->content = get_content_by_parsing( - $this->link(), $pathEntries + htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries ); } catch (Exception $e) { // rien à faire, on garde l'ancien contenu (requête a échoué) diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 4f98ed14a..33d7ebc32 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -110,6 +110,7 @@ function sanitizeHTML($data) { static $simplePie = null; if ($simplePie == null) { $simplePie = new SimplePie(); + $simplePie->init(); } return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML)); } @@ -118,22 +119,13 @@ function sanitizeHTML($data) { function get_content_by_parsing ($url, $path) { require_once (LIB_PATH . '/lib_phpQuery.php'); + syslog(LOG_INFO, 'FreshRSS GET ' . $url); $html = file_get_contents ($url); if ($html) { $doc = phpQuery::newDocument ($html); $content = $doc->find ($path); - $content->find ('*')->removeAttr ('style') - ->removeAttr ('id') - ->removeAttr ('class') - ->removeAttr ('onload') - ->removeAttr ('target'); - $content->removeAttr ('style') - ->removeAttr ('id') - ->removeAttr ('class') - ->removeAttr ('onload') - ->removeAttr ('target'); - return $content->__toString (); + return sanitizeHTML($content->__toString()); } else { throw new Exception (); } -- cgit v1.2.3