From 6f117abfb6010150f95d2d0830a9715cd6f270fc Mon Sep 17 00:00:00 2001
From: Alexandre Alapetite
Date: Sat, 18 Jan 2014 22:54:53 +0100
Subject: Bugs chargement des articles complets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Corrige https://github.com/marienfressinaud/FreshRSS/issues/365
Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage
des caractères HTML dans l'URL, et enfin un message dans le syslog pour
chaque article téléchargé
---
 lib/lib_rss.php | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'lib/lib_rss.php')

diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 4f98ed14a..33d7ebc32 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -110,6 +110,7 @@ function sanitizeHTML($data) {
 	static $simplePie = null;
 	if ($simplePie == null) {
 		$simplePie = new SimplePie();
+		$simplePie->init();
 	}
 	return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML));
 }
@@ -118,22 +119,13 @@ function sanitizeHTML($data) {
 function get_content_by_parsing ($url, $path) {
 	require_once (LIB_PATH . '/lib_phpQuery.php');
 
+	syslog(LOG_INFO, 'FreshRSS GET ' . $url);
 	$html = file_get_contents ($url);
 
 	if ($html) {
 		$doc = phpQuery::newDocument ($html);
 		$content = $doc->find ($path);
-		$content->find ('*')->removeAttr ('style')
-		                    ->removeAttr ('id')
-		                    ->removeAttr ('class')
-		                    ->removeAttr ('onload')
-		                    ->removeAttr ('target');
-		$content->removeAttr ('style')
-		        ->removeAttr ('id')
-		        ->removeAttr ('class')
-		        ->removeAttr ('onload')
-		        ->removeAttr ('target');
-		return $content->__toString ();
+		return sanitizeHTML($content->__toString());
 	} else {
 		throw new Exception ();
 	}
-- 
cgit v1.2.3