diff options
| author | 2014-01-18 22:54:53 +0100 | |
|---|---|---|
| committer | 2014-01-18 22:54:53 +0100 | |
| commit | 6f117abfb6010150f95d2d0830a9715cd6f270fc (patch) | |
| tree | df1e59ec45c1e6ef7c5955d3837d5681eb9d2be2 /lib/lib_rss.php | |
| parent | 5f0594c630028b8612b57da15bcdf00a932930bc (diff) | |
Bugs chargement des articles complets
Corrige https://github.com/marienfressinaud/FreshRSS/issues/365
Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage
des caractères HTML dans l'URL, et enfin un message dans le syslog pour
chaque article téléchargé
Diffstat (limited to 'lib/lib_rss.php')
| -rw-r--r-- | lib/lib_rss.php | 14 |
1 file changed, 3 insertions, 11 deletions
```diff
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 4f98ed14a..33d7ebc32 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -110,6 +110,7 @@ function sanitizeHTML($data) {
 	static $simplePie = null;
 	if ($simplePie == null) {
 		$simplePie = new SimplePie();
+		$simplePie->init();
 	}
 	return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML));
 }
@@ -118,22 +119,13 @@ function sanitizeHTML($data) {
 function get_content_by_parsing ($url, $path) {
 	require_once (LIB_PATH . '/lib_phpQuery.php');
 
+	syslog(LOG_INFO, 'FreshRSS GET ' . $url);
 	$html = file_get_contents ($url);
 
 	if ($html) {
 		$doc = phpQuery::newDocument ($html);
 		$content = $doc->find ($path);
-		$content->find ('*')->removeAttr ('style')
-			->removeAttr ('id')
-			->removeAttr ('class')
-			->removeAttr ('onload')
-			->removeAttr ('target');
-		$content->removeAttr ('style')
-			->removeAttr ('id')
-			->removeAttr ('class')
-			->removeAttr ('onload')
-			->removeAttr ('target');
-		return $content->__toString ();
+		return sanitizeHTML($content->__toString());
 	} else {
 		throw new Exception ();
 	}
```
