diff options
| author | 2014-01-18 22:54:53 +0100 | |
|---|---|---|
| committer | 2014-01-18 22:54:53 +0100 | |
| commit | 6f117abfb6010150f95d2d0830a9715cd6f270fc (patch) | |
| tree | df1e59ec45c1e6ef7c5955d3837d5681eb9d2be2 | |
| parent | 5f0594c630028b8612b57da15bcdf00a932930bc (diff) | |
Bugs chargement des articles complets
Corrige https://github.com/marienfressinaud/FreshRSS/issues/365
Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage
des caractères HTML dans l'URL, et enfin un message dans le syslog pour
chaque article téléchargé
| -rw-r--r-- | app/Models/Entry.php | 2 | ||||
| -rw-r--r-- | lib/lib_rss.php | 14 |
2 files changed, 4 insertions, 12 deletions
```diff
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 83f68ce78..a6c67221b 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -159,7 +159,7 @@ class FreshRSS_Entry extends Minz_Model {
 			try {
 				// l'article n'est pas en BDD, on va le chercher sur le site
 				$this->content = get_content_by_parsing(
-					$this->link(), $pathEntries
+					htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries
 				);
 			} catch (Exception $e) {
 				// rien à faire, on garde l'ancien contenu (requête a échoué)
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 4f98ed14a..33d7ebc32 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -110,6 +110,7 @@ function sanitizeHTML($data) {
 	static $simplePie = null;
 	if ($simplePie == null) {
 		$simplePie = new SimplePie();
+		$simplePie->init();
 	}
 	return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML));
 }
@@ -118,22 +119,13 @@ function sanitizeHTML($data) {
 function get_content_by_parsing ($url, $path) {
 	require_once (LIB_PATH . '/lib_phpQuery.php');
 
+	syslog(LOG_INFO, 'FreshRSS GET ' . $url);
 	$html = file_get_contents ($url);
 
 	if ($html) {
 		$doc = phpQuery::newDocument ($html);
 		$content = $doc->find ($path);
-		$content->find ('*')->removeAttr ('style')
-		                    ->removeAttr ('id')
-		                    ->removeAttr ('class')
-		                    ->removeAttr ('onload')
-		                    ->removeAttr ('target');
-		$content->removeAttr ('style')
-		        ->removeAttr ('id')
-		        ->removeAttr ('class')
-		        ->removeAttr ('onload')
-		        ->removeAttr ('target');
-		return $content->__toString ();
+		return sanitizeHTML($content->__toString());
 	} else {
 		throw new Exception ();
 	}
```
