summary | refs | log | tree | commit | diff
path: root/lib/lib_rss.php
diff options
context:
space:
mode:
author: Alexandre Alapetite <alexandre@alapetite.fr> 2014-01-18 22:54:53 +0100
committer: Alexandre Alapetite <alexandre@alapetite.fr> 2014-01-18 22:54:53 +0100
commit 6f117abfb6010150f95d2d0830a9715cd6f270fc (patch)
tree df1e59ec45c1e6ef7c5955d3837d5681eb9d2be2 /lib/lib_rss.php
parent 5f0594c630028b8612b57da15bcdf00a932930bc (diff)
Bugs chargement des articles complets
Corrige https://github.com/marienfressinaud/FreshRSS/issues/365
Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage des caractères HTML dans l'URL, et enfin un message dans le syslog pour chaque article téléchargé.
Diffstat (limited to 'lib/lib_rss.php')
-rw-r--r-- lib/lib_rss.php 14
1 files changed, 3 insertions, 11 deletions
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 4f98ed14a..33d7ebc32 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -110,6 +110,7 @@ function sanitizeHTML($data) {
static $simplePie = null;
if ($simplePie == null) {
$simplePie = new SimplePie();
+ $simplePie->init();
}
return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML));
}
@@ -118,22 +119,13 @@ function sanitizeHTML($data) {
function get_content_by_parsing ($url, $path) {
require_once (LIB_PATH . '/lib_phpQuery.php');
+ syslog(LOG_INFO, 'FreshRSS GET ' . $url);
$html = file_get_contents ($url);
if ($html) {
$doc = phpQuery::newDocument ($html);
$content = $doc->find ($path);
- $content->find ('*')->removeAttr ('style')
- ->removeAttr ('id')
- ->removeAttr ('class')
- ->removeAttr ('onload')
- ->removeAttr ('target');
- $content->removeAttr ('style')
- ->removeAttr ('id')
- ->removeAttr ('class')
- ->removeAttr ('onload')
- ->removeAttr ('target');
- return $content->__toString ();
+ return sanitizeHTML($content->__toString());
} else {
throw new Exception ();
}