summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexandre Alapetite <alexandre@alapetite.fr> 2014-01-18 22:54:53 +0100
committer: Alexandre Alapetite <alexandre@alapetite.fr> 2014-01-18 22:54:53 +0100
commit: 6f117abfb6010150f95d2d0830a9715cd6f270fc (patch)
tree: df1e59ec45c1e6ef7c5955d3837d5681eb9d2be2
parent: 5f0594c630028b8612b57da15bcdf00a932930bc (diff)
Bugs chargement des articles complets
Corrige https://github.com/marienfressinaud/FreshRSS/issues/365
Ajoute le nettoyage correct du HTML par SimplePie, ainsi que le décodage des caractères HTML dans l'URL, et enfin un message dans le syslog pour chaque article téléchargé
-rw-r--r-- app/Models/Entry.php 2
-rw-r--r-- lib/lib_rss.php 14
2 files changed, 4 insertions, 12 deletions
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 83f68ce78..a6c67221b 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -159,7 +159,7 @@ class FreshRSS_Entry extends Minz_Model {
try {
// l'article n'est pas en BDD, on va le chercher sur le site
$this->content = get_content_by_parsing(
- $this->link(), $pathEntries
+ htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries
);
} catch (Exception $e) {
// rien à faire, on garde l'ancien contenu (requête a échoué)
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 4f98ed14a..33d7ebc32 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -110,6 +110,7 @@ function sanitizeHTML($data) {
static $simplePie = null;
if ($simplePie == null) {
$simplePie = new SimplePie();
+ $simplePie->init();
}
return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_MAYBE_HTML));
}
@@ -118,22 +119,13 @@ function sanitizeHTML($data) {
function get_content_by_parsing ($url, $path) {
require_once (LIB_PATH . '/lib_phpQuery.php');
+ syslog(LOG_INFO, 'FreshRSS GET ' . $url);
$html = file_get_contents ($url);
if ($html) {
$doc = phpQuery::newDocument ($html);
$content = $doc->find ($path);
- $content->find ('*')->removeAttr ('style')
- ->removeAttr ('id')
- ->removeAttr ('class')
- ->removeAttr ('onload')
- ->removeAttr ('target');
- $content->removeAttr ('style')
- ->removeAttr ('id')
- ->removeAttr ('class')
- ->removeAttr ('onload')
- ->removeAttr ('target');
- return $content->__toString ();
+ return sanitizeHTML($content->__toString());
} else {
throw new Exception ();
}