From ec611d7d543e27fe8c794cd98177d00cf636c58f Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 16 Mar 2015 21:37:28 +0100 Subject: SimplePie: decode special chars for MAYBE_HTML https://github.com/FreshRSS/FreshRSS/issues/754 Needs to check with many feeds to see if this does not introduce incompatibilities with some valid feeds. --- lib/SimplePie/SimplePie/Sanitize.php | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Sanitize.php b/lib/SimplePie/SimplePie/Sanitize.php index 168a5e2e8..3da7239be 100644 --- a/lib/SimplePie/SimplePie/Sanitize.php +++ b/lib/SimplePie/SimplePie/Sanitize.php @@ -249,6 +249,11 @@ class SimplePie_Sanitize { if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) { + if (preg_match('/&#(x[0-9a-fA-F]+|[0-9]+);/', $data)) //FreshRSS + { + $data = htmlspecialchars_decode($data, ENT_QUOTES); //FreshRSS + //syslog(LOG_DEBUG, 'SimplePie sanitize MAYBE_HTML htmlspecialchars_decode: ' . $data); //FreshRSS + } if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) { $type |= SIMPLEPIE_CONSTRUCT_HTML; -- cgit v1.2.3 From 0e08b5ba56b2117e0f3d9bea6ae725295e8700d5 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 21 Mar 2015 15:13:08 +0100 Subject: htmlspecialchars_decode for all SIMPLEPIE_CONSTRUCT_MAYBE_HTML https://github.com/FreshRSS/FreshRSS/issues/754 --- lib/SimplePie/SimplePie/Sanitize.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Sanitize.php b/lib/SimplePie/SimplePie/Sanitize.php index 3da7239be..e7c9f925f 100644 --- a/lib/SimplePie/SimplePie/Sanitize.php +++ b/lib/SimplePie/SimplePie/Sanitize.php @@ -249,11 +249,7 @@ class SimplePie_Sanitize { if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) { - if (preg_match('/&#(x[0-9a-fA-F]+|[0-9]+);/', $data)) //FreshRSS - { - $data = htmlspecialchars_decode($data, ENT_QUOTES); //FreshRSS - //syslog(LOG_DEBUG, 'SimplePie sanitize MAYBE_HTML htmlspecialchars_decode: ' . $data); //FreshRSS - } + $data = htmlspecialchars_decode($data, ENT_QUOTES); //FreshRSS if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) { $type |= SIMPLEPIE_CONSTRUCT_HTML; -- cgit v1.2.3 From 1a35e2271d3b9383e882371d37d5fef16abd745d Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 21 Mar 2015 18:20:36 +0100 Subject: SimplePie option to restaure syslog of HTTP requests https://github.com/FreshRSS/FreshRSS/issues/711 --- app/Models/Feed.php | 4 ++-- data/config.default.php | 1 + lib/SimplePie/SimplePie.php | 41 ++++++++++++++++++++++++++++++++++------ lib/SimplePie/SimplePie/File.php | 7 +++++-- lib/lib_rss.php | 1 + 5 files changed, 44 insertions(+), 10 deletions(-) (limited to 'lib/SimplePie') diff --git a/app/Models/Feed.php b/app/Models/Feed.php index 5ce03be5d..5f67ea6ce 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -246,10 +246,10 @@ class FreshRSS_Feed extends Minz_Model { } if (($mtime === true) ||($mtime > $this->lastUpdate)) { - Minz_Log::notice('FreshRSS no cache ' . $mtime . ' > ' . $this->lastUpdate . ' for ' . $clean_url); + //Minz_Log::debug('FreshRSS no cache ' . $mtime . ' > ' . $this->lastUpdate . ' for ' . $clean_url); $this->loadEntries($feed); // et on charge les articles du flux } else { - Minz_Log::notice('FreshRSS use cache for ' . $clean_url); + //Minz_Log::debug('FreshRSS use cache for ' . $clean_url); $this->entries = array(); } diff --git a/data/config.default.php b/data/config.default.php index 97df3a299..839bd1687 100644 --- a/data/config.default.php +++ b/data/config.default.php @@ -12,6 +12,7 @@ return array( 'auth_type' => 'none', 'api_enabled' => false, 'unsafe_autologin_enabled' => false, + 'simplepie_syslog_enabled' => true, 'limits' => array( 'cache_duration' => 800, 'timeout' => 10, diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php index c4872b5be..bb8ce4191 100644 --- a/lib/SimplePie/SimplePie.php +++ b/lib/SimplePie/SimplePie.php @@ -74,6 +74,12 @@ define('SIMPLEPIE_USERAGENT', SIMPLEPIE_NAME . '/' . SIMPLEPIE_VERSION . ' (Feed */ define('SIMPLEPIE_LINKBACK', '' . SIMPLEPIE_NAME . ''); +/** + * Use syslog to report HTTP requests done by SimplePie. + * @see SimplePie::set_syslog() + */ +define('SIMPLEPIE_SYSLOG', true); //FreshRSS + /** * No Autodiscovery * @see SimplePie::set_autodiscovery_level() @@ -622,6 +628,12 @@ class SimplePie */ public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); + /** + * Use syslog to report HTTP requests done by SimplePie. + * @see SimplePie::set_syslog() + */ + public $syslog_enabled = SIMPLEPIE_SYSLOG; + /** * The SimplePie class contains feed level data and options * @@ -1136,7 +1148,7 @@ class SimplePie $this->sanitize->strip_attributes($attribs); } - public function add_attributes($attribs = '') + public function add_attributes($attribs = '') //FreshRSS { if ($attribs === '') { @@ -1145,6 +1157,14 @@ class SimplePie $this->sanitize->add_attributes($attribs); } + /** + * Use syslog to report HTTP requests done by SimplePie. + */ + public function set_syslog($value = SIMPLEPIE_SYSLOG) //FreshRSS + { + $this->syslog_enabled = $value == true; + } + /** * Set the output encoding * @@ -1231,7 +1251,8 @@ class SimplePie $this->enable_exceptions = $enable; } - function cleanMd5($rss) { //FreshRSS + function cleanMd5($rss) //FreshRSS + { return md5(preg_replace(array('#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+#', '##s'), '', $rss)); } @@ -1329,7 +1350,8 @@ class SimplePie list($headers, $sniffed) = $fetched; - if (isset($this->data['md5'])) { //FreshRSS + if (isset($this->data['md5'])) //FreshRSS + { $md5 = $this->data['md5']; } } @@ -1455,7 +1477,8 @@ class SimplePie { // Load the Cache $this->data = $cache->load(); - if ($cache->mtime() + $this->cache_duration > time()) { //FreshRSS + if ($cache->mtime() + $this->cache_duration > time()) //FreshRSS + { $this->raw_data = false; return true; // If the cache is still valid, just return true } @@ -1529,11 +1552,17 @@ class SimplePie { //FreshRSS $md5 = $this->cleanMd5($file->body); if ($this->data['md5'] === $md5) { - // syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . $this->feed_url); + if ($this->syslog_enabled) + { + syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . $this->feed_url); + } $cache->touch(); return true; //Content unchanged even though server did not send a 304 } else { - // syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . $this->feed_url); + if ($this->syslog_enabled) + { + syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . $this->feed_url); + } $this->data['md5'] = $md5; } } diff --git a/lib/SimplePie/SimplePie/File.php b/lib/SimplePie/SimplePie/File.php index 9625af2a9..56fe72196 100644 --- a/lib/SimplePie/SimplePie/File.php +++ b/lib/SimplePie/SimplePie/File.php @@ -66,7 +66,7 @@ class SimplePie_File var $method = SIMPLEPIE_FILE_SOURCE_NONE; var $permanent_url; //FreshRSS - public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) + public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $syslog_enabled = SIMPLEPIE_SYSLOG) { if (class_exists('idna_convert')) { @@ -79,7 +79,10 @@ class SimplePie_File $this->useragent = $useragent; if (preg_match('/^http(s)?:\/\//i', $url)) { - // syslog(LOG_INFO, 'SimplePie GET ' . $url); //FreshRSS + if ($syslog_enabled) + { + syslog(LOG_INFO, 'SimplePie GET ' . $url); //FreshRSS + } if ($useragent === null) { $useragent = ini_get('user_agent'); diff --git a/lib/lib_rss.php b/lib/lib_rss.php index e5fe73041..16ae3097f 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -123,6 +123,7 @@ function customSimplePie() { $limits = $system_conf->limits; $simplePie = new SimplePie(); $simplePie->set_useragent(_t('gen.freshrss') . '/' . FRESHRSS_VERSION . ' (' . PHP_OS . '; ' . FRESHRSS_WEBSITE . ') ' . SIMPLEPIE_NAME . '/' . SIMPLEPIE_VERSION); + $simplePie->set_syslog($system_conf->simplepie_syslog_enabled); $simplePie->set_cache_location(CACHE_PATH); $simplePie->set_cache_duration($limits['cache_duration']); $simplePie->set_timeout($limits['timeout']); -- cgit v1.2.3 From ad9fe52f5a76faf58d13fcf7bde8f58e85abe82b Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 22 Mar 2015 22:54:29 +0100 Subject: SimplePie sanitize URLs for syslog https://github.com/FreshRSS/FreshRSS/issues/711 https://github.com/FreshRSS/FreshRSS/pull/715 --- app/Models/Feed.php | 2 +- lib/SimplePie/SimplePie.php | 4 ++-- lib/SimplePie/SimplePie/File.php | 2 +- lib/SimplePie/SimplePie/Misc.php | 10 ++++++++++ lib/lib_rss.php | 12 +----------- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'lib/SimplePie') diff --git a/app/Models/Feed.php b/app/Models/Feed.php index 5f67ea6ce..15cbb7d0a 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -240,7 +240,7 @@ class FreshRSS_Feed extends Minz_Model { $subscribe_url = $feed->subscribe_url(true); } - $clean_url = url_remove_credentials($subscribe_url); + $clean_url = SimplePie_Misc::url_remove_credentials($subscribe_url); if ($subscribe_url !== null && $subscribe_url !== $url) { $this->_url($clean_url); } diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php index bb8ce4191..54f4c5770 100644 --- a/lib/SimplePie/SimplePie.php +++ b/lib/SimplePie/SimplePie.php @@ -1554,14 +1554,14 @@ class SimplePie if ($this->data['md5'] === $md5) { if ($this->syslog_enabled) { - syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . $this->feed_url); + syslog(LOG_DEBUG, 'SimplePie MD5 cache match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url)); } $cache->touch(); return true; //Content unchanged even though server did not send a 304 } else { if ($this->syslog_enabled) { - syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . $this->feed_url); + syslog(LOG_DEBUG, 'SimplePie MD5 cache no match for ' . SimplePie_Misc::url_remove_credentials($this->feed_url)); } $this->data['md5'] = $md5; } diff --git a/lib/SimplePie/SimplePie/File.php b/lib/SimplePie/SimplePie/File.php index 56fe72196..1f9e3d502 100644 --- a/lib/SimplePie/SimplePie/File.php +++ b/lib/SimplePie/SimplePie/File.php @@ -81,7 +81,7 @@ class SimplePie_File { if ($syslog_enabled) { - syslog(LOG_INFO, 'SimplePie GET ' . $url); //FreshRSS + syslog(LOG_INFO, 'SimplePie GET ' . SimplePie_Misc::url_remove_credentials($url)); //FreshRSS } if ($useragent === null) { diff --git a/lib/SimplePie/SimplePie/Misc.php b/lib/SimplePie/SimplePie/Misc.php index 5a263a2e5..de50d37b8 100644 --- a/lib/SimplePie/SimplePie/Misc.php +++ b/lib/SimplePie/SimplePie/Misc.php @@ -2240,5 +2240,15 @@ function embed_wmedia(width, height, link) { { // No-op } + + /** + * Sanitize a URL by removing HTTP credentials. + * @param $url the URL to sanitize. + * @return the same URL without HTTP credentials. + */ + function url_remove_credentials($url) //FreshRSS + { + return preg_replace('#(?<=//)[^/:@]+:[^/:@]+@#', '', $url); + } } diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 16ae3097f..65a1a8e04 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -181,7 +181,7 @@ function sanitizeHTML($data, $base = '') { function get_content_by_parsing ($url, $path) { require_once (LIB_PATH . '/lib_phpQuery.php'); - Minz_Log::notice('FreshRSS GET ' . url_remove_credentials($url)); + Minz_Log::notice('FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url)); $html = file_get_contents ($url); if ($html) { @@ -430,13 +430,3 @@ function array_push_unique(&$array, $value) { function array_remove(&$array, $value) { $array = array_diff($array, array($value)); } - - -/** - * Sanitize a URL by removing HTTP credentials. - * @param $url the URL to sanitize. - * @return the same URL without HTTP credentials. - */ -function url_remove_credentials($url) { - return preg_replace('/[^\/]*:[^:]*@/', '', $url); -} -- cgit v1.2.3 From 1c38e646c3223571988b33e6a0d481426e3b6931 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 22 Mar 2015 23:53:59 +0100 Subject: SimplePie seems to only supports HTTP schemes Can simplify the regex (faster because anchored) for cleaning URLs based on the fact that we only have to deal with HTTP or HTTPS --- lib/SimplePie/SimplePie/Misc.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Misc.php b/lib/SimplePie/SimplePie/Misc.php index de50d37b8..90f01f737 100644 --- a/lib/SimplePie/SimplePie/Misc.php +++ b/lib/SimplePie/SimplePie/Misc.php @@ -2248,7 +2248,7 @@ function embed_wmedia(width, height, link) { */ function url_remove_credentials($url) //FreshRSS { - return preg_replace('#(?<=//)[^/:@]+:[^/:@]+@#', '', $url); + return preg_replace('#(?<=^https?://)[^/:@]+:[^/:@]+@#', '', $url); } } -- cgit v1.2.3 From 0ed213d97f566d7f46747d28a318cf58cdb5d699 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 22 Mar 2015 23:56:47 +0100 Subject: Revert "SimplePie seems to only supports HTTP schemes" This reverts commit 1c38e646c3223571988b33e6a0d481426e3b6931. --- lib/SimplePie/SimplePie/Misc.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Misc.php b/lib/SimplePie/SimplePie/Misc.php index 90f01f737..de50d37b8 100644 --- a/lib/SimplePie/SimplePie/Misc.php +++ b/lib/SimplePie/SimplePie/Misc.php @@ -2248,7 +2248,7 @@ function embed_wmedia(width, height, link) { */ function url_remove_credentials($url) //FreshRSS { - return preg_replace('#(?<=^https?://)[^/:@]+:[^/:@]+@#', '', $url); + return preg_replace('#(?<=//)[^/:@]+:[^/:@]+@#', '', $url); } } -- cgit v1.2.3 From 7735471140e1c6087a177192a09c7fb849decd77 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 23 Mar 2015 21:58:37 +0100 Subject: SimplePie faster regex for sanitizing URLs Can simplify the regex (faster because anchored) for cleaning URLs based on the fact that we only have to deal with HTTP or HTTPS https://github.com/FreshRSS/FreshRSS/pull/715 --- lib/SimplePie/SimplePie/Misc.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Misc.php b/lib/SimplePie/SimplePie/Misc.php index de50d37b8..91549f93a 100644 --- a/lib/SimplePie/SimplePie/Misc.php +++ b/lib/SimplePie/SimplePie/Misc.php @@ -2248,7 +2248,7 @@ function embed_wmedia(width, height, link) { */ function url_remove_credentials($url) //FreshRSS { - return preg_replace('#(?<=//)[^/:@]+:[^/:@]+@#', '', $url); + return preg_replace('#^(https?://)[^/:@]+:[^/:@]+@#i', '$1', $url); } } -- cgit v1.2.3 From 2bfc4dbf8b19cd2b892526ef65a966ff3bfef718 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 24 Mar 2015 21:58:00 +0100 Subject: SimplePie forgot static keyword https://github.com/FreshRSS/FreshRSS/issues/711 --- lib/SimplePie/SimplePie/Misc.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/SimplePie') diff --git a/lib/SimplePie/SimplePie/Misc.php b/lib/SimplePie/SimplePie/Misc.php index 91549f93a..956a284cb 100644 --- a/lib/SimplePie/SimplePie/Misc.php +++ b/lib/SimplePie/SimplePie/Misc.php @@ -2246,7 +2246,7 @@ function embed_wmedia(width, height, link) { * @param $url the URL to sanitize. * @return the same URL without HTTP credentials. */ - function url_remove_credentials($url) //FreshRSS + public static function url_remove_credentials($url) //FreshRSS { return preg_replace('#^(https?://)[^/:@]+:[^/:@]+@#i', '$1', $url); } -- cgit v1.2.3