From b552abb3327f09baa1c0f4e821dc9f6bd6ef738e Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 1 May 2018 17:02:11 +0200 Subject: JSON column for feeds (#1838) * Draft of JSON column for feeds https://github.com/FreshRSS/FreshRSS/issues/1654 * Add some per-feed options * Feed cURL timeout * Mark updated articles as read https://github.com/FreshRSS/FreshRSS/issues/891 * Mark as read upon reception https://github.com/FreshRSS/FreshRSS/issues/1702 * Ignore SSL (unsafe) https://github.com/FreshRSS/FreshRSS/issues/1811 * Try PHPCS workaround While waiting for a better syntax support --- lib/lib_rss.php | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'lib/lib_rss.php') diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 215c4c362..9dfca385d 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -175,7 +175,7 @@ function html_only_entity_decode($text) { return strtr($text, $htmlEntitiesOnly); } -function customSimplePie() { +function customSimplePie($attributes = array()) { $system_conf = Minz_Configuration::get('system'); $limits = $system_conf->limits; $simplePie = new SimplePie(); @@ -183,8 +183,17 @@ function customSimplePie() { $simplePie->set_syslog($system_conf->simplepie_syslog_enabled); $simplePie->set_cache_location(CACHE_PATH); $simplePie->set_cache_duration($limits['cache_duration']); - $simplePie->set_timeout($limits['timeout']); - $simplePie->set_curl_options($system_conf->curl_options); + + $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']); + $simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']); + + $curl_options = $system_conf->curl_options; + if (isset($attributes['ssl_verify'])) { + $curl_options[CURLOPT_SSL_VERIFYHOST] = $attributes['ssl_verify'] ? 2 : 0; + $curl_options[CURLOPT_SSL_VERIFYPEER] = $attributes['ssl_verify'] ? true : false; + } + $simplePie->set_curl_options($curl_options); + $simplePie->strip_htmltags(array( 'base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', -- cgit v1.2.3 From ccc62b0a2cb41663ebee7b33601c5fb1d000d4cb Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 2 Jun 2018 23:08:09 +0200 Subject: Use cURL for GET full content (#1913) * Use cURL for GET full content Fix https://github.com/FreshRSS/FreshRSS/issues/1870 * Changelog 1870 https://github.com/FreshRSS/FreshRSS/issues/1870 https://github.com/FreshRSS/FreshRSS/pull/1913 --- CHANGELOG.md | 1 + app/Models/Entry.php | 3 ++- lib/lib_rss.php | 42 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 4 deletions(-) (limited to 'lib/lib_rss.php') diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f1ad1b0e..933c831c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ * Updated German [#1856](https://github.com/FreshRSS/FreshRSS/pull/1856) * Updated Dutch [#1903](https://github.com/FreshRSS/FreshRSS/pull/1903) * Misc. + * Use cURL for fetching full articles content [#1870](https://github.com/FreshRSS/FreshRSS/issues/1870) * Add error log information when SQLite has not enough temp space [#1816](https://github.com/FreshRSS/FreshRSS/issues/1816) * Allow extension dir to be a symlink [#1911](https://github.com/FreshRSS/FreshRSS/pull/1911) diff --git a/app/Models/Entry.php b/app/Models/Entry.php index 0ad3781e5..c6b26a7cc 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -193,7 +193,8 @@ class FreshRSS_Entry extends Minz_Model { try { // l'article n'est pas en BDD, on va le chercher sur le site $this->content = get_content_by_parsing( - htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries + htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries, + $this->feed->attributes() ); } catch (Exception $e) { // rien à faire, on garde l'ancien contenu(requête a échoué) diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 9dfca385d..abb20f16a 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -254,11 +254,47 @@ function sanitizeHTML($data, $base = '') { } /* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */ -function get_content_by_parsing ($url, $path) { +function get_content_by_parsing($url, $path, $attributes = array()) { require_once(LIB_PATH . '/lib_phpQuery.php'); + $system_conf = Minz_Configuration::get('system'); + $limits = $system_conf->limits; + $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']); + + if ($system_conf->simplepie_syslog_enabled) { + syslog(LOG_INFO, 'FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url)); + } + + $ch = curl_init(); + curl_setopt_array($ch, array( + CURLOPT_URL => $url, + CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url), + CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'), + CURLOPT_USERAGENT => FRESHRSS_USERAGENT, + CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + //CURLOPT_FAILONERROR => true; + CURLOPT_MAXREDIRS => 4, + CURLOPT_RETURNTRANSFER => true, + )); + if (version_compare(PHP_VERSION, '5.6.0') >= 0 || ini_get('open_basedir') == '') { + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); //Keep option separated for open_basedir PHP bug 65646 + } + if (defined('CURLOPT_ENCODING')) { + curl_setopt($ch, CURLOPT_ENCODING, ''); //Enable all encodings + } + curl_setopt_array($ch, $system_conf->curl_options); + if (isset($attributes['ssl_verify'])) { + curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0); + curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false); + } + $html = curl_exec($ch); + $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $c_error = curl_error($ch); + curl_close($ch); - Minz_Log::notice('FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url)); - $html = file_get_contents($url); + if ($c_status != 200 || $c_error != '') { + Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); + } if ($html) { $doc = phpQuery::newDocument($html); -- cgit v1.2.3