aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexandre Alapetite <alexandre@alapetite.fr> 2018-06-02 23:08:09 +0200
committer GitHub <noreply@github.com> 2018-06-02 23:08:09 +0200
commit ccc62b0a2cb41663ebee7b33601c5fb1d000d4cb (patch)
tree 610624f5d6fbc8aea2f32307163e385bae957d52
parent 15f0ad645265d652c69c298121d7c0a37c06718d (diff)
Use cURL for GET full content (#1913)
* Use cURL for GET full content

  Fix https://github.com/FreshRSS/FreshRSS/issues/1870

* Changelog 1870

  https://github.com/FreshRSS/FreshRSS/issues/1870
  https://github.com/FreshRSS/FreshRSS/pull/1913
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- app/Models/Entry.php 3
-rw-r--r-- lib/lib_rss.php 42
3 files changed, 42 insertions, 4 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f1ad1b0e..933c831c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,7 @@
* Updated German [#1856](https://github.com/FreshRSS/FreshRSS/pull/1856)
* Updated Dutch [#1903](https://github.com/FreshRSS/FreshRSS/pull/1903)
* Misc.
+ * Use cURL for fetching full articles content [#1870](https://github.com/FreshRSS/FreshRSS/issues/1870)
* Add error log information when SQLite has not enough temp space [#1816](https://github.com/FreshRSS/FreshRSS/issues/1816)
* Allow extension dir to be a symlink [#1911](https://github.com/FreshRSS/FreshRSS/pull/1911)
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 0ad3781e5..c6b26a7cc 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -193,7 +193,8 @@ class FreshRSS_Entry extends Minz_Model {
try {
// l'article n'est pas en BDD, on va le chercher sur le site
$this->content = get_content_by_parsing(
- htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries
+ htmlspecialchars_decode($this->link(), ENT_QUOTES), $pathEntries,
+ $this->feed->attributes()
);
} catch (Exception $e) {
// rien à faire, on garde l'ancien contenu(requête a échoué)
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 9dfca385d..abb20f16a 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -254,11 +254,47 @@ function sanitizeHTML($data, $base = '') {
}
/* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */
-function get_content_by_parsing ($url, $path) {
+function get_content_by_parsing($url, $path, $attributes = array()) {
require_once(LIB_PATH . '/lib_phpQuery.php');
+ $system_conf = Minz_Configuration::get('system');
+ $limits = $system_conf->limits;
+ $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
+
+ if ($system_conf->simplepie_syslog_enabled) {
+ syslog(LOG_INFO, 'FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url));
+ }
+
+ $ch = curl_init();
+ curl_setopt_array($ch, array(
+ CURLOPT_URL => $url,
+ CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url),
+ CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
+ CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
+ CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
+ CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
+ //CURLOPT_FAILONERROR => true;
+ CURLOPT_MAXREDIRS => 4,
+ CURLOPT_RETURNTRANSFER => true,
+ ));
+ if (version_compare(PHP_VERSION, '5.6.0') >= 0 || ini_get('open_basedir') == '') {
+ curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); //Keep option separated for open_basedir PHP bug 65646
+ }
+ if (defined('CURLOPT_ENCODING')) {
+ curl_setopt($ch, CURLOPT_ENCODING, ''); //Enable all encodings
+ }
+ curl_setopt_array($ch, $system_conf->curl_options);
+ if (isset($attributes['ssl_verify'])) {
+ curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0);
+ curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false);
+ }
+ $html = curl_exec($ch);
+ $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+ $c_error = curl_error($ch);
+ curl_close($ch);
- Minz_Log::notice('FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url));
- $html = file_get_contents($url);
+ if ($c_status != 200 || $c_error != '') {
+ Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
+ }
if ($html) {
$doc = phpQuery::newDocument($html);