about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Alexandre Alapetite <alexandre@alapetite.fr> 2025-07-31 09:17:42 +0200
committer GitHub <noreply@github.com> 2025-07-31 09:17:42 +0200
commit 7a0c423357818b19eb431775452b1357bc7fd3eb (patch)
tree 5afd0d95b1af8a5262a305467951449c2a645197 /lib
parent e33ef74af9ff2f8ba1c6909b78ee07633cff240a (diff)
Implement support for HTTP 429 Too Many Requests (#7760)
* Implement support for HTTP 429 Too Many Requests
  Will obey the corresponding HTTP `Retry-After` header at domain level.
* Implement 503 Service Unavailable
* Sanitize Retry-After
* Reduce default value when Retry-After is absent
  And make configuration parameter
* Retry-After also for favicons
Diffstat (limited to 'lib')
-rw-r--r-- lib/favicons.php 49
-rw-r--r-- lib/lib_rss.php 30
-rw-r--r-- lib/simplepie/simplepie/src/File.php 21
-rw-r--r-- lib/simplepie/simplepie/src/Misc.php 2
4 files changed, 78 insertions, 24 deletions
diff --git a/lib/favicons.php b/lib/favicons.php
index 7b9cbc6ba..e0baf542b 100644
--- a/lib/favicons.php
+++ b/lib/favicons.php
@@ -24,15 +24,24 @@ function isImgMime(string $content): bool {
/** @param array<int,int|bool|string> $curlOptions */
function downloadHttp(string &$url, array $curlOptions = []): string {
+ if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) {
+ Minz_Log::warning('For that domain, will first retry favicon after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
+ return '';
+ }
+
syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url);
$url2 = checkUrl($url);
if ($url2 == false) {
return '';
}
$url = $url2;
- /** @var CurlHandle $ch */
+
$ch = curl_init($url);
+ if ($ch === false) {
+ return '';
+ }
curl_setopt_array($ch, [
+ CURLOPT_HEADER => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TIMEOUT => 15,
CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
@@ -50,18 +59,37 @@ function downloadHttp(string &$url, array $curlOptions = []): string {
curl_setopt_array($ch, $curlOptions);
$response = curl_exec($ch);
- if (!is_string($response)) {
- $response = '';
- }
- $info = curl_getinfo($ch);
+ $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+ $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
curl_close($ch);
- if (!empty($info['url'])) {
- $url2 = checkUrl($info['url']);
- if ($url2 != false) {
- $url = $url2; //Possible redirect
+
+ $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : '');
+ if ($parser->parse()) {
+ $headers = $parser->headers;
+ $body = $parser->body;
+ } else {
+ $headers = [];
+ $body = false;
+ }
+
+ if (in_array($c_status, [429, 503], true)) {
+ $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
+ if ($c_status === 429) {
+ $errorMessage = 'HTTP 429 Too Many Requests! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
+ } elseif ($c_status === 503) {
+ $errorMessage = 'HTTP 503 Service Unavailable! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
+ }
+ if ($retryAfter > 0) {
+ $errorMessage .= ' We may retry after ' . date('c', $retryAfter);
}
}
- return is_array($info) && $info['http_code'] == 200 ? $response : '';
+
+ $url2 = checkUrl($c_effective_url);
+ if ($url2 != false) {
+ $url = $url2; //Possible redirect
+ }
+
+ return $c_status === 200 && is_string($body) ? $body : '';
}
function searchFavicon(string &$url): string {
@@ -75,7 +103,6 @@ function searchFavicon(string &$url): string {
$xpath = new DOMXPath($dom);
$links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"'
. ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]');
-
if (!($links instanceof DOMNodeList)) {
return '';
}
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index 0786eb949..9fe884cc2 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -564,6 +564,11 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
cleanCache(CLEANCACHE_HOURS);
}
+ if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) {
+ Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
+ return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true];
+ }
+
if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
}
@@ -597,6 +602,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
CURLOPT_MAXREDIRS => 4,
+ CURLOPT_HEADER => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_ENCODING => '', //Enable all encodings
@@ -630,7 +636,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
curl_setopt_array($ch, $curl_options);
- $body = curl_exec($ch);
+ $response = curl_exec($ch);
$c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
$c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
@@ -638,10 +644,30 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
$c_error = curl_error($ch);
curl_close($ch);
+ $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : '');
+ if ($parser->parse()) {
+ $headers = $parser->headers;
+ $body = $parser->body;
+ } else {
+ $headers = [];
+ $body = false;
+ }
+
$fail = $c_status != 200 || $c_error != '' || $body === false;
if ($fail) {
- Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
$body = '';
+ Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
+ if (in_array($c_status, [429, 503], true)) {
+ $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
+ if ($c_status === 429) {
+ $errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
+ } elseif ($c_status === 503) {
+ $errorMessage = 'HTTP 503 Service Unavailable! [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
+ }
+ if ($retryAfter > 0) {
+ $errorMessage .= ' We may retry after ' . date('c', $retryAfter);
+ }
+ }
// TODO: Implement HTTP 410 Gone
} elseif (!is_string($body) || strlen($body) === 0) {
$body = '';
diff --git a/lib/simplepie/simplepie/src/File.php b/lib/simplepie/simplepie/src/File.php
index dcc4a690d..93c943624 100644
--- a/lib/simplepie/simplepie/src/File.php
+++ b/lib/simplepie/simplepie/src/File.php
@@ -127,7 +127,7 @@ class File implements Response
curl_setopt($fp, CURLOPT_URL, $url);
curl_setopt($fp, CURLOPT_HEADER, 1);
curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
- curl_setopt($fp, CURLOPT_FAILONERROR, 1);
+ // curl_setopt($fp, CURLOPT_FAILONERROR, 1); // FreshRSS removed to retrieve headers even on HTTP errors
curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
// curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed
@@ -141,7 +141,7 @@ class File implements Response
if (curl_errno($fp) === 23 || curl_errno($fp) === 61) {
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS
$this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE); // FreshRSS
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
$this->error = null; // FreshRSS
curl_setopt($fp, CURLOPT_ENCODING, 'none');
$responseHeaders = curl_exec($fp);
@@ -150,9 +150,9 @@ class File implements Response
if (curl_errno($fp)) {
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
$this->success = false;
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
} else {
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
// Use the updated url provided by curl_getinfo after any redirects.
if ($info = curl_getinfo($fp)) {
$this->url = $info['url'];
@@ -188,7 +188,7 @@ class File implements Response
if (!$fp) {
$this->error = 'fsockopen error: ' . $errstr;
$this->success = false;
- $this->on_http_response();
+ $this->on_http_response(false);
} else {
stream_set_timeout($fp, $timeout);
if (isset($url_parts['path'])) {
@@ -229,7 +229,7 @@ class File implements Response
$this->set_headers($parser->headers);
$this->body = $parser->body;
$this->status_code = $parser->status_code;
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) {
$this->redirects++;
$location = \SimplePie\Misc::absolutize_url($locationHeader, $url);
@@ -271,12 +271,12 @@ class File implements Response
} else {
$this->error = 'Could not parse'; // FreshRSS
$this->success = false; // FreshRSS
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
}
} else {
$this->error = 'fsocket timed out';
$this->success = false;
- $this->on_http_response();
+ $this->on_http_response($responseHeaders);
}
fclose($fp);
}
@@ -291,7 +291,7 @@ class File implements Response
$this->body = $filebody;
$this->status_code = 200;
}
- $this->on_http_response();
+ $this->on_http_response($filebody);
}
if ($this->success) {
// (Leading) whitespace may cause XML parsing errors so we trim it,
@@ -303,9 +303,10 @@ class File implements Response
/**
* Event to allow inheriting classes to e.g. log the HTTP responses.
* Triggered just after an HTTP response is received.
+ * @param string|false $response The raw HTTP response headers and body, or false in case of failure (as returned by curl_exec()).
* FreshRSS.
*/
- protected function on_http_response(): void
+ protected function on_http_response(string|false $response): void
{
}
diff --git a/lib/simplepie/simplepie/src/Misc.php b/lib/simplepie/simplepie/src/Misc.php
index 2376a8dcc..42885db5c 100644
--- a/lib/simplepie/simplepie/src/Misc.php
+++ b/lib/simplepie/simplepie/src/Misc.php
@@ -1737,7 +1737,7 @@ class Misc
}
/**
- * @return int|bool
+ * @return int|false
*/
public static function parse_date(string $dt)
{