diff options
| author | 2025-07-31 09:17:42 +0200 | |
|---|---|---|
| committer | 2025-07-31 09:17:42 +0200 | |
| commit | 7a0c423357818b19eb431775452b1357bc7fd3eb (patch) | |
| tree | 5afd0d95b1af8a5262a305467951449c2a645197 /lib | |
| parent | e33ef74af9ff2f8ba1c6909b78ee07633cff240a (diff) | |
Implement support for HTTP 429 Too Many Requests (#7760)
* Implement support for HTTP 429 Too Many Requests
  Will obey the corresponding HTTP `Retry-After` header at the domain level.
* Implement 503 Service Unavailable
* Sanitize Retry-After
* Reduce default value when Retry-After is absent
  And make it a configuration parameter
* Retry-After also for favicons
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/favicons.php | 49 | ||||
| -rw-r--r-- | lib/lib_rss.php | 30 | ||||
| -rw-r--r-- | lib/simplepie/simplepie/src/File.php | 21 | ||||
| -rw-r--r-- | lib/simplepie/simplepie/src/Misc.php | 2 |
4 files changed, 78 insertions, 24 deletions
diff --git a/lib/favicons.php b/lib/favicons.php index 7b9cbc6ba..e0baf542b 100644 --- a/lib/favicons.php +++ b/lib/favicons.php @@ -24,15 +24,24 @@ function isImgMime(string $content): bool { /** @param array<int,int|bool|string> $curlOptions */ function downloadHttp(string &$url, array $curlOptions = []): string { + if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) { + Minz_Log::warning('For that domain, will first retry favicon after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); + return ''; + } + syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url); $url2 = checkUrl($url); if ($url2 == false) { return ''; } $url = $url2; - /** @var CurlHandle $ch */ + $ch = curl_init($url); + if ($ch === false) { + return ''; + } curl_setopt_array($ch, [ + CURLOPT_HEADER => true, CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_USERAGENT => FRESHRSS_USERAGENT, @@ -50,18 +59,37 @@ function downloadHttp(string &$url, array $curlOptions = []): string { curl_setopt_array($ch, $curlOptions); $response = curl_exec($ch); - if (!is_string($response)) { - $response = ''; - } - $info = curl_getinfo($ch); + $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); curl_close($ch); - if (!empty($info['url'])) { - $url2 = checkUrl($info['url']); - if ($url2 != false) { - $url = $url2; //Possible redirect + + $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : ''); + if ($parser->parse()) { + $headers = $parser->headers; + $body = $parser->body; + } else { + $headers = []; + $body = false; + } + + if (in_array($c_status, [429, 503], true)) { + $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? ''); + if ($c_status === 429) { + $errorMessage = 'HTTP 429 Too Many Requests! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . 
']'; + } elseif ($c_status === 503) { + $errorMessage = 'HTTP 503 Service Unavailable! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; + } + if ($retryAfter > 0) { + $errorMessage .= ' We may retry after ' . date('c', $retryAfter); } } - return is_array($info) && $info['http_code'] == 200 ? $response : ''; + + $url2 = checkUrl($c_effective_url); + if ($url2 != false) { + $url = $url2; //Possible redirect + } + + return $c_status === 200 && is_string($body) ? $body : ''; } function searchFavicon(string &$url): string { @@ -75,7 +103,6 @@ function searchFavicon(string &$url): string { $xpath = new DOMXPath($dom); $links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"' . ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]'); - if (!($links instanceof DOMNodeList)) { return ''; } diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 0786eb949..9fe884cc2 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -564,6 +564,11 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a cleanCache(CLEANCACHE_HOURS); } + if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) { + Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); + return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true]; + } + if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) { syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url)); } @@ -597,6 +602,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], CURLOPT_TIMEOUT => $feed_timeout > 0 ? 
$feed_timeout : $limits['timeout'], CURLOPT_MAXREDIRS => 4, + CURLOPT_HEADER => true, CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true, CURLOPT_ENCODING => '', //Enable all encodings @@ -630,7 +636,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a curl_setopt_array($ch, $curl_options); - $body = curl_exec($ch); + $response = curl_exec($ch); $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE); $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); @@ -638,10 +644,30 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a $c_error = curl_error($ch); curl_close($ch); + $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : ''); + if ($parser->parse()) { + $headers = $parser->headers; + $body = $parser->body; + } else { + $headers = []; + $body = false; + } + $fail = $c_status != 200 || $c_error != '' || $body === false; if ($fail) { - Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); $body = ''; + Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); + if (in_array($c_status, [429, 503], true)) { + $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? ''); + if ($c_status === 429) { + $errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; + } elseif ($c_status === 503) { + $errorMessage = 'HTTP 503 Service Unavailable! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; + } + if ($retryAfter > 0) { + $errorMessage .= ' We may retry after ' . 
date('c', $retryAfter); + } + } // TODO: Implement HTTP 410 Gone } elseif (!is_string($body) || strlen($body) === 0) { $body = ''; diff --git a/lib/simplepie/simplepie/src/File.php b/lib/simplepie/simplepie/src/File.php index dcc4a690d..93c943624 100644 --- a/lib/simplepie/simplepie/src/File.php +++ b/lib/simplepie/simplepie/src/File.php @@ -127,7 +127,7 @@ class File implements Response curl_setopt($fp, CURLOPT_URL, $url); curl_setopt($fp, CURLOPT_HEADER, 1); curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($fp, CURLOPT_FAILONERROR, 1); + // curl_setopt($fp, CURLOPT_FAILONERROR, 1); // FreshRSS removed to retrieve headers even on HTTP errors curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed @@ -141,7 +141,7 @@ class File implements Response if (curl_errno($fp) === 23 || curl_errno($fp) === 61) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS $this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE); // FreshRSS - $this->on_http_response(); + $this->on_http_response($responseHeaders); $this->error = null; // FreshRSS curl_setopt($fp, CURLOPT_ENCODING, 'none'); $responseHeaders = curl_exec($fp); @@ -150,9 +150,9 @@ class File implements Response if (curl_errno($fp)) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); $this->success = false; - $this->on_http_response(); + $this->on_http_response($responseHeaders); } else { - $this->on_http_response(); + $this->on_http_response($responseHeaders); // Use the updated url provided by curl_getinfo after any redirects. if ($info = curl_getinfo($fp)) { $this->url = $info['url']; @@ -188,7 +188,7 @@ class File implements Response if (!$fp) { $this->error = 'fsockopen error: ' . 
$errstr; $this->success = false; - $this->on_http_response(); + $this->on_http_response(false); } else { stream_set_timeout($fp, $timeout); if (isset($url_parts['path'])) { @@ -229,7 +229,7 @@ class File implements Response $this->set_headers($parser->headers); $this->body = $parser->body; $this->status_code = $parser->status_code; - $this->on_http_response(); + $this->on_http_response($responseHeaders); if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) { $this->redirects++; $location = \SimplePie\Misc::absolutize_url($locationHeader, $url); @@ -271,12 +271,12 @@ class File implements Response } else { $this->error = 'Could not parse'; // FreshRSS $this->success = false; // FreshRSS - $this->on_http_response(); + $this->on_http_response($responseHeaders); } } else { $this->error = 'fsocket timed out'; $this->success = false; - $this->on_http_response(); + $this->on_http_response($responseHeaders); } fclose($fp); } @@ -291,7 +291,7 @@ class File implements Response $this->body = $filebody; $this->status_code = 200; } - $this->on_http_response(); + $this->on_http_response($filebody); } if ($this->success) { // (Leading) whitespace may cause XML parsing errors so we trim it, @@ -303,9 +303,10 @@ class File implements Response /** * Event to allow inheriting classes to e.g. log the HTTP responses. * Triggered just after an HTTP response is received. + * @param string|false $response The raw HTTP response headers and body, or false in case of failure (as returned by curl_exec()). * FreshRSS. 
*/ - protected function on_http_response(): void + protected function on_http_response(string|false $response): void { } diff --git a/lib/simplepie/simplepie/src/Misc.php b/lib/simplepie/simplepie/src/Misc.php index 2376a8dcc..42885db5c 100644 --- a/lib/simplepie/simplepie/src/Misc.php +++ b/lib/simplepie/simplepie/src/Misc.php @@ -1737,7 +1737,7 @@ class Misc } /** - * @return int|bool + * @return int|false */ public static function parse_date(string $dt) { |
