From ffc3d393e52e3c0d62e23dbd6fdc8fe695ca96ee Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 20 Sep 2024 23:25:38 +0200 Subject: SimplePie support for HTTP cache policies (#6812) * SimplePie support for HTTP cache policies Discussion in https://github.com/FreshRSS/simplepie/pull/26 * Bump SimplePie commit * Typos * Typos * Simpler logic * Explicitly disable cache for non-GET flows * Bump SimplePie commit * Bump SimplePie commit * Bump SimplePie commit * Bump SimplePie commit --- .typos.toml | 1 + app/Models/Feed.php | 1 + config.default.php | 8 ++- lib/composer.json | 2 +- lib/lib_rss.php | 3 +- lib/simplepie/simplepie/src/HTTP/Utils.php | 79 ++++++++++++++++++++++++++++++ lib/simplepie/simplepie/src/SimplePie.php | 73 ++++++++++++++++++++++----- p/api/pshb.php | 1 + 8 files changed, 152 insertions(+), 16 deletions(-) create mode 100644 lib/simplepie/simplepie/src/HTTP/Utils.php diff --git a/.typos.toml b/.typos.toml index d2d304eee..8437a68dd 100644 --- a/.typos.toml +++ b/.typos.toml @@ -38,6 +38,7 @@ extend-exclude = [ "app/i18n/zh-tw/", "bin/", "CHANGELOG-old.md", + "composer.json", "composer.lock", "data/", "docs/fr/", diff --git a/app/Models/Feed.php b/app/Models/Feed.php index 7b6fda7b6..b9afa9e83 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -612,6 +612,7 @@ class FreshRSS_Feed extends Minz_Model { */ private function simplePieFromContent(string $feedContent): \SimplePie\SimplePie { $simplePie = customSimplePie(); + $simplePie->enable_cache(false); $simplePie->set_raw_data($feedContent); $simplePie->init(); return $simplePie; diff --git a/config.default.php b/config.default.php index af9f45511..5d43d7d82 100644 --- a/config.default.php +++ b/config.default.php @@ -99,10 +99,14 @@ return array( # Duration in seconds of the login cookie. 'cookie_duration' => FreshRSS_Auth::DEFAULT_COOKIE_DURATION, - # Duration in seconds of the SimplePie cache, - # during which a query to the RSS feed will return the local cached version. 
+ # Duration in seconds of the SimplePie cache, during which a query to the RSS feed will return the local cached version. # Especially important for multi-user setups. + # Might be overridden by HTTP response headers. 'cache_duration' => 800, + # Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`, + 'cache_duration_min' => 60, + # Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`, + 'cache_duration_max' => 86400, # SimplePie HTTP request timeout in seconds. 'timeout' => 20, diff --git a/lib/composer.json b/lib/composer.json index f7ed58363..67eb3518a 100644 --- a/lib/composer.json +++ b/lib/composer.json @@ -18,7 +18,7 @@ "marienfressinaud/lib_opml": "0.5.1", "phpgt/cssxpath": "dev-master#45f3ac151fc21d459e2515c3aff97cd4bf877bf8", "phpmailer/phpmailer": "6.9.1", - "simplepie/simplepie": "dev-freshrss#1cabd55aee050a0a665685d8ec700f1edd5c5160" + "simplepie/simplepie": "dev-freshrss#7090eedb1358d95c002282a645ef915d5ce55c56" }, "config": { "sort-packages": true, diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 2f1f4e3ef..32139d8c8 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -282,7 +282,7 @@ function customSimplePie(array $attributes = [], array $curl_options = []): \Sim $simplePie->set_useragent(FRESHRSS_USERAGENT); $simplePie->set_cache_name_function('sha1'); $simplePie->set_cache_location(CACHE_PATH); - $simplePie->set_cache_duration($limits['cache_duration']); + $simplePie->set_cache_duration($limits['cache_duration'], $limits['cache_duration_min'], $limits['cache_duration_max']); $simplePie->enable_order_by_date(false); $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 
0 : (int)$attributes['timeout']; @@ -369,6 +369,7 @@ function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): static $simplePie = null; if ($simplePie == null) { $simplePie = customSimplePie(); + $simplePie->enable_cache(false); $simplePie->init(); } $result = html_only_entity_decode($simplePie->sanitize->sanitize($data, \SimplePie\SimplePie::CONSTRUCT_HTML, $base)); diff --git a/lib/simplepie/simplepie/src/HTTP/Utils.php b/lib/simplepie/simplepie/src/HTTP/Utils.php new file mode 100644 index 000000000..b2e05e04b --- /dev/null +++ b/lib/simplepie/simplepie/src/HTTP/Utils.php @@ -0,0 +1,79 @@ + $http_headers HTTP headers of the response + * @return int|null The `max-age` value (directive name matched case-insensitively, as required by RFC 9111) or `null` if not found + * + * FreshRSS + */ + public static function get_http_max_age(array $http_headers): ?int + { + $cache_control = $http_headers['cache-control'] ?? null; + if (is_string($cache_control) && preg_match('/\bmax-age=(\d+)\b/i', $cache_control, $matches)) { + return (int) $matches[1]; + } + return null; + } + + /** + * Negotiate the cache expiration time based on the HTTP response headers. 
+ * Return the cache expiration time as a number of seconds since the Unix Epoch, accounting for: + * - `Cache-Control: max-age` minus `Age`, raised to at least `$cache_duration` and capped at `$cache_duration_max` + * - `Cache-Control: must-revalidate` will set `$cache_duration` to `$cache_duration_min` + * - `Cache-Control: no-cache` will return `time() + $cache_duration_min` + * - `Cache-Control: no-store` will return `time() + $cache_duration_min - 3` + * - `Expires`, treated like `Cache-Control: max-age`, but only used when `max-age` is absent + * + * @param array $http_headers HTTP headers of the response (lowercase header names; `Cache-Control` directives are matched case-insensitively) + * @param int $cache_duration Desired cache duration in seconds, potentially overridden by HTTP response headers + * @param int $cache_duration_min Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`, + * @param int $cache_duration_max Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control: max-age` and `Expires`, + * @return int The negotiated cache expiration time in seconds since the Unix Epoch + * + * FreshRSS + */ + public static function negociate_cache_expiration_time(array $http_headers, int $cache_duration, int $cache_duration_min, int $cache_duration_max): int + { + $cache_control = $http_headers['cache-control'] ?? ''; + if (is_string($cache_control) && $cache_control !== '') { + if (preg_match('/\bno-store\b/i', $cache_control)) { + return time() + $cache_duration_min - 3; // -3 to distinguish from no-cache if needed + } + if (preg_match('/\bno-cache\b/i', $cache_control)) { + return time() + $cache_duration_min; + } + if (preg_match('/\bmust-revalidate\b/i', $cache_control)) { + $cache_duration = $cache_duration_min; + } + if (preg_match('/\bmax-age=(\d+)\b/i', $cache_control, $matches)) { + $max_age = (int) $matches[1]; + $age = $http_headers['age'] ?? ''; + if (is_numeric($age)) { + $max_age -= (int) $age; + } + return time() + min(max($max_age, $cache_duration), $cache_duration_max); + } + } + $expires = $http_headers['expires'] ?? 
''; + if ($expires !== '') { + $expire_date = \SimplePie\Misc::parse_date($expires); + if ($expire_date !== false) { + return min(max($expire_date, time() + $cache_duration), time() + $cache_duration_max); + } + } + return time() + $cache_duration; + } +} diff --git a/lib/simplepie/simplepie/src/SimplePie.php b/lib/simplepie/simplepie/src/SimplePie.php index cc4adc3df..455a90a2f 100644 --- a/lib/simplepie/simplepie/src/SimplePie.php +++ b/lib/simplepie/simplepie/src/SimplePie.php @@ -509,12 +509,28 @@ class SimplePie public $force_cache_fallback = false; /** - * @var int Cache duration (in seconds) + * @var int Cache duration (in seconds), but may be overridden by HTTP response headers (FreshRSS) * @see SimplePie::set_cache_duration() * @access private */ public $cache_duration = 3600; + /** + * @var int Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires` + * @see SimplePie::set_cache_duration() + * @access private + * FreshRSS + */ + public $cache_duration_min = 60; + + /** + * @var int Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires` + * @see SimplePie::set_cache_duration() + * @access private + * FreshRSS + */ + public $cache_duration_max = 86400; + /** * @var int Auto-discovery cache duration (in seconds) * @see SimplePie::set_autodiscovery_cache_duration() @@ -989,12 +1005,26 @@ class SimplePie * Set the length of time (in seconds) that the contents of a feed will be * cached * - * @param int $seconds The feed content cache duration + * FreshRSS: The cache is (partially) HTTP compliant, with the following rules: + * + * @param int $seconds The feed content cache duration, which may be overridden by HTTP response headers + * @param int|null $min The minimum cache duration (default: 60s), overriding HTTP response headers `Cache-Control` and `Expires` + * @param int|null $max The maximum cache duration (default: 24h), overriding HTTP response headers `Cache-Control` and 
`Expires` * @return void */ - public function set_cache_duration(int $seconds = 3600) + public function set_cache_duration(int $seconds = 3600, ?int $min = null, ?int $max = null) { - $this->cache_duration = $seconds; + $this->cache_duration = max(0, $seconds); + if (is_int($min)) { // FreshRSS + $this->cache_duration_min = min(max(0, $min), $seconds); + } elseif ($this->cache_duration_min > $seconds) { + $this->cache_duration_min = $seconds; + } + if (is_int($max)) { // FreshRSS + $this->cache_duration_max = max($seconds, $max); + } elseif ($this->cache_duration_max < $seconds) { + $this->cache_duration_max = $seconds; + } } /** @@ -1851,7 +1881,7 @@ class SimplePie $this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS // Cache the file if caching is enabled - $this->data['cache_expiration_time'] = $this->cache_duration + time(); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); if ($cache && !$cache->set_data($this->get_cache_filename($this->feed_url), $this->data, $this->cache_duration)) { trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); @@ -1972,8 +2002,10 @@ class SimplePie $this->status_code = 0; if ($this->force_cache_fallback) { - $this->data['cache_expiration_time'] = $this->cache_duration + time(); // FreshRSS - $cache->set_data($cacheKey, $this->data, $this->cache_duration); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); // FreshRSS + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS + trigger_error("$this->cache_location is not writable. 
Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); + } return true; } @@ -1986,13 +2018,28 @@ class SimplePie // is still valid. $this->raw_data = false; if (isset($file)) { // FreshRSS + $old_cache_control = $this->data['headers']['cache-control'] ?? ''; + $old_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']); + // Update cache metadata - $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); + + // Workaround for buggy servers returning wrong cache-control headers for 304 responses + if ($old_max_age !== null) { + $new_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']); + if ($new_max_age === null || $new_max_age > $old_max_age) { + // Allow servers to return a shorter cache duration for 304 responses, but not longer + $this->data['headers']['cache-control'] = $old_cache_control; + } + } + + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); + } + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS + trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); } - $cache->set_data($cacheKey, $this->data, $this->cache_duration); return true; } @@ -2001,11 +2048,13 @@ class SimplePie $hash = $this->clean_hash($file->get_body_content()); if (($this->data['hash'] ?? 
null) === $hash) { // Update cache metadata - $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); - $cache->set_data($cacheKey, $this->data, $this->cache_duration); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { + trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); + } return true; // Content unchanged even though server did not send a 304 } else { @@ -2138,7 +2187,7 @@ class SimplePie 'url' => $this->feed_url, 'feed_url' => $file->get_final_requested_uri(), 'build' => Misc::get_build(), - 'cache_expiration_time' => $this->cache_duration + time(), + 'cache_expiration_time' => \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max), // FreshRSS 'cache_version' => self::CACHE_VERSION, // FreshRSS 'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS ]; diff --git a/p/api/pshb.php b/p/api/pshb.php index 8fcc5ab34..9d0b9fadc 100644 --- a/p/api/pshb.php +++ b/p/api/pshb.php @@ -98,6 +98,7 @@ if ($ORIGINAL_INPUT == '') { } $simplePie = customSimplePie(); +$simplePie->enable_cache(false); $simplePie->set_raw_data($ORIGINAL_INPUT); $simplePie->init(); unset($ORIGINAL_INPUT); -- cgit v1.2.3