From a18c35046daee15e7ac5f85db290d54541a03e3c Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 11 Nov 2025 08:17:12 +0100 Subject: Housekeeping lib_rss.php (#8193) * Housekeeping lib_rss.php `lib_rss.php` had become much too large, especially after https://github.com/FreshRSS/FreshRSS/pull/7924 Moved most functions to other places. Mostly no change of code otherwise (see comments). * Extension: composer run-script phpstan-third-party --- app/Utils/feverUtil.php | 2 +- app/Utils/httpUtil.php | 417 +++++++++++++++++++++++++++++++++++++++++++++ app/Utils/passwordUtil.php | 5 + 3 files changed, 423 insertions(+), 1 deletion(-) (limited to 'app/Utils') diff --git a/app/Utils/feverUtil.php b/app/Utils/feverUtil.php index 8b06568dd..ff6c4f14f 100644 --- a/app/Utils/feverUtil.php +++ b/app/Utils/feverUtil.php @@ -62,7 +62,7 @@ class FreshRSS_fever_Util { * @throws FreshRSS_Context_Exception */ public static function deleteKey(string $username): bool { - $userConfig = get_user_configuration($username); + $userConfig = FreshRSS_UserConfiguration::getForUser($username); if ($userConfig === null) { return false; } diff --git a/app/Utils/httpUtil.php b/app/Utils/httpUtil.php index 6e176a0d3..ffba3712a 100644 --- a/app/Utils/httpUtil.php +++ b/app/Utils/httpUtil.php @@ -83,4 +83,421 @@ final class FreshRSS_http_Util { } return $retryAfter; } + + /** + * @param array $curl_params + * @return array + */ + public static function sanitizeCurlParams(array $curl_params): array { + $safe_params = [ + CURLOPT_COOKIE, + CURLOPT_COOKIEFILE, + CURLOPT_FOLLOWLOCATION, + CURLOPT_HTTPHEADER, + CURLOPT_MAXREDIRS, + CURLOPT_POST, + CURLOPT_POSTFIELDS, + CURLOPT_PROXY, + CURLOPT_PROXYTYPE, + CURLOPT_USERAGENT, + ]; + foreach ($curl_params as $k => $_) { + if (!in_array($k, $safe_params, true)) { + unset($curl_params[$k]); + continue; + } + // Allow only an empty value just to enable the libcurl cookie engine + if ($k === CURLOPT_COOKIEFILE) { + $curl_params[$k] = ''; + } + } + return $curl_params; + } + + private static function idn_to_puny(string $url): string { + if (function_exists('idn_to_ascii')) { + $idn = parse_url($url, PHP_URL_HOST); + if (is_string($idn) && $idn != '') { + $puny = idn_to_ascii($idn); + $pos = strpos($url, $idn); + if ($puny != false && $pos !== false) { + $url = substr_replace($url, $puny, $pos, strlen($idn)); + } + } + } + return $url; + } + + public static function checkUrl(string $url, bool $fixScheme = true): string|false { + $url = trim($url); + if ($url == '') { + return ''; + } + if ($fixScheme && preg_match('#^https?://#i', $url) !== 1) { + $url = 'https://' . ltrim($url, '/'); + } + + $url = self::idn_to_puny($url); // https://bugs.php.net/bug.php?id=53474 + $urlRelaxed = str_replace('_', 'z', $url); //PHP discussion #64948 Underscore + + if (is_string(filter_var($urlRelaxed, FILTER_VALIDATE_URL))) { + return $url; + } else { + return false; + } + } + + /** + * Remove the charset meta information of an HTML document, e.g.: + * `` + * `` + */ + private static function stripHtmlMetaCharset(string $html): string { + return preg_replace('/]*charset\s*=\s*[^>]+>/i', '', $html, 1) ?? ''; + } + + /** + * Set an XML preamble to enforce the HTML content type charset received by HTTP. + * @param string $html the raw downloaded HTML content + * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8' + * @return string an HTML string with XML encoding information for DOMDocument::loadHTML() + */ + private static function enforceHttpEncoding(string $html, string $contentType = ''): string { + $httpCharset = preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $matches) === 1 ? $matches[1] : ''; + if ($httpCharset == '') { + // No charset defined by HTTP + if (preg_match('/]*charset\s*=[\s\'"]*UTF-?8\b/i', substr($html, 0, 2048))) { + // Detect UTF-8 even if declared too deep in HTML for DOMDocument + $httpCharset = 'UTF-8'; + } else { + // Do nothing + return $html; + } + } + $httpCharsetNormalized = \SimplePie\Misc::encoding($httpCharset); + if (in_array($httpCharsetNormalized, ['windows-1252', 'US-ASCII'], true)) { + // Default charset for HTTP, do nothing + return $html; + } + if (substr($html, 0, 3) === "\xEF\xBB\xBF" || // UTF-8 BOM + substr($html, 0, 2) === "\xFF\xFE" || // UTF-16 Little Endian BOM + substr($html, 0, 2) === "\xFE\xFF" || // UTF-16 Big Endian BOM + substr($html, 0, 4) === "\xFF\xFE\x00\x00" || // UTF-32 Little Endian BOM + substr($html, 0, 4) === "\x00\x00\xFE\xFF") { // UTF-32 Big Endian BOM + // Existing byte order mark, do nothing + return $html; + } + if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) { + // Existing XML declaration, do nothing + return $html; + } + if ($httpCharsetNormalized !== 'UTF-8') { + // Try to change encoding to UTF-8 using mbstring or iconv or intl + $utf8 = \SimplePie\Misc::change_encoding($html, $httpCharsetNormalized, 'UTF-8'); + if (is_string($utf8)) { + $html = self::stripHtmlMetaCharset($utf8); + $httpCharsetNormalized = 'UTF-8'; + } + } + if ($httpCharsetNormalized === 'UTF-8') { + // Save encoding information as XML declaration + return '<' . '?xml version="1.0" encoding="' . $httpCharsetNormalized . '" ?' . ">\n" . $html; + } + // Give up + return $html; + } + + /** + * Set an HTML base URL to the HTML content if there is none. + * @param string $html the raw downloaded HTML content + * @param string $href the HTML base URL + * @return string an HTML string + */ + private static function enforceHtmlBase(string $html, string $href): string { + $doc = new DOMDocument(); + $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + if ($doc->documentElement === null) { + return ''; + } + $xpath = new DOMXPath($doc); + $bases = $xpath->evaluate('//base'); + if (!($bases instanceof DOMNodeList) || $bases->length === 0) { + $base = $doc->createElement('base'); + if ($base === false) { + return $html; + } + $base->setAttribute('href', $href); + $head = null; + $heads = $xpath->evaluate('//head'); + if ($heads instanceof DOMNodeList && $heads->length > 0) { + $head = $heads->item(0); + } + if ($head instanceof DOMElement) { + $head->insertBefore($base, $head->firstChild); + } else { + $doc->documentElement->insertBefore($base, $doc->documentElement->firstChild); + } + } + return $doc->saveHTML() ?: $html; + } + + /** + * @param non-empty-string $url + * @param string $type {html,ico,json,opml,xml} + * @param array $attributes + * @param array $curl_options + * @return array{body:string,effective_url:string,redirect_count:int,fail:bool} + */ + public static function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): array { + $limits = FreshRSS_Context::systemConf()->limits; + $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']); + + $cacheMtime = @filemtime($cachePath); + if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) { + $body = @file_get_contents($cachePath); + if ($body != false) { + syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . \SimplePie\Misc::url_remove_credentials($url)); + return ['body' => $body, 'effective_url' => $url, 'redirect_count' => 0, 'fail' => false]; + } + } + + if (rand(0, 30) === 1) { // Remove old cache once in a while + cleanCache(CLEANCACHE_HOURS); + } + + $options = []; + $accept = ''; + $proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : ''; + if (is_array($attributes['curl_params'] ?? null)) { + $options = self::sanitizeCurlParams($attributes['curl_params']); + $proxy = is_string($options[CURLOPT_PROXY]) ? $options[CURLOPT_PROXY] : ''; + if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) { + // Remove headers problematic for security + $options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], + fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header)); + // Add Accept header if it is not set + if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) { + $options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept; + } + } + } + + if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url, $proxy)) > 0) { + Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); + return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true]; + } + + if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) { + syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url)); + } + + switch ($type) { + case 'json': + $accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7'; + break; + case 'opml': + $accept = 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8'; + break; + case 'xml': + $accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8'; + break; + case 'ico': + $accept = 'image/x-icon,image/vnd.microsoft.icon,image/ico,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.1'; + break; + case 'html': + default: + $accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'; + break; + } + + // TODO: Implement HTTP 1.1 conditional GET If-Modified-Since + $ch = curl_init(); + if ($ch === false) { + return ['body' => '', 'effective_url' => '', 'redirect_count' => 0, 'fail' => true]; + } + curl_setopt_array($ch, [ + CURLOPT_URL => $url, + CURLOPT_HTTPHEADER => ['Accept: ' . $accept], + CURLOPT_USERAGENT => FRESHRSS_USERAGENT, + CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + CURLOPT_MAXREDIRS => 4, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_ENCODING => '', //Enable all encodings + //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers + ]); + + curl_setopt_array($ch, $options); + curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options); + + $responseHeaders = ''; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) { + if (trim($header) !== '') { // Skip e.g. separation with trailer headers + $responseHeaders .= $header; + } + return strlen($header); + }); + + if (isset($attributes['ssl_verify'])) { + curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2); + curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']); + if (empty($attributes['ssl_verify'])) { + curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1'); + } + } + + curl_setopt_array($ch, $curl_options); + + $body = curl_exec($ch); + $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + $c_redirect_count = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT); + $c_error = curl_error($ch); + + $headers = []; + if ($body !== false) { + assert($c_redirect_count >= 0); + $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders, $c_redirect_count + 1); + $parser = new \SimplePie\HTTP\Parser($responseHeaders); + if ($parser->parse()) { + $headers = $parser->headers; + } + } + + $fail = $c_status != 200 || $c_error != '' || $body === false; + if ($fail) { + $body = ''; + Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); + if (in_array($c_status, [429, 503], true)) { + $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $proxy, $headers['retry-after'] ?? ''); + if ($c_status === 429) { + $errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; + } elseif ($c_status === 503) { + $errorMessage = 'HTTP 503 Service Unavailable! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; + } + if ($retryAfter > 0) { + $errorMessage .= ' We may retry after ' . date('c', $retryAfter); + } + } + // TODO: Implement HTTP 410 Gone + } elseif (!is_string($body) || strlen($body) === 0) { + $body = ''; + } else { + if (in_array($type, ['html', 'json', 'opml', 'xml'], true)) { + $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM + } + if (in_array($type, ['html', 'xml', 'opml'], true)) { + $body = self::enforceHttpEncoding($body, $c_content_type); + } + if (in_array($type, ['html'], true)) { + $body = self::enforceHtmlBase($body, $c_effective_url); + } + } + + if (file_put_contents($cachePath, $body) === false) { + Minz_Log::warning("Error saving cache $cachePath for $url"); + } + + return ['body' => $body, 'effective_url' => $c_effective_url, 'redirect_count' => $c_redirect_count, 'fail' => $fail]; + } + + /** + * Converts an IP (v4 or v6) to a binary representation using inet_pton + * + * @param string $ip the IP to convert + * @return string a binary representation of the specified IP + */ + private static function ipToBits(string $ip): string { + $binaryip = ''; + foreach (str_split(inet_pton($ip) ?: '') as $char) { + $binaryip .= str_pad(decbin(ord($char)), 8, '0', STR_PAD_LEFT); + } + return $binaryip; + } + + /** + * Check if an ip belongs to the provided range (in CIDR format) + * + * @param string $ip the IP that we want to verify (ex: 192.168.16.1) + * @param string $range the range to check against (ex: 192.168.16.0/24) + * @return bool true if the IP is in the range, otherwise false + */ + private static function checkCIDR(string $ip, string $range): bool { + $binary_ip = self::ipToBits($ip); + $split = explode('/', $range); + + $subnet = $split[0] ?? ''; + if ($subnet == '') { + return false; + } + $binary_subnet = self::ipToBits($subnet); + + $mask_bits = $split[1] ?? ''; + $mask_bits = (int)$mask_bits; + if ($mask_bits === 0) { + $mask_bits = null; + } + + $ip_net_bits = substr($binary_ip, 0, $mask_bits); + $subnet_bits = substr($binary_subnet, 0, $mask_bits); + return $ip_net_bits === $subnet_bits; + } + + /** + * Check if the client (e.g. last proxy) is allowed to send unsafe headers. + * This uses the `TRUSTED_PROXY` environment variable or the `trusted_sources` configuration option to get an array of the authorized ranges, + * The connection IP is obtained from the `CONN_REMOTE_ADDR` + * (if available, to be robust even when using Apache mod_remoteip) or `REMOTE_ADDR` environment variables. + * @return bool true if the sender’s IP is in one of the ranges defined in the configuration, else false + */ + public static function checkTrustedIP(): bool { + if (!FreshRSS_Context::hasSystemConf()) { + return false; + } + $remoteIp = Minz_Request::connectionRemoteAddress(); + if ($remoteIp === '') { + return false; + } + $trusted = getenv('TRUSTED_PROXY'); + if ($trusted != 0 && is_string($trusted)) { + $trusted = preg_split('/\s+/', $trusted, -1, PREG_SPLIT_NO_EMPTY); + } + if (!is_array($trusted) || empty($trusted)) { + $trusted = FreshRSS_Context::systemConf()->trusted_sources; + } + foreach ($trusted as $cidr) { + if (self::checkCIDR($remoteIp, $cidr)) { + return true; + } + } + return false; + } + + public static function httpAuthUser(bool $onlyTrusted = true): string { + $auths = array_unique( + array_intersect_key($_SERVER, ['REMOTE_USER' => '', 'REDIRECT_REMOTE_USER' => '', 'HTTP_REMOTE_USER' => '', 'HTTP_X_WEBAUTH_USER' => '']) + ); + if (count($auths) > 1) { + Minz_Log::warning('Multiple HTTP authentication headers!'); + return ''; + } + + if (!empty($_SERVER['REMOTE_USER']) && is_string($_SERVER['REMOTE_USER'])) { + return $_SERVER['REMOTE_USER']; + } + if (!empty($_SERVER['REDIRECT_REMOTE_USER']) && is_string($_SERVER['REDIRECT_REMOTE_USER'])) { + return $_SERVER['REDIRECT_REMOTE_USER']; + } + if (!$onlyTrusted || self::checkTrustedIP()) { + if (!empty($_SERVER['HTTP_REMOTE_USER']) && is_string($_SERVER['HTTP_REMOTE_USER'])) { + return $_SERVER['HTTP_REMOTE_USER']; + } + if (!empty($_SERVER['HTTP_X_WEBAUTH_USER']) && is_string($_SERVER['HTTP_X_WEBAUTH_USER'])) { + return $_SERVER['HTTP_X_WEBAUTH_USER']; + } + } + return ''; + } } diff --git a/app/Utils/passwordUtil.php b/app/Utils/passwordUtil.php index 76c5455c5..d522abac0 100644 --- a/app/Utils/passwordUtil.php +++ b/app/Utils/passwordUtil.php @@ -28,4 +28,9 @@ class FreshRSS_password_Util { public static function check(string $password): bool { return strlen($password) >= 7; } + + public static function cryptAvailable(): bool { + $hash = '$2y$04$usesomesillystringfore7hnbRJHxXVLeakoG8K30oukPsA.ztMG'; + return $hash === @crypt('password', $hash); + } } -- cgit v1.2.3