diff options
Diffstat (limited to 'lib/lib_rss.php')
| -rw-r--r-- | lib/lib_rss.php | 899 |
1 files changed, 4 insertions, 895 deletions
diff --git a/lib/lib_rss.php b/lib/lib_rss.php index e7503ffe4..2fca6896f 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -145,39 +145,6 @@ function echoJson($json, int $optimisationDepth = -1): void { } } -function idn_to_puny(string $url): string { - if (function_exists('idn_to_ascii')) { - $idn = parse_url($url, PHP_URL_HOST); - if (is_string($idn) && $idn != '') { - $puny = idn_to_ascii($idn); - $pos = strpos($url, $idn); - if ($puny != false && $pos !== false) { - $url = substr_replace($url, $puny, $pos, strlen($idn)); - } - } - } - return $url; -} - -function checkUrl(string $url, bool $fixScheme = true): string|false { - $url = trim($url); - if ($url == '') { - return ''; - } - if ($fixScheme && preg_match('#^https?://#i', $url) !== 1) { - $url = 'https://' . ltrim($url, '/'); - } - - $url = idn_to_puny($url); // https://bugs.php.net/bug.php?id=53474 - $urlRelaxed = str_replace('_', 'z', $url); //PHP discussion #64948 Underscore - - if (is_string(filter_var($urlRelaxed, FILTER_VALIDATE_URL))) { - return $url; - } else { - return false; - } -} - function safe_ascii(?string $text): string { return $text === null ? '' : (filter_var($text, FILTER_DEFAULT, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH) ?: ''); } @@ -290,319 +257,6 @@ function sensitive_log(array|string $log): array|string { return $log; } -/** - * @param array<mixed> $curl_params - * @return array<mixed> - */ -function sanitizeCurlParams(array $curl_params): array { - $safe_params = [ - CURLOPT_COOKIE, - CURLOPT_COOKIEFILE, - CURLOPT_FOLLOWLOCATION, - CURLOPT_HTTPHEADER, - CURLOPT_MAXREDIRS, - CURLOPT_POST, - CURLOPT_POSTFIELDS, - CURLOPT_PROXY, - CURLOPT_PROXYTYPE, - CURLOPT_USERAGENT, - ]; - foreach ($curl_params as $k => $_) { - if (!in_array($k, $safe_params, true)) { - unset($curl_params[$k]); - continue; - } - // Allow only an empty value just to enable the libcurl cookie engine - if ($k === CURLOPT_COOKIEFILE) { - $curl_params[$k] = ''; - } - } - return $curl_params; -} - -/** - * @param array<string,mixed> $attributes - * @param array<int,mixed> $curl_options - * @throws FreshRSS_Context_Exception - */ -function customSimplePie(array $attributes = [], array $curl_options = []): \SimplePie\SimplePie { - $limits = FreshRSS_Context::systemConf()->limits; - $simplePie = new \SimplePie\SimplePie(); - if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) { - $simplePie->get_registry()->register(\SimplePie\File::class, FreshRSS_SimplePieResponse::class); - } - $simplePie->set_useragent(FRESHRSS_USERAGENT); - $simplePie->set_cache_name_function('sha1'); - $simplePie->set_cache_location(CACHE_PATH); - $simplePie->set_cache_duration($limits['cache_duration'], $limits['cache_duration_min'], $limits['cache_duration_max']); - $simplePie->enable_order_by_date(false); - - $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : (int)$attributes['timeout']; - $simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']); - - $curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options); - if (isset($attributes['ssl_verify'])) { - $curl_options[CURLOPT_SSL_VERIFYHOST] = empty($attributes['ssl_verify']) ? 0 : 2; - $curl_options[CURLOPT_SSL_VERIFYPEER] = (bool)$attributes['ssl_verify']; - if (empty($attributes['ssl_verify'])) { - $curl_options[CURLOPT_SSL_CIPHER_LIST] = 'DEFAULT@SECLEVEL=1'; - } - } - $attributes['curl_params'] = sanitizeCurlParams(is_array($attributes['curl_params'] ?? null) ? $attributes['curl_params'] : []); - if (!empty($attributes['curl_params']) && is_array($attributes['curl_params'])) { - foreach ($attributes['curl_params'] as $co => $v) { - if (is_int($co)) { - $curl_options[$co] = $v; - } - } - } - if (!empty($curl_options[CURLOPT_PROXYTYPE]) && ($curl_options[CURLOPT_PROXYTYPE] < 0 || $curl_options[CURLOPT_PROXYTYPE] === 3)) { - // 3 is legacy for NONE - unset($curl_options[CURLOPT_PROXYTYPE]); - if (isset($curl_options[CURLOPT_PROXY])) { - unset($curl_options[CURLOPT_PROXY]); - } - } - $simplePie->set_curl_options($curl_options); - - $simplePie->strip_comments(true); - $simplePie->rename_attributes(['id', 'class']); - $simplePie->allow_aria_attr(true); - $simplePie->allow_data_attr(true); - $simplePie->allowed_html_attributes([ - // HTML - 'dir', 'draggable', 'hidden', 'lang', 'role', 'title', - // MathML - 'displaystyle', 'mathsize', 'scriptlevel', - ]); - $simplePie->allowed_html_elements_with_attributes([ - // HTML - 'a' => ['href', 'hreflang', 'type'], - 'abbr' => [], - 'acronym' => [], - 'address' => [], - // 'area' => [], // TODO: support <area> after rewriting ids with a format like #ugc-<insert original id here> (maybe) - 'article' => [], - 'aside' => [], - 'audio' => ['controlslist', 'loop', 'muted', 'src'], - 'b' => [], - 'bdi' => [], - 'bdo' => [], - 'big' => [], - 'blink' => [], - 'blockquote' => ['cite'], - 'br' => ['clear'], - 'button' => ['disabled'], - 'canvas' => ['width', 'height'], - 'caption' => ['align'], - 'center' => [], - 'cite' => [], - 'code' => [], - 'col' => ['span', 'align', 'valign', 'width'], - 'colgroup' => ['span', 'align', 'valign', 'width'], - 'data' => ['value'], - 'datalist' => [], - 'dd' => [], - 'del' => ['cite', 'datetime'], - 'details' => ['open'], - 'dfn' => [], - 'dialog' => [], - 'dir' => [], - 'div' => ['align'], - 'dl' => [], - 'dt' => [], - 'em' => [], - 'fieldset' => ['disabled'], - 'figcaption' => [], - 'figure' => [], - 'footer' => [], - 'h1' => [], - 'h2' => [], - 'h3' => [], - 'h4' => [], - 'h5' => [], - 'h6' => [], - 'header' => [], - 'hgroup' => [], - 'hr' => ['align', 'noshade', 'size', 'width'], - 'i' => [], - 'iframe' => ['src', 'align', 'frameborder', 'longdesc', 'marginheight', 'marginwidth', 'scrolling'], - 'image' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'], - 'img' => ['src', 'alt', 'width', 'height', 'align', 'border', 'hspace', 'longdesc', 'vspace'], - 'ins' => ['cite', 'datetime'], - 'kbd' => [], - 'label' => [], - 'legend' => [], - 'li' => ['value', 'type'], - 'main' => [], - // 'map' => [], // TODO: support <map> after rewriting ids with a format like #ugc-<insert original id here> (maybe) - 'mark' => [], - 'marquee' => ['behavior', 'direction', 'height', 'hspace', 'loop', 'scrollamount', 'scrolldelay', 'truespeed', 'vspace', 'width'], - 'menu' => [], - 'meter' => ['value', 'min', 'max', 'low', 'high', 'optimum'], - 'nav' => [], - 'nobr' => [], - // 'noembed' => [], // <embed> is not allowed, so we want to display the contents of <noembed> - 'noframes' => [], - // 'noscript' => [], // From the perspective of the feed content, JS isn't allowed so we want to display the contents of <noscript> - 'ol' => ['reversed', 'start', 'type'], - 'optgroup' => ['disabled', 'label'], - 'option' => ['disabled', 'label', 'selected', 'value'], - 'output' => [], - 'p' => ['align'], - 'picture' => [], - // 'plaintext' => [], // Can't be closed. See: https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/plaintext - 'pre' => ['width', 'wrap'], - 'progress' => ['max', 'value'], - 'q' => ['cite'], - 'rb' => [], - 'rp' => [], - 'rt' => [], - 'rtc' => [], - 'ruby' => [], - 's' => [], - 'samp' => [], - 'search' => [], - 'section' => [], - 'select' => ['disabled', 'multiple', 'size'], - 'small' => [], - 'source' => ['type', 'src', 'media', 'height', 'width'], - 'span' => [], - 'strike' => [], - 'strong' => [], - 'sub' => [], - 'summary' => [], - 'sup' => [], - 'table' => ['align', 'border', 'cellpadding', 'cellspacing', 'rules', 'summary', 'width'], - 'tbody' => ['align', 'char', 'charoff', 'valign'], - 'td' => ['colspan', 'headers', 'rowspan', 'abbr', 'align', 'height', 'scope', 'valign', 'width'], - 'textarea' => ['cols', 'disabled', 'maxlength', 'minlength', 'placeholder', 'readonly', 'rows', 'wrap'], - 'tfoot' => ['align', 'valign'], - 'th' => ['abbr', 'colspan', 'rowspan', 'scope', 'align', 'height', 'valign', 'width'], - 'thead' => ['align', 'valign'], - 'time' => ['datetime'], - 'tr' => ['align', 'valign'], - 'track' => ['default', 'kind', 'srclang', 'label', 'src'], - 'tt' => [], - 'u' => [], - 'ul' => ['type'], - 'var' => [], - 'video' => ['src', 'poster', 'controlslist', 'height', 'loop', 'muted', 'playsinline', 'width'], - 'wbr' => [], - 'xmp' => [], - // MathML - 'maction' => ['actiontype', 'selection'], - 'math' => ['display'], - 'menclose' => ['notation'], - 'merror' => [], - 'mfenced' => ['close', 'open', 'separators'], - 'mfrac' => ['denomalign', 'linethickness', 'numalign'], - 'mi' => ['mathvariant'], - 'mmultiscripts' => ['subscriptshift', 'superscriptshift'], - 'mn' => [], - 'mo' => ['accent', 'fence', 'form', 'largeop', 'lspace', 'maxsize', 'minsize', 'movablelimits', 'rspace', 'separator', 'stretchy', 'symmetric'], - 'mover' => ['accent'], - 'mpadded' => ['depth', 'height', 'lspace', 'voffset', 'width'], - 'mphantom' => [], - 'mprescripts' => [], - 'mroot' => [], - 'mrow' => [], - 'ms' => [], - 'mspace' => ['depth', 'height', 'width'], - 'msqrt' => [], - 'msub' => [], - 'msubsup' => ['subscriptshift', 'superscriptshift'], - 'msup' => ['superscriptshift'], - 'mtable' => ['align', 'columnalign', 'columnlines', 'columnspacing', 'frame', 'framespacing', 'rowalign', 'rowlines', 'rowspacing', 'width'], - 'mtd' => ['columnspan', 'rowspan', 'columnalign', 'rowalign'], - 'mtext' => [], - 'mtr' => ['columnalign', 'rowalign'], - 'munder' => ['accentunder'], - 'munderover' => ['accent', 'accentunder'], - // TODO: Support SVG after sanitizing and URL rewriting of xlink:href - ]); - $simplePie->strip_attributes([ - 'data-auto-leave-validation', 'data-leave-validation', 'data-no-leave-validation', 'data-original', - ]); - $simplePie->add_attributes([ - 'audio' => ['controls' => 'controls', 'preload' => 'none'], - 'iframe' => [ - 'allow' => 'accelerometer; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share', - 'sandbox' => 'allow-scripts allow-same-origin', - ], - 'video' => ['controls' => 'controls', 'preload' => 'none'], - ]); - $simplePie->set_url_replacements([ - 'a' => 'href', - 'area' => 'href', - 'audio' => 'src', - 'blockquote' => 'cite', - 'del' => 'cite', - 'form' => 'action', - 'iframe' => 'src', - 'img' => [ - 'longdesc', - 'src', - ], - 'image' => [ - 'longdesc', - 'src', - ], - 'input' => 'src', - 'ins' => 'cite', - 'q' => 'cite', - 'source' => 'src', - 'track' => 'src', - 'video' => [ - 'poster', - 'src', - ], - ]); - $https_domains = []; - $force = @file(FRESHRSS_PATH . '/force-https.default.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if (is_array($force)) { - $https_domains = array_merge($https_domains, $force); - } - $force = @file(DATA_PATH . '/force-https.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if (is_array($force)) { - $https_domains = array_merge($https_domains, $force); - } - - // Remove whitespace and comments starting with # / ; - $https_domains = preg_replace('%\\s+|[\/#;].*$%', '', $https_domains) ?? $https_domains; - $https_domains = array_filter($https_domains, fn(string $v) => $v !== ''); - - $simplePie->set_https_domains($https_domains); - return $simplePie; -} - -function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): string { - if ($data === '' || ($maxLength !== null && $maxLength <= 0)) { - return ''; - } - if ($maxLength !== null) { - $data = mb_strcut($data, 0, $maxLength, 'UTF-8'); - } - /** @var \SimplePie\SimplePie|null $simplePie */ - static $simplePie = null; - if ($simplePie === null) { - $simplePie = customSimplePie(); - $simplePie->enable_cache(false); - $simplePie->init(); - } - $sanitized = $simplePie->sanitize->sanitize($data, \SimplePie\SimplePie::CONSTRUCT_HTML, $base); - if (!is_string($sanitized)) { - return ''; - } - $result = html_only_entity_decode($sanitized); - if ($maxLength !== null && strlen($result) > $maxLength) { - //Sanitizing has made the result too long so try again shorter - $data = mb_strcut($result, 0, (2 * $maxLength) - strlen($result) - 2, 'UTF-8'); - return sanitizeHTML($data, $base, $maxLength); - } - return $result; -} - function cleanCache(int $hours = 720): void { // N.B.: GLOB_BRACE is not available on all platforms $files = glob(CACHE_PATH . '/*.*', GLOB_NOSORT) ?: []; @@ -618,275 +272,6 @@ function cleanCache(int $hours = 720): void { } /** - * Remove the charset meta information of an HTML document, e.g.: - * `<meta charset="..." />` - * `<meta http-equiv="Content-Type" content="text/html; charset=...">` - */ -function stripHtmlMetaCharset(string $html): string { - return preg_replace('/<meta\s[^>]*charset\s*=\s*[^>]+>/i', '', $html, 1) ?? ''; -} - -/** - * Set an XML preamble to enforce the HTML content type charset received by HTTP. - * @param string $html the raw downloaded HTML content - * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8' - * @return string an HTML string with XML encoding information for DOMDocument::loadHTML() - */ -function enforceHttpEncoding(string $html, string $contentType = ''): string { - $httpCharset = preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $matches) === 1 ? $matches[1] : ''; - if ($httpCharset == '') { - // No charset defined by HTTP - if (preg_match('/<meta\s[^>]*charset\s*=[\s\'"]*UTF-?8\b/i', substr($html, 0, 2048))) { - // Detect UTF-8 even if declared too deep in HTML for DOMDocument - $httpCharset = 'UTF-8'; - } else { - // Do nothing - return $html; - } - } - $httpCharsetNormalized = \SimplePie\Misc::encoding($httpCharset); - if (in_array($httpCharsetNormalized, ['windows-1252', 'US-ASCII'], true)) { - // Default charset for HTTP, do nothing - return $html; - } - if (substr($html, 0, 3) === "\xEF\xBB\xBF" || // UTF-8 BOM - substr($html, 0, 2) === "\xFF\xFE" || // UTF-16 Little Endian BOM - substr($html, 0, 2) === "\xFE\xFF" || // UTF-16 Big Endian BOM - substr($html, 0, 4) === "\xFF\xFE\x00\x00" || // UTF-32 Little Endian BOM - substr($html, 0, 4) === "\x00\x00\xFE\xFF") { // UTF-32 Big Endian BOM - // Existing byte order mark, do nothing - return $html; - } - if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) { - // Existing XML declaration, do nothing - return $html; - } - if ($httpCharsetNormalized !== 'UTF-8') { - // Try to change encoding to UTF-8 using mbstring or iconv or intl - $utf8 = \SimplePie\Misc::change_encoding($html, $httpCharsetNormalized, 'UTF-8'); - if (is_string($utf8)) { - $html = stripHtmlMetaCharset($utf8); - $httpCharsetNormalized = 'UTF-8'; - } - } - if ($httpCharsetNormalized === 'UTF-8') { - // Save encoding information as XML declaration - return '<' . '?xml version="1.0" encoding="' . $httpCharsetNormalized . '" ?' . ">\n" . $html; - } - // Give up - return $html; -} - -/** - * Set an HTML base URL to the HTML content if there is none. - * @param string $html the raw downloaded HTML content - * @param string $href the HTML base URL - * @return string an HTML string - */ -function enforceHtmlBase(string $html, string $href): string { - $doc = new DOMDocument(); - $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); - if ($doc->documentElement === null) { - return ''; - } - $xpath = new DOMXPath($doc); - $bases = $xpath->evaluate('//base'); - if (!($bases instanceof DOMNodeList) || $bases->length === 0) { - $base = $doc->createElement('base'); - if ($base === false) { - return $html; - } - $base->setAttribute('href', $href); - $head = null; - $heads = $xpath->evaluate('//head'); - if ($heads instanceof DOMNodeList && $heads->length > 0) { - $head = $heads->item(0); - } - if ($head instanceof DOMElement) { - $head->insertBefore($base, $head->firstChild); - } else { - $doc->documentElement->insertBefore($base, $doc->documentElement->firstChild); - } - } - return $doc->saveHTML() ?: $html; -} - -/** - * @param non-empty-string $url - * @param string $type {html,ico,json,opml,xml} - * @param array<string,mixed> $attributes - * @param array<int,mixed> $curl_options - * @return array{body:string,effective_url:string,redirect_count:int,fail:bool} - */ -function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): array { - $limits = FreshRSS_Context::systemConf()->limits; - $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']); - - $cacheMtime = @filemtime($cachePath); - if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) { - $body = @file_get_contents($cachePath); - if ($body != false) { - syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . \SimplePie\Misc::url_remove_credentials($url)); - return ['body' => $body, 'effective_url' => $url, 'redirect_count' => 0, 'fail' => false]; - } - } - - if (rand(0, 30) === 1) { // Remove old cache once in a while - cleanCache(CLEANCACHE_HOURS); - } - - $options = []; - $accept = ''; - $proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : ''; - if (is_array($attributes['curl_params'] ?? null)) { - $options = sanitizeCurlParams($attributes['curl_params']); - $proxy = is_string($options[CURLOPT_PROXY]) ? $options[CURLOPT_PROXY] : ''; - if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) { - // Remove headers problematic for security - $options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], - fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header)); - // Add Accept header if it is not set - if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) { - $options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept; - } - } - } - - if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url, $proxy)) > 0) { - Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); - return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true]; - } - - if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) { - syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url)); - } - - switch ($type) { - case 'json': - $accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7'; - break; - case 'opml': - $accept = 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8'; - break; - case 'xml': - $accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8'; - break; - case 'ico': - $accept = 'image/x-icon,image/vnd.microsoft.icon,image/ico,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.1'; - break; - case 'html': - default: - $accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'; - break; - } - - // TODO: Implement HTTP 1.1 conditional GET If-Modified-Since - $ch = curl_init(); - if ($ch === false) { - return ['body' => '', 'effective_url' => '', 'redirect_count' => 0, 'fail' => true]; - } - curl_setopt_array($ch, [ - CURLOPT_URL => $url, - CURLOPT_HTTPHEADER => ['Accept: ' . $accept], - CURLOPT_USERAGENT => FRESHRSS_USERAGENT, - CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], - CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], - CURLOPT_MAXREDIRS => 4, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_ENCODING => '', //Enable all encodings - //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers - ]); - - curl_setopt_array($ch, $options); - curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options); - - $responseHeaders = ''; - curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) { - if (trim($header) !== '') { // Skip e.g. separation with trailer headers - $responseHeaders .= $header; - } - return strlen($header); - }); - - if (isset($attributes['ssl_verify'])) { - curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']); - if (empty($attributes['ssl_verify'])) { - curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1'); - } - } - - curl_setopt_array($ch, $curl_options); - - $body = curl_exec($ch); - $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE); - $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - $c_redirect_count = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT); - $c_error = curl_error($ch); - - $headers = []; - if ($body !== false) { - assert($c_redirect_count >= 0); - $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders, $c_redirect_count + 1); - $parser = new \SimplePie\HTTP\Parser($responseHeaders); - if ($parser->parse()) { - $headers = $parser->headers; - } - } - - $fail = $c_status != 200 || $c_error != '' || $body === false; - if ($fail) { - $body = ''; - Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); - if (in_array($c_status, [429, 503], true)) { - $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $proxy, $headers['retry-after'] ?? ''); - if ($c_status === 429) { - $errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; - } elseif ($c_status === 503) { - $errorMessage = 'HTTP 503 Service Unavailable! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; - } - if ($retryAfter > 0) { - $errorMessage .= ' We may retry after ' . date('c', $retryAfter); - } - } - // TODO: Implement HTTP 410 Gone - } elseif (!is_string($body) || strlen($body) === 0) { - $body = ''; - } else { - if (in_array($type, ['html', 'json', 'opml', 'xml'], true)) { - $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM - } - if (in_array($type, ['html', 'xml', 'opml'], true)) { - $body = enforceHttpEncoding($body, $c_content_type); - } - if (in_array($type, ['html'], true)) { - $body = enforceHtmlBase($body, $c_effective_url); - } - } - - if (file_put_contents($cachePath, $body) === false) { - Minz_Log::warning("Error saving cache $cachePath for $url"); - } - - return ['body' => $body, 'effective_url' => $c_effective_url, 'redirect_count' => $c_redirect_count, 'fail' => $fail]; -} - -/** - * Validate an email address, supports internationalized addresses. - * - * @param string $email The address to validate - * @return bool true if email is valid, else false - */ -function validateEmailAddress(string $email): bool { - $mailer = new PHPMailer\PHPMailer\PHPMailer(); - $mailer->CharSet = 'utf-8'; - $punyemail = $mailer->punyencodeAddress($email); - return PHPMailer\PHPMailer\PHPMailer::validateAddress($punyemail, 'html5'); -} - -/** * Add support of image lazy loading * Move content from src/poster attribute to data-original * @param string $content is the text we want to parse @@ -923,250 +308,17 @@ function invalidateHttpCache(string $username = ''): bool { } /** - * @return list<string> - */ -function listUsers(): array { - $final_list = []; - $base_path = join_path(DATA_PATH, 'users'); - $dir_list = array_values(array_diff( - scandir($base_path) ?: [], - ['..', '.', Minz_User::INTERNAL_USER] - )); - foreach ($dir_list as $file) { - if ($file[0] !== '.' && is_dir(join_path($base_path, $file)) && file_exists(join_path($base_path, $file, 'config.php'))) { - $final_list[] = $file; - } - } - return $final_list; -} - - -/** - * Return if the maximum number of registrations has been reached. - * Note a max_registrations of 0 means there is no limit. - * - * @return bool true if number of users >= max registrations, false else. - */ -function max_registrations_reached(): bool { - $limit_registrations = FreshRSS_Context::systemConf()->limits['max_registrations']; - $number_accounts = count(listUsers()); - - return $limit_registrations > 0 && $number_accounts >= $limit_registrations; -} - - -/** - * Register and return the configuration for a given user. - * - * Note this function has been created to generate temporary configuration - * objects. If you need a long-time configuration, please don't use this function. - * - * @param string $username the name of the user of which we want the configuration. - * @return FreshRSS_UserConfiguration|null object, or null if the configuration cannot be loaded. - * @throws Minz_ConfigurationNamespaceException - */ -function get_user_configuration(string $username): ?FreshRSS_UserConfiguration { - if (!FreshRSS_user_Controller::checkUsername($username)) { - return null; - } - $namespace = 'user_' . $username; - try { - FreshRSS_UserConfiguration::register($namespace, - USERS_PATH . '/' . $username . '/config.php', - FRESHRSS_PATH . '/config-user.default.php'); - } catch (Minz_FileNotExistException $e) { - Minz_Log::warning($e->getMessage(), ADMIN_LOG); - return null; - } - - $user_conf = FreshRSS_UserConfiguration::get($namespace); - return $user_conf; -} - -/** - * Converts an IP (v4 or v6) to a binary representation using inet_pton - * - * @param string $ip the IP to convert - * @return string a binary representation of the specified IP - */ -function ipToBits(string $ip): string { - $binaryip = ''; - foreach (str_split(inet_pton($ip) ?: '') as $char) { - $binaryip .= str_pad(decbin(ord($char)), 8, '0', STR_PAD_LEFT); - } - return $binaryip; -} - -/** - * Check if an ip belongs to the provided range (in CIDR format) - * - * @param string $ip the IP that we want to verify (ex: 192.168.16.1) - * @param string $range the range to check against (ex: 192.168.16.0/24) - * @return bool true if the IP is in the range, otherwise false - */ -function checkCIDR(string $ip, string $range): bool { - $binary_ip = ipToBits($ip); - $split = explode('/', $range); - - $subnet = $split[0] ?? ''; - if ($subnet == '') { - return false; - } - $binary_subnet = ipToBits($subnet); - - $mask_bits = $split[1] ?? ''; - $mask_bits = (int)$mask_bits; - if ($mask_bits === 0) { - $mask_bits = null; - } - - $ip_net_bits = substr($binary_ip, 0, $mask_bits); - $subnet_bits = substr($binary_subnet, 0, $mask_bits); - return $ip_net_bits === $subnet_bits; -} - -/** - * Use CONN_REMOTE_ADDR (if available, to be robust even when using Apache mod_remoteip) or REMOTE_ADDR environment variable to determine the connection IP. + * @deprecated Use {@see Minz_Request::connectionRemoteAddress()} instead. */ function connectionRemoteAddress(): string { - $remoteIp = is_string($_SERVER['CONN_REMOTE_ADDR'] ?? null) ? $_SERVER['CONN_REMOTE_ADDR'] : ''; - if ($remoteIp == '') { - $remoteIp = is_string($_SERVER['REMOTE_ADDR'] ?? null) ? $_SERVER['REMOTE_ADDR'] : ''; - } - if ($remoteIp == 0) { - $remoteIp = ''; - } - return $remoteIp; + return Minz_Request::connectionRemoteAddress(); } /** - * Check if the client (e.g. last proxy) is allowed to send unsafe headers. - * This uses the `TRUSTED_PROXY` environment variable or the `trusted_sources` configuration option to get an array of the authorized ranges, - * The connection IP is obtained from the `CONN_REMOTE_ADDR` (if available, to be robust even when using Apache mod_remoteip) or `REMOTE_ADDR` environment variables. - * @return bool true if the sender’s IP is in one of the ranges defined in the configuration, else false + * @deprecated Use {@see FreshRSS_http_Util::checkTrustedIP()} instead. */ function checkTrustedIP(): bool { - if (!FreshRSS_Context::hasSystemConf()) { - return false; - } - $remoteIp = connectionRemoteAddress(); - if ($remoteIp === '') { - return false; - } - $trusted = getenv('TRUSTED_PROXY'); - if ($trusted != 0 && is_string($trusted)) { - $trusted = preg_split('/\s+/', $trusted, -1, PREG_SPLIT_NO_EMPTY); - } - if (!is_array($trusted) || empty($trusted)) { - $trusted = FreshRSS_Context::systemConf()->trusted_sources; - } - foreach ($trusted as $cidr) { - if (checkCIDR($remoteIp, $cidr)) { - return true; - } - } - return false; -} - -function httpAuthUser(bool $onlyTrusted = true): string { - $auths = array_unique(array_intersect_key($_SERVER, ['REMOTE_USER' => '', 'REDIRECT_REMOTE_USER' => '', 'HTTP_REMOTE_USER' => '', 'HTTP_X_WEBAUTH_USER' => ''])); - if (count($auths) > 1) { - Minz_Log::warning('Multiple HTTP authentication headers!'); - return ''; - } - - if (!empty($_SERVER['REMOTE_USER']) && is_string($_SERVER['REMOTE_USER'])) { - return $_SERVER['REMOTE_USER']; - } - if (!empty($_SERVER['REDIRECT_REMOTE_USER']) && is_string($_SERVER['REDIRECT_REMOTE_USER'])) { - return $_SERVER['REDIRECT_REMOTE_USER']; - } - if (!$onlyTrusted || checkTrustedIP()) { - if (!empty($_SERVER['HTTP_REMOTE_USER']) && is_string($_SERVER['HTTP_REMOTE_USER'])) { - return $_SERVER['HTTP_REMOTE_USER']; - } - if (!empty($_SERVER['HTTP_X_WEBAUTH_USER']) && is_string($_SERVER['HTTP_X_WEBAUTH_USER'])) { - return $_SERVER['HTTP_X_WEBAUTH_USER']; - } - } - return ''; -} - -function cryptAvailable(): bool { - $hash = '$2y$04$usesomesillystringfore7hnbRJHxXVLeakoG8K30oukPsA.ztMG'; - return $hash === @crypt('password', $hash); -} - - -/** - * Check PHP and its extensions are well-installed. - * - * @return array<string,bool> of tested values. - */ -function check_install_php(): array { - $pdo_mysql = extension_loaded('pdo_mysql'); - $pdo_pgsql = extension_loaded('pdo_pgsql'); - $pdo_sqlite = extension_loaded('pdo_sqlite'); - return [ - 'php' => version_compare(PHP_VERSION, FRESHRSS_MIN_PHP_VERSION) >= 0, - 'curl' => extension_loaded('curl'), - 'pdo' => $pdo_mysql || $pdo_sqlite || $pdo_pgsql, - 'pcre' => extension_loaded('pcre'), - 'ctype' => extension_loaded('ctype'), - 'fileinfo' => extension_loaded('fileinfo'), - 'dom' => class_exists('DOMDocument'), - 'json' => extension_loaded('json'), - 'mbstring' => extension_loaded('mbstring'), - 'zip' => extension_loaded('zip'), - ]; -} - -/** - * Check different data files and directories exist. - * @return array<string,bool> of tested values. - */ -function check_install_files(): array { - return [ - 'data' => is_dir(DATA_PATH) && touch(DATA_PATH . '/index.html'), // is_writable() is not reliable for a folder on NFS - 'cache' => is_dir(CACHE_PATH) && touch(CACHE_PATH . '/index.html'), - 'users' => is_dir(USERS_PATH) && touch(USERS_PATH . '/index.html'), - 'favicons' => is_dir(DATA_PATH) && touch(DATA_PATH . '/favicons/index.html'), - 'tokens' => is_dir(DATA_PATH) && touch(DATA_PATH . '/tokens/index.html'), - ]; -} - -/** - * Check database is well-installed. - * - * @return array<string,bool> of tested values. - */ -function check_install_database(): array { - $status = [ - 'connection' => true, - 'tables' => false, - 'categories' => false, - 'feeds' => false, - 'entries' => false, - 'entrytmp' => false, - 'tag' => false, - 'entrytag' => false, - ]; - - try { - $dbDAO = FreshRSS_Factory::createDatabaseDAO(); - - $status['tables'] = $dbDAO->tablesAreCorrect(); - $status['categories'] = $dbDAO->categoryIsCorrect(); - $status['feeds'] = $dbDAO->feedIsCorrect(); - $status['entries'] = $dbDAO->entryIsCorrect(); - $status['entrytmp'] = $dbDAO->entrytmpIsCorrect(); - $status['tag'] = $dbDAO->tagIsCorrect(); - $status['entrytag'] = $dbDAO->entrytagIsCorrect(); - } catch (Minz_PDOConnectionException $e) { - $status['connection'] = false; - } - - return $status; + return FreshRSS_http_Util::checkTrustedIP(); } /** @@ -1200,53 +352,10 @@ function recursive_unlink(string $dir): bool { return rmdir($dir); } -/** - * Remove queries where $get is appearing. - * @param string $get the get attribute which should be removed. - * @param array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string,token?:string, - * shareRss?:bool,shareOpml?:bool,description?:string,imageUrl?:string}> $queries an array of queries. - * @return array<int,array{get?:string,name?:string,order?:string,search?:string,state?:int,url?:string,token?:string, - * shareRss?:bool,shareOpml?:bool,description?:string,imageUrl?:string}> without queries where $get is appearing. - */ -function remove_query_by_get(string $get, array $queries): array { - $final_queries = []; - foreach ($queries as $query) { - if (empty($query['get']) || $query['get'] !== $get) { - $final_queries[] = $query; - } - } - return $final_queries; -} - function _i(string $icon, int $type = FreshRSS_Themes::ICON_DEFAULT): string { return FreshRSS_Themes::icon($icon, $type); } - -const SHORTCUT_KEYS = [ - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', - 'ArrowDown', 'ArrowLeft', 'ArrowRight', 'ArrowUp', 'Backspace', 'Delete', - 'End', 'Enter', 'Escape', 'Home', 'Insert', 'PageDown', 'PageUp', 'Space', 'Tab', - ]; - -/** - * @param array<string> $shortcuts - * @return list<string> - */ -function getNonStandardShortcuts(array $shortcuts): array { - $standard = strtolower(implode(' ', SHORTCUT_KEYS)); - - $nonStandard = array_filter($shortcuts, static function (string $shortcut) use ($standard) { - $shortcut = trim($shortcut); - return $shortcut !== '' && stripos($standard, $shortcut) === false; - }); - - return array_values($nonStandard); -} - function errorMessageInfo(string $errorTitle, string $error = ''): string { $errorTitle = htmlspecialchars($errorTitle, ENT_NOQUOTES, 'UTF-8'); |
