diff options
| -rwxr-xr-x | app/Controllers/feedController.php | 10 | ||||
| -rw-r--r-- | app/Controllers/subscriptionController.php | 1 | ||||
| -rw-r--r-- | app/Models/Feed.php | 10 | ||||
| -rw-r--r-- | data/cache/.gitignore | 1 | ||||
| -rw-r--r-- | lib/favicons.php | 89 | ||||
| -rw-r--r-- | lib/lib_rss.php | 23 | ||||
| -rw-r--r-- | p/f.php | 6 |
7 files changed, 47 insertions, 93 deletions
diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index a080d5e67..4cd5e7369 100755 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -722,11 +722,13 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $feed->_name($name); $feedProperties['name'] = $feed->name(false); } - if (trim($feed->website()) === '') { + if ($feed->website() === '' || $feed->website() === $feed->url()) { $website = html_only_entity_decode($simplePie->get_link()); - $feed->_website($website == '' ? $feed->url() : $website); - $feedProperties['website'] = $feed->website(); - $feed->faviconPrepare(); + if ($website !== $feed->website()) { + $feed->_website($website); + $feedProperties['website'] = $feed->website(); + $feed->faviconPrepare(); + } } if (trim($feed->description()) === '') { $description = html_only_entity_decode($simplePie->get_description()); diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index 58cda4b9b..70065fc26 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -369,7 +369,6 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { } if ($favicon_uploaded && !$resetFavicon) { - require_once(LIB_PATH . '/favicons.php'); $max_size = FreshRSS_Context::systemConf()->limits['max_favicon_upload_size']; if ($favicon_size > $max_size) { Minz_Request::bad(_t('feedback.sub.feed.favicon.too_large', format_bytes($max_size)), $url_redirect); diff --git a/app/Models/Feed.php b/app/Models/Feed.php index 81765d433..2b442561e 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -176,6 +176,8 @@ class FreshRSS_Feed extends Minz_Model { } $attributesOnly = $contents === null && $tmpPath === ''; + + require_once(LIB_PATH . '/favicons.php'); if (!$attributesOnly && !isImgMime(is_string($contents) ? $contents : '')) { throw new FreshRSS_UnsupportedImageFormat_Exception(); } @@ -195,7 +197,6 @@ class FreshRSS_Feed extends Minz_Model { $this->_attribute('customFaviconExt', $extName); $this->_attribute('customFaviconDisallowDel', $disallowDelete); - require_once(LIB_PATH . '/favicons.php'); $newPath = FAVICONS_DIR . $this->hashFavicon(skipCache: true) . '.ico'; if ($attributesOnly && !file_exists($newPath)) { $updateFeed = false; @@ -404,7 +405,12 @@ class FreshRSS_Feed extends Minz_Model { if ($this->customFavicon()) { return; } - $url = $this->website(fallback: true); + $url = $this->website(fallback: false); + if ($url === '' || $url === $this->url) { + // Get root URL from the feed URL + $url = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $this->url) ?? $this->url; + } + $txt = FAVICONS_DIR . $this->hashFavicon() . '.txt'; if (@file_get_contents($txt) !== $url) { file_put_contents($txt, $url); diff --git a/data/cache/.gitignore b/data/cache/.gitignore index db4a3469c..b5f768fa4 100644 --- a/data/cache/.gitignore +++ b/data/cache/.gitignore @@ -1,4 +1,5 @@ *.html +*.ico *.json *.spc *.xml diff --git a/lib/favicons.php b/lib/favicons.php index e0baf542b..c28cdfc7f 100644 --- a/lib/favicons.php +++ b/lib/favicons.php @@ -22,81 +22,14 @@ function isImgMime(string $content): bool { return $isImage; } -/** @param array<int,int|bool|string> $curlOptions */ -function downloadHttp(string &$url, array $curlOptions = []): string { - if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) { - Minz_Log::warning('For that domain, will first retry favicon after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); - return ''; - } - - syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url); - $url2 = checkUrl($url); - if ($url2 == false) { - return ''; - } - $url = $url2; - - $ch = curl_init($url); - if ($ch === false) { - return ''; - } - curl_setopt_array($ch, [ - CURLOPT_HEADER => true, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TIMEOUT => 15, - CURLOPT_USERAGENT => FRESHRSS_USERAGENT, - CURLOPT_MAXREDIRS => 10, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_ENCODING => '', //Enable all encodings - //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers - ]); - - FreshRSS_Context::initSystem(); - if (FreshRSS_Context::hasSystemConf()) { - curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options); - } - - curl_setopt_array($ch, $curlOptions); - - $response = curl_exec($ch); - $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - curl_close($ch); - - $parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : ''); - if ($parser->parse()) { - $headers = $parser->headers; - $body = $parser->body; - } else { - $headers = []; - $body = false; - } - - if (in_array($c_status, [429, 503], true)) { - $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? ''); - if ($c_status === 429) { - $errorMessage = 'HTTP 429 Too Many Requests! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; - } elseif ($c_status === 503) { - $errorMessage = 'HTTP 503 Service Unavailable! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; - } - if ($retryAfter > 0) { - $errorMessage .= ' We may retry after ' . date('c', $retryAfter); - } - } - - $url2 = checkUrl($c_effective_url); - if ($url2 != false) { - $url = $url2; //Possible redirect - } - - return $c_status === 200 && is_string($body) ? $body : ''; +function faviconCachePath(string $url): string { + return CACHE_PATH . '/' . sha1($url) . '.ico'; } -function searchFavicon(string &$url): string { +function searchFavicon(string $url): string { $dom = new DOMDocument(); - $html = downloadHttp($url); - - if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { + ['body' => $html, 'effective_url' => $effective_url, 'fail' => $fail] = httpGet($url, cachePath: CACHE_PATH . '/' . sha1($url) . '.html', type: 'html'); + if ($fail || $html === '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { return ''; } @@ -110,14 +43,14 @@ function searchFavicon(string &$url): string { // Use the base element for relative paths, if there is one $baseElements = $xpath->query('//base[@href]'); $baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null; - $baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $url; + $baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $effective_url; foreach ($links as $link) { if (!$link instanceof DOMElement) { continue; } $href = trim($link->getAttribute('href')); - $urlParts = parse_url($url); + $urlParts = parse_url($effective_url); // Handle protocol-relative URLs by adding the current URL's scheme if (substr($href, 0, 2) === '//') { @@ -133,7 +66,9 @@ function searchFavicon(string &$url): string { if ($iri == false) { return ''; } - $favicon = downloadHttp($iri, [CURLOPT_REFERER => $url]); + $favicon = httpGet($iri, faviconCachePath($iri), 'ico', curl_options: [ + CURLOPT_REFERER => $effective_url, + ])['body']; if (isImgMime($favicon)) { return $favicon; } @@ -152,7 +87,9 @@ function download_favicon(string $url, string $dest): bool { } if ($favicon == '') { $link = $rootUrl . 'favicon.ico'; - $favicon = downloadHttp($link, [CURLOPT_REFERER => $url]); + $favicon = httpGet($link, faviconCachePath($link), 'ico', curl_options: [ + CURLOPT_REFERER => $url, + ])['body']; if (!isImgMime($favicon)) { $favicon = ''; } diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 25405017a..8954f9921 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -432,13 +432,9 @@ function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null): function cleanCache(int $hours = 720): void { // N.B.: GLOB_BRACE is not available on all platforms - $files = array_merge( - glob(CACHE_PATH . '/*.html', GLOB_NOSORT) ?: [], - glob(CACHE_PATH . '/*.json', GLOB_NOSORT) ?: [], - glob(CACHE_PATH . '/*.spc', GLOB_NOSORT) ?: [], - glob(CACHE_PATH . '/*.xml', GLOB_NOSORT) ?: []); + $files = glob(CACHE_PATH . '/*.*', GLOB_NOSORT) ?: []; foreach ($files as $file) { - if (substr($file, -10) === 'index.html') { + if (str_ends_with($file, 'index.html')) { continue; } $cacheMtime = @filemtime($file); @@ -543,7 +539,7 @@ function enforceHtmlBase(string $html, string $href): string { } /** - * @param string $type {html,json,opml,xml} + * @param string $type {html,ico,json,opml,xml} * @param array<string,mixed> $attributes * @param array<int,mixed> $curl_options * @return array{body:string,effective_url:string,redirect_count:int,fail:bool} @@ -574,7 +570,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url)); } - $accept = '*/*;q=0.8'; + $accept = ''; switch ($type) { case 'json': $accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7'; @@ -585,6 +581,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a case 'xml': $accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8'; break; + case 'ico': + $accept = 'image/x-icon,image/vnd.microsoft.icon,image/ico,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.1'; + break; case 'html': default: $accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'; @@ -673,9 +672,13 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a } elseif (!is_string($body) || strlen($body) === 0) { $body = ''; } else { - $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM - if ($type !== 'json') { + if (in_array($type, ['html', 'json', 'opml', 'xml'], true)) { + $body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM + } + if (in_array($type, ['html', 'xml', 'opml'], true)) { $body = enforceHttpEncoding($body, $c_content_type); + } + if (in_array($type, ['html'], true)) { $body = enforceHtmlBase($body, $c_effective_url); } } @@ -39,6 +39,12 @@ if (($ico_mtime == false || $ico_mtime < $txt_mtime || ($ico_mtime < time() - (m show_default_favicon(1800); exit(); } + + FreshRSS_Context::initSystem(); + if (!FreshRSS_Context::hasSystemConf()) { + header('HTTP/1.1 500 Internal Server Error'); + die('Invalid system init!'); + } if (!download_favicon($url, $ico)) { // Download failed if ($ico_mtime == false) { |
