diff options
| author | 2023-11-10 03:14:34 -0500 | |
|---|---|---|
| committer | 2023-11-10 09:14:34 +0100 | |
| commit | 57f46922e856e80c2974dd8e35e60318577fccdc (patch) | |
| tree | f89046b7393bbbb899faea11b54fc49705fd6ab0 | |
| parent | e6b0f8c3e39bc2b7565a88e0ba3695abeefc0b4a (diff) | |
Rework the behavior of favicon search (#5839)
* Rework the behavior of favicon search
Use XPath queries to reduce the amount of filtering needed in PHP.
Don't call checkUrl since the href value will not always be a full URL.
Check for a <base> element in the HTML and use its href as the base URL
when the favicon href doesn't start with a leading slash.
* Break long XPath query into two lines
* Fix issues found by CI
| -rw-r--r-- | lib/favicons.php | 63 |
1 file changed, 35 insertions, 28 deletions
diff --git a/lib/favicons.php b/lib/favicons.php index 8da38c519..a74f3331f 100644 --- a/lib/favicons.php +++ b/lib/favicons.php @@ -68,34 +68,41 @@ function downloadHttp(string &$url, array $curlOptions = []): string { function searchFavicon(string &$url): string { $dom = new DOMDocument(); $html = downloadHttp($url); - if ($html != '' && @$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { - $rels = array('shortcut icon', 'icon'); - $links = $dom->getElementsByTagName('link'); - foreach ($rels as $rel) { - foreach ($links as $link) { - if ($link->hasAttribute('rel') && $link->hasAttribute('href') && - strtolower(trim($link->getAttribute('rel'))) === $rel) { - $href = trim($link->getAttribute('href')); - if (substr($href, 0, 2) === '//') { - // Case of protocol-relative URLs - if (preg_match('%^(https?:)//%i', $url, $matches) === 1) { - $href = $matches[1] . $href; - } else { - $href = 'https:' . $href; - } - } - $checkUrl = checkUrl($href, false); - if (is_string($checkUrl)) { - $href = SimplePie_IRI::absolutize($url, $href); - } - $favicon = downloadHttp($href, array( - CURLOPT_REFERER => $url, - )); - if (isImgMime($favicon)) { - return $favicon; - } - } - } + + if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) { + return ''; + } + + $xpath = new DOMXPath($dom); + $links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"' + . ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]'); + + if (!$links) { + return ''; + } + + // Use the base element for relative paths, if there is one + $baseElements = $xpath->query('//base[@href]'); + $baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null; + $baseUrl = ($baseElement instanceof DOMElement) ? 
$baseElement->getAttribute('href') : $url; + + foreach ($links as $link) { + if (!$link instanceof DOMElement) { + continue; + } + $href = trim($link->getAttribute('href')); + $urlParts = parse_url($url); + + // Handle protocol-relative URLs by adding the current URL's scheme + if (substr($href, 0, 2) === '//') { + $href = ($urlParts['scheme'] ?? 'https') . '://' . $href; + } + + $href = SimplePie_IRI::absolutize($baseUrl, $href); + + $favicon = downloadHttp($href, array(CURLOPT_REFERER => $url)); + if (isImgMime($favicon)) { + return $favicon; } } return ''; |
