about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan Hersam <142353+jaden@users.noreply.github.com> 2023-11-10 03:14:34 -0500
committer: GitHub <noreply@github.com> 2023-11-10 09:14:34 +0100
commit: 57f46922e856e80c2974dd8e35e60318577fccdc (patch)
tree: f89046b7393bbbb899faea11b54fc49705fd6ab0
parent: e6b0f8c3e39bc2b7565a88e0ba3695abeefc0b4a (diff)
Rework the behavior of favicon search (#5839)
* Rework the behavior of favicon search

  Use xpath queries to reduce the filtering necessary.
  Don't call checkUrl since the href value will not always be a full URL.
  Check for a base path in the HTML to be used if the favicon href doesn't
  start with a leading slash.

* Break long xpath query into two lines

* Fix issues found by CI
-rw-r--r-- lib/favicons.php 63
1 file changed, 35 insertions, 28 deletions
diff --git a/lib/favicons.php b/lib/favicons.php
index 8da38c519..a74f3331f 100644
--- a/lib/favicons.php
+++ b/lib/favicons.php
@@ -68,34 +68,41 @@ function downloadHttp(string &$url, array $curlOptions = []): string {
function searchFavicon(string &$url): string {
$dom = new DOMDocument();
$html = downloadHttp($url);
- if ($html != '' && @$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
- $rels = array('shortcut icon', 'icon');
- $links = $dom->getElementsByTagName('link');
- foreach ($rels as $rel) {
- foreach ($links as $link) {
- if ($link->hasAttribute('rel') && $link->hasAttribute('href') &&
- strtolower(trim($link->getAttribute('rel'))) === $rel) {
- $href = trim($link->getAttribute('href'));
- if (substr($href, 0, 2) === '//') {
- // Case of protocol-relative URLs
- if (preg_match('%^(https?:)//%i', $url, $matches) === 1) {
- $href = $matches[1] . $href;
- } else {
- $href = 'https:' . $href;
- }
- }
- $checkUrl = checkUrl($href, false);
- if (is_string($checkUrl)) {
- $href = SimplePie_IRI::absolutize($url, $href);
- }
- $favicon = downloadHttp($href, array(
- CURLOPT_REFERER => $url,
- ));
- if (isImgMime($favicon)) {
- return $favicon;
- }
- }
- }
+
+ if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
+ return '';
+ }
+
+ $xpath = new DOMXPath($dom);
+ $links = $xpath->query('//link[@href][translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="shortcut icon"'
+ . ' or translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="icon"]');
+
+ if (!$links) {
+ return '';
+ }
+
+ // Use the base element for relative paths, if there is one
+ $baseElements = $xpath->query('//base[@href]');
+ $baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null;
+ $baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $url;
+
+ foreach ($links as $link) {
+ if (!$link instanceof DOMElement) {
+ continue;
+ }
+ $href = trim($link->getAttribute('href'));
+ $urlParts = parse_url($url);
+
+ // Handle protocol-relative URLs by adding the current URL's scheme
+ if (substr($href, 0, 2) === '//') {
+ $href = ($urlParts['scheme'] ?? 'https') . '://' . $href;
+ }
+
+ $href = SimplePie_IRI::absolutize($baseUrl, $href);
+
+ $favicon = downloadHttp($href, array(CURLOPT_REFERER => $url));
+ if (isImgMime($favicon)) {
+ return $favicon;
}
}
return '';