aboutsummaryrefslogtreecommitdiff
path: root/lib/favicons.php
blob: fd03f61c8be43aef77025481ea2fb99ca601bc94 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
<?php
const FAVICONS_DIR = DATA_PATH . '/favicons/';
const DEFAULT_FAVICON = PUBLIC_PATH . '/themes/icons/default_favicon.ico';

function isImgMime($content) {
	//Based on https://github.com/ArthurHoaro/favicon/blob/3a4f93da9bb24915b21771eb7873a21bde26f5d1/src/Favicon/Favicon.php#L311-L319
	if ($content == '') {
		return false;
	}
	if (!extension_loaded('fileinfo')) {
		return true;
	}
	$isImage = true;
	try {
		$fInfo = finfo_open(FILEINFO_MIME_TYPE);
		$isImage = strpos(finfo_buffer($fInfo, $content), 'image') !== false;
		finfo_close($fInfo);
	} catch (Exception $e) {
		echo 'Caught exception: ',  $e->getMessage(), "\n";
	}
	return $isImage;
}

function downloadHttp(&$url, $curlOptions = array()) {
	syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url);
	$url = checkUrl($url);
	if (!$url) {
		return '';
	}
	$ch = curl_init($url);
	curl_setopt_array($ch, [
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_TIMEOUT => 15,
			CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
			CURLOPT_MAXREDIRS => 10,
			CURLOPT_FOLLOWLOCATION => true,
			CURLOPT_ENCODING => '',	//Enable all encodings
		]);
	curl_setopt_array($ch, $curlOptions);
	$response = curl_exec($ch);
	$info = curl_getinfo($ch);
	curl_close($ch);
	if (!empty($info['url'])) {
		$url2 = checkUrl($info['url']);
		if ($url2 != '') {
			$url = $url2;	//Possible redirect
		}
	}
	return $info['http_code'] == 200 ? $response : '';
}

function searchFavicon(&$url) {
	$dom = new DOMDocument();
	$html = downloadHttp($url);
	if ($html != '' && @$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
		$rels = array('shortcut icon', 'icon');
		$links = $dom->getElementsByTagName('link');
		foreach ($rels as $rel) {
			foreach ($links as $link) {
				if ($link->hasAttribute('rel') && $link->hasAttribute('href') &&
					strtolower(trim($link->getAttribute('rel'))) === $rel) {
					$href = trim($link->getAttribute('href'));
					if (substr($href, 0, 2) === '//') {
						// Case of protocol-relative URLs
						if (preg_match('%^(https?:)//%i', $url, $matches)) {
							$href = $matches[1] . $href;
						} else {
							$href = 'https:' . $href;
						}
					}
					if (!checkUrl($href, false)) {
						$href = SimplePie_IRI::absolutize($url, $href);
					}
					$favicon = downloadHttp($href, array(
							CURLOPT_REFERER => $url,
						));
					if (isImgMime($favicon)) {
						return $favicon;
					}
				}
			}
		}
	}
	return '';
}

function download_favicon($url, $dest) {
	$url = trim($url);
	$favicon = searchFavicon($url);
	if ($favicon == '') {
		$rootUrl = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $url);
		if ($rootUrl != $url) {
			$url = $rootUrl;
			$favicon = searchFavicon($url);
		}
		if ($favicon == '') {
			$link = $rootUrl . 'favicon.ico';
			$favicon = downloadHttp($link, array(
					CURLOPT_REFERER => $url,
				));
			if (!isImgMime($favicon)) {
				$favicon = '';
			}
		}
	}
	return ($favicon != '' && file_put_contents($dest, $favicon)) ||
		@copy(DEFAULT_FAVICON, $dest);
}