aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2017-04-23 14:06:37 +0200
committerGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2017-04-23 14:06:37 +0200
commit44c9ae51c44478e56ee70ce692ade6a275981320 (patch)
treeed47cd59439b9f826108886cdb0dfe30f113fbb6
parent6d5fb27f978c25be09a939d0f426a5c8962e79ec (diff)
Rewritten Favicon library using cURL
Reduce the number of requests, more robust, many more cases working, reduced code
-rw-r--r--CHANGELOG.md2
-rw-r--r--lib/Favicon/DataAccess.php43
-rw-r--r--lib/Favicon/Favicon.php396
-rw-r--r--lib/Favicon/FaviconDLType.php23
-rw-r--r--lib/favicons.php110
-rw-r--r--p/f.php3
6 files changed, 98 insertions, 479 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e680736db..08025bb3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,7 @@
* Improve English [#1465](https://github.com/FreshRSS/FreshRSS/pull/1465)
* Misc.
* Fall back to article URL when the article GUID is empty [#1482](https://github.com/FreshRSS/FreshRSS/issues/1482)
- * Update to version 1.2 of Favicon library [#1501](https://github.com/FreshRSS/FreshRSS/issues/1501)
+ * Rewritten Favicon library using cURL [#1503](https://github.com/FreshRSS/FreshRSS/pull/1503)
## 2017-03-11 FreshRSS 1.6.3
diff --git a/lib/Favicon/DataAccess.php b/lib/Favicon/DataAccess.php
deleted file mode 100644
index 1445e9343..000000000
--- a/lib/Favicon/DataAccess.php
+++ /dev/null
@@ -1,43 +0,0 @@
-<?php
-
-namespace Favicon;
-
-/**
- * DataAccess is a wrapper used to read/write data locally or remotly
- * Aside from SOLID principles, this wrapper is also useful to mock remote resources in unit tests
- * Note: remote access warning are silenced because we don't care if a website is unreachable
- **/
-class DataAccess {
- public function retrieveUrl($url) {
- $this->set_context();
- return @file_get_contents($url);
- }
-
- public function retrieveHeader($url) {
- $this->set_context();
- $headers = @get_headers($url, 1);
- return is_array($headers) ? array_change_key_case($headers) : array();
- }
-
- public function saveCache($file, $data) {
- file_put_contents($file, $data);
- }
-
- public function readCache($file) {
- return file_get_contents($file);
- }
-
- private function set_context() {
- stream_context_set_default(
- array(
- 'http' => array(
- 'method' => 'GET',
- 'follow_location' => 0,
- 'max_redirects' => 1,
- 'timeout' => 10,
- 'header' => "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:20.0; Favicon; +https://github.com/ArthurHoaro/favicon) Gecko/20100101 Firefox/32.0\r\n",
- )
- )
- );
- }
-} \ No newline at end of file
diff --git a/lib/Favicon/Favicon.php b/lib/Favicon/Favicon.php
deleted file mode 100644
index c026d8a95..000000000
--- a/lib/Favicon/Favicon.php
+++ /dev/null
@@ -1,396 +0,0 @@
-<?php
-
-namespace Favicon;
-
-class Favicon
-{
- protected static $TYPE_CACHE_URL = 'url';
- protected static $TYPE_CACHE_IMG = 'img';
- protected $url = '';
- protected $cacheDir;
- protected $cacheTimeout;
- protected $dataAccess;
-
- public function __construct($args = array())
- {
- if (isset($args['url'])) {
- $this->url = $args['url'];
- }
-
- $this->cacheDir = __DIR__ . '/../../resources/cache';
- $this->cacheTimeout = 604800;
- $this->dataAccess = new DataAccess();
- }
-
- /**
- * Set cache settings:
- * - dir: cache directory
- * - timeout: in seconds
- *
- * @param array $args
- */
- public function cache($args = array()) {
- if (isset($args['dir'])) {
- $this->cacheDir = $args['dir'];
- }
-
- if (!empty($args['timeout'])) {
- $this->cacheTimeout = $args['timeout'];
- }
- }
-
- public static function baseUrl($url, $path = false)
- {
- $return = '';
-
- if (!$url = parse_url($url)) {
- return FALSE;
- }
-
- // Scheme
- $scheme = isset($url['scheme']) ? strtolower($url['scheme']) : null;
- if ($scheme != 'http' && $scheme != 'https') {
-
- return FALSE;
- }
- $return .= "{$scheme}://";
-
- // Username and password
- if (isset($url['user'])) {
- $return .= $url['user'];
- if (isset($url['pass'])) {
- $return .= ":{$url['pass']}";
- }
- $return .= '@';
- }
-
- // Hostname
- if( !isset($url['host']) ) {
- return FALSE;
- }
-
- $return .= $url['host'];
-
- // Port
- if (isset($url['port'])) {
- $return .= ":{$url['port']}";
- }
-
- // Path
- if( $path && isset($url['path']) ) {
- $return .= $url['path'];
- }
- $return .= '/';
-
- return $return;
- }
-
- public function info($url)
- {
- if(empty($url) || $url === false) {
- return false;
- }
-
- $max_loop = 5;
-
- // Discover real status by following redirects.
- $loop = TRUE;
- while ($loop && $max_loop-- > 0) {
- $headers = $this->dataAccess->retrieveHeader($url);
- if (empty($headers)) {
- return false;
- }
- $exploded = explode(' ', $headers[0]);
-
- if( !isset($exploded[1]) ) {
- return false;
- }
- list(,$status) = $exploded;
-
- switch ($status) {
- case '301':
- case '302':
- $url = isset($headers['location']) ? $headers['location'] : '';
- if (is_array($url)) {
- $url = end($url);
- }
- break;
- default:
- $loop = FALSE;
- break;
- }
- }
-
- return array('status' => $status, 'url' => $url);
- }
-
- public function endRedirect($url) {
- $out = $this->info($url);
- return !empty($out['url']) ? $out['url'] : false;
- }
-
- /**
- * Find remote (or cached) favicon
- *
- * @param string $url to look for a favicon
- * @param int $type type of retrieval (FaviconDLType):
- * - HOTLINK_URL: returns remote URL
- * - DL_FILE_PATH: returns file path of the favicon downloaded locally
- * - RAW_IMAGE: returns the favicon image binary string
- *
- * @return string|bool favicon URL, false if nothing was found
- */
- public function get($url = '', $type = FaviconDLType::HOTLINK_URL)
- {
- // URLs passed to this method take precedence.
- if (!empty($url)) {
- $this->url = $url;
- }
-
- // Get the base URL without the path for clearer concatenations.
- $url = rtrim($this->baseUrl($this->url, true), '/');
- $original = $url;
- if (($favicon = $this->checkCache($original, self::$TYPE_CACHE_URL)) === false
- && ! $favicon = $this->getFavicon($original, false)
- ) {
- $url = rtrim($this->endRedirect($this->baseUrl($this->url, false)), '/');
- if (($favicon = $this->checkCache($url, self::$TYPE_CACHE_URL)) === false
- && ! $favicon = $this->getFavicon($url)
- ) {
- $url = $original;
- }
- }
-
- $this->saveCache($url, $favicon, self::$TYPE_CACHE_URL);
-
- switch ($type) {
- case FaviconDLType::DL_FILE_PATH:
- return $this->getImage($url, $favicon, false);
- case FaviconDLType::RAW_IMAGE:
- return $this->getImage($url, $favicon, true);
- case FaviconDLType::HOTLINK_URL:
- default:
- return empty($favicon) ? false : $favicon;
- }
- }
-
- private function getFavicon($url, $checkDefault = true) {
- $favicon = false;
-
- if(empty($url)) {
- return false;
- }
-
- // Try /favicon.ico first.
- if( $checkDefault ) {
- $info = $this->info("{$url}/favicon.ico");
- if ($info['status'] == '200') {
- $favicon = $info['url'];
- }
- }
-
- // See if it's specified in a link tag in domain url.
- if (!$favicon) {
- $favicon = trim($this->getInPage($url));
- }
- if (substr($favicon, 0, 2) === '//') {
- $favicon = 'https:' . $favicon;
- }
-
- // Make sure the favicon is an absolute URL.
- if( $favicon && filter_var($favicon, FILTER_VALIDATE_URL) === false ) {
- $favicon = $url . '/' . $favicon;
- }
-
- // Sometimes people lie, so check the status.
- // And sometimes, it's not even an image. Sneaky bastards!
- // If cacheDir isn't writable, that's not our problem
- if ($favicon && is_writable($this->cacheDir) && extension_loaded('fileinfo') && !$this->checkImageMType($favicon)) {
- $favicon = false;
- }
-
- return $favicon;
- }
-
- /**
- * Find remote favicon and return it as an image
- */
- private function getImage($url, $faviconUrl = '', $image = false)
- {
- if (empty($faviconUrl)) {
- return false;
- }
-
- $favicon = $this->checkCache($url, self::$TYPE_CACHE_IMG);
- // Favicon not found in the cache
- if( $favicon === false ) {
- $favicon = $this->dataAccess->retrieveUrl($faviconUrl);
- // Definitely not found
- if (!$this->checkImageMTypeContent($favicon)) {
- return false;
- } else {
- $this->saveCache($url, $favicon, self::$TYPE_CACHE_IMG);
- }
- }
-
- if( $image ) {
- return $favicon;
- }
- else
- return self::$TYPE_CACHE_IMG . md5($url);
- }
-
- /**
- * Display data as a PNG Favicon, then exit
- * @param $data
- */
- private function displayFavicon($data) {
- header('Content-Type: image/png');
- header('Cache-Control: private, max-age=10800, pre-check=10800');
- header('Pragma: private');
- header('Expires: ' . date(DATE_RFC822,strtotime('7 day')));
- echo $data;
- exit;
- }
-
- private function getInPage($url) {
- $html = $this->dataAccess->retrieveUrl("{$url}/");
- preg_match('!<head.*?>.*</head>!ims', $html, $match);
-
- if(empty($match) || count($match) == 0) {
- return false;
- }
-
- $head = $match[0];
-
- $dom = new \DOMDocument();
- // Use error suppression, because the HTML might be too malformed.
- if (@$dom->loadHTML($head)) {
- $links = $dom->getElementsByTagName('link');
- foreach ($links as $link) {
- if ($link->hasAttribute('rel') && strtolower($link->getAttribute('rel')) == 'shortcut icon') {
- return $link->getAttribute('href');
- }
- }
- foreach ($links as $link) {
- if ($link->hasAttribute('rel') && strtolower($link->getAttribute('rel')) == 'icon') {
- return $link->getAttribute('href');
- }
- }
- foreach ($links as $link) {
- if ($link->hasAttribute('href') && strpos($link->getAttribute('href'), 'favicon') !== FALSE) {
- return $link->getAttribute('href');
- }
- }
- }
- return false;
- }
-
- private function checkCache($url, $type) {
- if ($this->cacheTimeout) {
- $cache = $this->cacheDir . '/'. $type . md5($url);
- if (file_exists($cache) && is_readable($cache)
- && ($this->cacheTimeout === -1 || time() - filemtime($cache) < $this->cacheTimeout)
- ) {
- return $this->dataAccess->readCache($cache);
- }
- }
- return false;
- }
-
- /**
- * Will save data in cacheDir if the directory writable and any previous cache is expired (cacheTimeout)
- * @param $url
- * @param $data
- * @param $type
- * @return string cache file path
- */
- private function saveCache($url, $data, $type) {
- // Save cache if necessary
- $cache = $this->cacheDir . '/'. $type . md5($url);
- if ($this->cacheTimeout && !file_exists($cache)
- || (is_writable($cache) && $this->cacheTimeout !== -1 && time() - filemtime($cache) > $this->cacheTimeout)
- ) {
- $this->dataAccess->saveCache($cache, $data);
- }
- return $cache;
- }
-
- private function checkImageMType($url) {
-
- $fileContent = $this->dataAccess->retrieveUrl($url);
-
- return $this->checkImageMTypeContent($fileContent);
- }
-
- private function checkImageMTypeContent($content) {
- if(empty($content)) return false;
-
- $isImage = true;
- try {
- $fInfo = finfo_open(FILEINFO_MIME_TYPE);
- $isImage = strpos(finfo_buffer($fInfo, $content), 'image') !== false;
- finfo_close($fInfo);
- } catch (\Exception $e) {
- error_log('Favicon checkImageMTypeContent error: ' . $e->getMessage());
- }
-
- return $isImage;
- }
-
- /**
- * @return mixed
- */
- public function getCacheDir()
- {
- return $this->cacheDir;
- }
-
- /**
- * @param mixed $cacheDir
- */
- public function setCacheDir($cacheDir)
- {
- $this->cacheDir = $cacheDir;
- }
-
- /**
- * @return mixed
- */
- public function getCacheTimeout()
- {
- return $this->cacheTimeout;
- }
-
- /**
- * @param mixed $cacheTimeout
- */
- public function setCacheTimeout($cacheTimeout)
- {
- $this->cacheTimeout = $cacheTimeout;
- }
-
- /**
- * @return string
- */
- public function getUrl()
- {
- return $this->url;
- }
-
- /**
- * @param string $url
- */
- public function setUrl($url)
- {
- $this->url = $url;
- }
-
- /**
- * @param DataAccess|\PHPUnit_Framework_MockObject_MockObject $dataAccess
- */
- public function setDataAccess($dataAccess)
- {
- $this->dataAccess = $dataAccess;
- }
-}
diff --git a/lib/Favicon/FaviconDLType.php b/lib/Favicon/FaviconDLType.php
deleted file mode 100644
index 6da525a7f..000000000
--- a/lib/Favicon/FaviconDLType.php
+++ /dev/null
@@ -1,23 +0,0 @@
-<?php
-
-
-namespace Favicon;
-
-
-interface FaviconDLType
-{
- /**
- * Retrieve remote favicon URL.
- */
- const HOTLINK_URL = 0;
-
- /**
- * Retrieve downloaded favicon path (requires cache).
- */
- const DL_FILE_PATH = 1;
-
- /**
- * Retrieve the image content as a binary string.
- */
- const RAW_IMAGE = 2;
-}
diff --git a/lib/favicons.php b/lib/favicons.php
index d8baba342..cc6e54374 100644
--- a/lib/favicons.php
+++ b/lib/favicons.php
@@ -1,22 +1,104 @@
<?php
-
-include(LIB_PATH . '/Favicon/FaviconDLType.php');
-include(LIB_PATH . '/Favicon/DataAccess.php');
-include(LIB_PATH . '/Favicon/Favicon.php');
-
$favicons_dir = DATA_PATH . '/favicons/';
$default_favicon = PUBLIC_PATH . '/themes/icons/default_favicon.ico';
-function download_favicon($website, $dest) {
- global $default_favicon;
+/**
+ * Tell whether a binary string looks like an image, judging by its detected MIME type.
+ *
+ * Based on https://github.com/ArthurHoaro/favicon/blob/3a4f93da9bb24915b21771eb7873a21bde26f5d1/src/Favicon/Favicon.php#L311-L319
+ *
+ * @param string $content raw bytes to inspect
+ * @return bool false when $content is empty or its detected MIME type does not contain 'image';
+ *  true when it does, or when detection is not possible (fileinfo extension missing or unusable)
+ */
+function isImgMime($content) {
+	if ($content == '') {
+		return false;
+	}
+	if (!extension_loaded('fileinfo')) {
+		//No way to check: give the content the benefit of the doubt
+		return true;
+	}
+	$isImage = true;
+	try {
+		$fInfo = finfo_open(FILEINFO_MIME_TYPE);
+		//finfo_open() returns false when the magic database cannot be loaded;
+		//passing that on to finfo_buffer() is a TypeError on PHP 8, which is not an Exception
+		if ($fInfo !== false) {
+			$mime = finfo_buffer($fInfo, $content);
+			finfo_close($fInfo);
+			$isImage = is_string($mime) && strpos($mime, 'image') !== false;
+		}
+	} catch (Exception $e) {
+		//Deliberate best effort: if detection itself fails, keep the permissive default
+	}
+	return $isImage;
+}
- syslog(LOG_INFO, 'FreshRSS Favicon discovery GET ' . $website);
- $favicon_getter = new \Favicon\Favicon();
- $tmpPath = realpath(TMP_PATH);
- $favicon_getter->setCacheDir($tmpPath);
- $favicon_getter->setCacheTimeout(-1);
- $favicon_path = $favicon_getter->get($website, \Favicon\FaviconDLType::DL_FILE_PATH);
+/**
+ * Download a URL with cURL, following redirects.
+ *
+ * @param string $url address to fetch, passed by reference: a leading '//' is completed with 'https:',
+ *  and after the request it is replaced by the effective URL reported by cURL when that URL is valid
+ * @param array $curlOptions extra cURL options, applied last so that they override the defaults set here
+ * @return string the response body when the final HTTP status is 200, '' in every other case
+ */
+function downloadHttp(&$url, $curlOptions = array()) {
+	syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url);
+	if (substr($url, 0, 2) === '//') {
+		//Protocol-relative URL: complete it from $url itself
+		$url = 'https:' . $url;
+	}
+	if ($url == '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
+		return '';
+	}
+	$ch = curl_init($url);
+	if ($ch === false) {
+		return '';
+	}
+	curl_setopt_array($ch, array(
+			CURLOPT_FOLLOWLOCATION => true,
+			CURLOPT_MAXREDIRS => 10,
+			CURLOPT_RETURNTRANSFER => true,
+			CURLOPT_TIMEOUT => 15,
+			CURLOPT_USERAGENT => 'FreshRSS/' . FRESHRSS_VERSION . ' (' . PHP_OS . '; ' . FRESHRSS_WEBSITE . ')',
+		));
+	if (defined('CURLOPT_ENCODING')) {
+		curl_setopt($ch, CURLOPT_ENCODING, '');	//Enable all encodings
+	}
+	curl_setopt_array($ch, $curlOptions);
+	$response = curl_exec($ch);
+	$info = curl_getinfo($ch);
+	curl_close($ch);
+	//Report the address reached after redirects back to the caller
+	if (!empty($info['url']) && (filter_var($info['url'], FILTER_VALIDATE_URL) !== false)) {
+		$url = $info['url'];
+	}
+	//curl_exec() returns false on failure, possibly after a 200 status line was already received
+	return ($info['http_code'] == 200 && is_string($response)) ? $response : '';
+}
+
+/**
+ * Download the page at $url and try the icons it declares with <link rel="shortcut icon"> or <link rel="icon">.
+ * All 'shortcut icon' links are tried before any 'icon' link.
+ *
+ * @param string $url address of the HTML page, passed by reference to downloadHttp()
+ * @return string content of the first declared icon accepted by isImgMime(), '' when there is none
+ */
+function searchFavicon(&$url) {
+	$dom = new DOMDocument();
+	$html = downloadHttp($url);
+	if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
+		return '';
+	}
+	$links = $dom->getElementsByTagName('link');
+	foreach (array('shortcut icon', 'icon') as $wantedRel) {
+		foreach ($links as $link) {
+			if (!$link->hasAttribute('rel') || !$link->hasAttribute('href')) {
+				continue;
+			}
+			if (strtolower(trim($link->getAttribute('rel'))) !== $wantedRel) {
+				continue;
+			}
+			$iconUrl = trim($link->getAttribute('href'));
+			if (substr($iconUrl, 0, 2) === '//') {
+				$iconUrl = 'https:' . $iconUrl;
+			}
+			if (filter_var($iconUrl, FILTER_VALIDATE_URL) === false) {
+				//Not an absolute URL: resolve it against the address of the page
+				$iconUrl = SimplePie_IRI::absolutize($url, $iconUrl);
+			}
+			$icon = downloadHttp($iconUrl, array(
+					CURLOPT_REFERER => $url,
+				));
+			if (isImgMime($icon)) {
+				return $icon;
+			}
+		}
+	}
+	return '';
+}
- return ($favicon_path != false && @rename($tmpPath . '/' . $favicon_path, $dest)) ||
+/**
+ * Find a favicon for a website and store it in a file.
+ *
+ * Tries, in order: the icons declared by the page at $url, the icons declared by the root page
+ * of the same site, then favicon.ico at the root of the site. When none of them yields an image,
+ * the default favicon ($default_favicon) is copied to $dest instead.
+ *
+ * @param string $url address of the website
+ * @param string $dest path of the file to write
+ * @return bool true when a favicon was written to $dest or the default one was copied there
+ */
+function download_favicon($url, $dest) {
+ global $default_favicon;
+ $url = trim($url);
+ $favicon = searchFavicon($url);
+ if ($favicon == '') {
+ //Reduce the address to scheme://host/ and retry from the root page of the site
+ $rootUrl = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $url);
+ if ($rootUrl != $url) {
+ $url = $rootUrl;
+ $favicon = searchFavicon($url);
+ }
+ if ($favicon == '') {
+ //Last resort: the conventional location at the root of the site
+ $link = $rootUrl . 'favicon.ico';
+ $favicon = downloadHttp($link, array(
+ CURLOPT_REFERER => $url,
+ ));
+ if (!isImgMime($favicon)) {
+ $favicon = '';
+ }
+ }
+ }
+ return ($favicon != '' && file_put_contents($dest, $favicon)) ||
@copy($default_favicon, $dest);
}
diff --git a/p/f.php b/p/f.php
index e4c82bb16..c47fa747a 100644
--- a/p/f.php
+++ b/p/f.php
@@ -1,6 +1,6 @@
<?php
-
require('../constants.php');
+require(LIB_PATH . '/lib_rss.php'); //Includes class autoloader
require(LIB_PATH . '/favicons.php');
require(LIB_PATH . '/http-conditional.php');
@@ -15,7 +15,6 @@ function show_default_favicon($cacheSeconds = 3600) {
}
}
-
$id = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '0';
if (!ctype_xdigit($id)) {
$id = '0';