From 1fe66ad020ca8f0560bb9c6e311852ed77228f78 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 28 Feb 2022 20:22:43 +0100 Subject: Implement Web scraping "HTML + XPath" (#4220) * More PHP type hints for Fever Follow-up of https://github.com/FreshRSS/FreshRSS/pull/4201 Related to https://github.com/FreshRSS/FreshRSS/issues/4200 * Detail * Draft * Progress * More draft * Fix thumbnail PHP type hint https://github.com/FreshRSS/FreshRSS/issues/4215 * More types * A bit more * Refactor FreshRSS_Entry::fromArray * Progress * Starts to work * Categories * Functional * Layout update * Fix relative URLs * Cache system * Forgotten files * Remove a debug line * Automatic form validation of XPath expressions * data-leave-validation * Fix reload action * Simpler examples * Fix column type for PostgreSQL * Enforce HTTP encoding * Readme * Fix get full content * target="_blank" * gitignore * htmlspecialchars_utf8 * Implement HTML `<base>` And fix/revert `xml:base` support in SimplePie https://github.com/simplepie/simplepie/commit/e49c578817aa504d8d05cd7f33857aeda9d41908 * SimplePie upstream PR merged https://github.com/simplepie/simplepie/pull/723 --- README.fr.md | 2 + README.md | 2 + app/Controllers/feedController.php | 48 +++++++- app/Controllers/indexController.php | 2 +- app/Controllers/subscriptionController.php | 18 +++ app/Models/Entry.php | 96 +++++++-------- app/Models/EntryDAO.php | 31 ++--- app/Models/EntryDAOPGSQL.php | 4 +- app/Models/EntryDAOSQLite.php | 4 +- app/Models/Feed.php | 188 ++++++++++++++++++++++++++--- app/Models/FeedDAO.php | 42 ++++--- app/Models/FeedDAOSQLite.php | 2 +- app/Models/View.php | 17 ++- app/SQL/install.sql.mysql.php | 1 + app/SQL/install.sql.pgsql.php | 1 + app/SQL/install.sql.sqlite.php | 1 + app/i18n/cz/sub.php | 43 +++++++ app/i18n/de/sub.php | 43 +++++++ app/i18n/en-us/sub.php | 43 +++++++ app/i18n/en/sub.php | 43 +++++++ app/i18n/es/sub.php | 43 +++++++ app/i18n/fr/admin.php | 6 +- app/i18n/fr/conf.php | 4 +- 
app/i18n/fr/install.php | 6 +- app/i18n/fr/sub.php | 45 ++++++- app/i18n/fr/user.php | 20 +-- app/i18n/he/sub.php | 43 +++++++ app/i18n/it/sub.php | 43 +++++++ app/i18n/ja/sub.php | 43 +++++++ app/i18n/ko/sub.php | 43 +++++++ app/i18n/nl/sub.php | 43 +++++++ app/i18n/oc/sub.php | 43 +++++++ app/i18n/pl/sub.php | 43 +++++++ app/i18n/pt-br/sub.php | 43 +++++++ app/i18n/ru/sub.php | 43 +++++++ app/i18n/sk/sub.php | 43 +++++++ app/i18n/tr/sub.php | 43 +++++++ app/i18n/zh-cn/sub.php | 43 +++++++ app/layout/layout.phtml | 2 +- app/views/helpers/export/articles.phtml | 2 +- app/views/helpers/feed/update.phtml | 104 ++++++++++++++++ app/views/index/normal.phtml | 7 +- app/views/index/reader.phtml | 2 + app/views/index/rss.phtml | 30 ++++- app/views/subscription/add.phtml | 91 ++++++++++++++ data/cache/.gitignore | 4 +- lib/Minz/Url.php | 7 +- lib/Minz/View.php | 6 + lib/SimplePie/SimplePie.php | 2 +- lib/lib_phpQuery.php | 3 +- lib/lib_rss.php | 127 ++++++++++++++++++- p/api/fever.php | 2 +- p/api/greader.php | 1 + p/scripts/extra.js | 45 +++++++ p/themes/base-theme/template.css | 8 ++ p/themes/base-theme/template.rtl.css | 8 ++ 56 files changed, 1567 insertions(+), 155 deletions(-) diff --git a/README.fr.md b/README.fr.md index b5cec608c..960d89f5d 100644 --- a/README.fr.md +++ b/README.fr.md @@ -15,6 +15,8 @@ Il y a une API pour les clients (mobiles), ainsi qu’une [interface en ligne de Grâce au standard [WebSub](https://www.w3.org/TR/websub/) (anciennement [PubSubHubbub](https://github.com/pubsubhubbub/PubSubHubbub)), FreshRSS est capable de recevoir des notifications push instantanées depuis les sources compatibles, telles [Mastodon](https://joinmastodon.org), [Friendica](https://friendi.ca), [WordPress](https://wordpress.org/plugins/pubsubhubbub/), Blogger, FeedBurner, etc. +FreshRSS supporte nativement le moissonnage du Web (Web Scraping) basique, basé sur [XPath](https://www.w3.org/TR/xpath-10/), pour les sites Web sans flux RSS / Atom. 
+ Enfin, il permet l’ajout d’[extensions](#extensions) pour encore plus de personnalisation. Les demandes de fonctionnalités, rapports de bugs, et autres contributions sont les bienvenues. Privilégiez pour cela des [demandes sur GitHub](https://github.com/FreshRSS/FreshRSS/issues). diff --git a/README.md b/README.md index 1223b4dcd..29d481a38 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ There is an API for (mobile) clients, and a [Command-Line Interface](cli/README. Thanks to the [WebSub](https://www.w3.org/TR/websub/) standard (formerly [PubSubHubbub](https://github.com/pubsubhubbub/PubSubHubbub)), FreshRSS is able to receive instant push notifications from compatible sources, such as [Mastodon](https://joinmastodon.org), [Friendica](https://friendi.ca), [WordPress](https://wordpress.org/plugins/pubsubhubbub/), Blogger, FeedBurner, etc. +FreshRSS natively supports basic Web scraping, based on [XPath](https://www.w3.org/TR/xpath-10/), for Web sites not providing any RSS / Atom feed. + Finally, it supports [extensions](#extensions) for further tuning. Feature requests, bug reports, and other contributions are welcome. The best way to contribute is to [open an issue on GitHub](https://github.com/FreshRSS/FreshRSS/issues). 
diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index f18a67072..dabfb348f 100755 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -38,7 +38,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { * @throws FreshRSS_Feed_Exception * @throws Minz_FileNotExistException */ - public static function addFeed($url, $title = '', $cat_id = 0, $new_cat_name = '', $http_auth = '', $attributes = array()) { + public static function addFeed($url, $title = '', $cat_id = 0, $new_cat_name = '', $http_auth = '', $attributes = array(), $kind = FreshRSS_Feed::KIND_RSS) { FreshRSS_UserDAO::touch(); @set_time_limit(300); @@ -67,10 +67,19 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $cat_id = $cat == null ? FreshRSS_CategoryDAO::DEFAULTCATEGORYID : $cat->id(); $feed = new FreshRSS_Feed($url); //Throws FreshRSS_BadUrl_Exception + $feed->_kind($kind); $feed->_attributes('', $attributes); $feed->_httpAuth($http_auth); - $feed->load(true); //Throws FreshRSS_Feed_Exception, Minz_FileNotExistException $feed->_category($cat_id); + switch ($kind) { + case FreshRSS_Feed::KIND_RSS: + case FreshRSS_Feed::KIND_RSS_FORCED: + $feed->load(true); //Throws FreshRSS_Feed_Exception, Minz_FileNotExistException + break; + case FreshRSS_Feed::KIND_HTML_XPATH: + $feed->_website($url); + break; + } $feedDAO = FreshRSS_Factory::createFeedDao(); if ($feedDAO->searchByUrl($feed->url())) { @@ -85,8 +94,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $values = array( 'url' => $feed->url(), + 'kind' => $feed->kind(), 'category' => $feed->category(), - 'name' => $title != '' ? $title : $feed->name(), + 'name' => $title != '' ? 
$title : $feed->name(true), 'website' => $feed->website(), 'description' => $feed->description(), 'lastUpdate' => 0, @@ -184,8 +194,25 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $timeout = intval(Minz_Request::param('timeout', 0)); $attributes['timeout'] = $timeout > 0 ? $timeout : null; + $feed_kind = Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS); + if ($feed_kind == FreshRSS_Feed::KIND_HTML_XPATH) { + $xPathSettings = []; + if (Minz_Request::param('xPathFeedTitle', '') != '') $xPathSettings['feedTitle'] = Minz_Request::param('xPathFeedTitle', '', true); + if (Minz_Request::param('xPathItem', '') != '') $xPathSettings['item'] = Minz_Request::param('xPathItem', '', true); + if (Minz_Request::param('xPathItemTitle', '') != '') $xPathSettings['itemTitle'] = Minz_Request::param('xPathItemTitle', '', true); + if (Minz_Request::param('xPathItemContent', '') != '') $xPathSettings['itemContent'] = Minz_Request::param('xPathItemContent', '', true); + if (Minz_Request::param('xPathItemUri', '') != '') $xPathSettings['itemUri'] = Minz_Request::param('xPathItemUri', '', true); + if (Minz_Request::param('xPathItemAuthor', '') != '') $xPathSettings['itemAuthor'] = Minz_Request::param('xPathItemAuthor', '', true); + if (Minz_Request::param('xPathItemTimestamp', '') != '') $xPathSettings['itemTimestamp'] = Minz_Request::param('xPathItemTimestamp', '', true); + if (Minz_Request::param('xPathItemThumbnail', '') != '') $xPathSettings['itemThumbnail'] = Minz_Request::param('xPathItemThumbnail', '', true); + if (Minz_Request::param('xPathItemCategories', '') != '') $xPathSettings['itemCategories'] = Minz_Request::param('xPathItemCategories', '', true); + if (!empty($xPathSettings)) { + $attributes['xpath'] = $xPathSettings; + } + } + try { - $feed = self::addFeed($url, '', $cat, '', $http_auth, $attributes); + $feed = self::addFeed($url, '', $cat, '', $http_auth, $attributes, $feed_kind); } catch (FreshRSS_BadUrl_Exception $e) { // Given url was 
not a valid url! Minz_Log::warning($e->getMessage()); @@ -264,6 +291,14 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { } } + /** + * @param int $feed_id + * @param string $feed_url + * @param bool $force + * @param SimplePie|null $simplePiePush + * @param bool $noCommit + * @param int $maxFeeds + */ public static function actualizeFeed($feed_id, $feed_url, $force, $simplePiePush = null, $noCommit = false, $maxFeeds = 10) { @set_time_limit(300); @@ -338,6 +373,8 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { try { if ($simplePiePush) { $simplePie = $simplePiePush; //Used by WebSub + } elseif ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) { + $simplePie = $feed->loadHtmlXpath(false, $isNewFeed); } else { $simplePie = $feed->load(false, $isNewFeed); } @@ -377,6 +414,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { $oldGuids = array(); // Add entries in database if possible. + /** @var FreshRSS_Entry $entry */ foreach ($entries as $entry) { if (isset($newGuids[$entry->guid()])) { continue; //Skip subsequent articles with same GUID @@ -765,7 +803,7 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { //Re-fetch articles as if the feed was new. $feedDAO->updateFeed($feed->id(), [ 'lastUpdate' => 0 ]); - self::actualizeFeed($feed_id, null, false, null, true); + self::actualizeFeed($feed_id, '', false); //Extract all feed entries from database, load complete content and store them back in database. $entries = $entryDAO->listWhere('f', $feed_id, FreshRSS_Entry::STATE_ALL, 'DESC', 0); diff --git a/app/Controllers/indexController.php b/app/Controllers/indexController.php index 0fc7bb61a..4f2f0d451 100755 --- a/app/Controllers/indexController.php +++ b/app/Controllers/indexController.php @@ -160,7 +160,7 @@ class FreshRSS_index_Controller extends FreshRSS_ActionController { } // No layout for RSS output. - $this->view->url = PUBLIC_TO_INDEX_PATH . '/' . (empty($_SERVER['QUERY_STRING']) ? 
'' : '?' . $_SERVER['QUERY_STRING']); + $this->view->rss_url = PUBLIC_TO_INDEX_PATH . '/' . (empty($_SERVER['QUERY_STRING']) ? '' : '?' . $_SERVER['QUERY_STRING']); $this->view->rss_title = FreshRSS_Context::$name . ' | ' . FreshRSS_View::title(); $this->view->_layout(false); header('Content-Type: application/rss+xml; charset=utf-8'); diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index 7d2c58714..8fa468b8e 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -192,8 +192,26 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { $feed->_filtersAction('read', preg_split('/[\n\r]+/', Minz_Request::param('filteractions_read', ''))); + $feed_kind = Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS); + if ($feed_kind == FreshRSS_Feed::KIND_HTML_XPATH) { + $xPathSettings = []; + if (Minz_Request::param('xPathFeedTitle', '') != '') $xPathSettings['feedTitle'] = Minz_Request::param('xPathFeedTitle', '', true); + if (Minz_Request::param('xPathItem', '') != '') $xPathSettings['item'] = Minz_Request::param('xPathItem', '', true); + if (Minz_Request::param('xPathItemTitle', '') != '') $xPathSettings['itemTitle'] = Minz_Request::param('xPathItemTitle', '', true); + if (Minz_Request::param('xPathItemContent', '') != '') $xPathSettings['itemContent'] = Minz_Request::param('xPathItemContent', '', true); + if (Minz_Request::param('xPathItemUri', '') != '') $xPathSettings['itemUri'] = Minz_Request::param('xPathItemUri', '', true); + if (Minz_Request::param('xPathItemAuthor', '') != '') $xPathSettings['itemAuthor'] = Minz_Request::param('xPathItemAuthor', '', true); + if (Minz_Request::param('xPathItemTimestamp', '') != '') $xPathSettings['itemTimestamp'] = Minz_Request::param('xPathItemTimestamp', '', true); + if (Minz_Request::param('xPathItemThumbnail', '') != '') $xPathSettings['itemThumbnail'] = Minz_Request::param('xPathItemThumbnail', '', 
true); + if (Minz_Request::param('xPathItemCategories', '') != '') $xPathSettings['itemCategories'] = Minz_Request::param('xPathItemCategories', '', true); + if (!empty($xPathSettings)) { + $feed->_attributes('xpath', $xPathSettings); + } + } + $values = array( 'name' => Minz_Request::param('name', ''), + 'kind' => $feed_kind, 'description' => sanitizeHTML(Minz_Request::param('description', '', true)), 'website' => checkUrl(Minz_Request::param('website', '')), 'url' => checkUrl(Minz_Request::param('url', '')), diff --git a/app/Models/Entry.php b/app/Models/Entry.php index a190e505d..ab88d777a 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -59,6 +59,38 @@ class FreshRSS_Entry extends Minz_Model { $this->_guid($guid); } + /** @param array $dao */ + public static function fromArray(array $dao): FreshRSS_Entry { + if (!isset($dao['content'])) { + $dao['content'] = ''; + } + if (isset($dao['thumbnail'])) { + $dao['content'] .= '

'; + } + $entry = new FreshRSS_Entry( + $dao['id_feed'] ?? 0, + $dao['guid'] ?? '', + $dao['title'] ?? '', + $dao['author'] ?? '', + $dao['content'] ?? '', + $dao['link'] ?? '', + $dao['date'] ?? 0, + $dao['is_read'] ?? false, + $dao['is_favorite'] ?? false, + $dao['tags'] ?? '' + ); + if (isset($dao['id'])) { + $entry->_id($dao['id']); + } + if (!empty($dao['timestamp'])) { + $entry->_date(strtotime($dao['timestamp'])); + } + if (!empty($dao['categories'])) { + $entry->_tags($dao['categories']); + } + return $entry; + } + public function id(): string { return $this->id; } @@ -83,6 +115,7 @@ class FreshRSS_Entry extends Minz_Model { return $this->content; } + /** @return array> */ public function enclosures(bool $searchBodyImages = false): array { $results = []; try { @@ -97,11 +130,20 @@ class FreshRSS_Entry extends Minz_Model { if ($searchEnclosures) { $enclosures = $xpath->query('//div[@class="enclosure"]/p[@class="enclosure-content"]/*[@src]'); foreach ($enclosures as $enclosure) { - $results[] = [ + $result = [ 'url' => $enclosure->getAttribute('src'), 'type' => $enclosure->getAttribute('data-type'), + 'medium' => $enclosure->getAttribute('data-medium'), 'length' => $enclosure->getAttribute('data-length'), ]; + if (empty($result['medium'])) { + switch (strtolower($enclosure->nodeName)) { + case 'img': $result['medium'] = 'image'; break; + case 'video': $result['medium'] = 'video'; break; + case 'audio': $result['medium'] = 'audio'; break; + } + } + $results[] = $result; } } if ($searchBodyImages) { @@ -432,52 +474,12 @@ class FreshRSS_Entry extends Minz_Model { } } - public static function getContentByParsing(string $url, string $path, array $attributes = array(), int $maxRedirs = 3): string { - $limits = FreshRSS_Context::$system_conf->limits; - $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']); - - if (FreshRSS_Context::$system_conf->simplepie_syslog_enabled) { - syslog(LOG_INFO, 'FreshRSS GET ' . 
SimplePie_Misc::url_remove_credentials($url)); - } - - $ch = curl_init(); - curl_setopt_array($ch, [ - CURLOPT_URL => $url, - CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url), - CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'), - CURLOPT_USERAGENT => FRESHRSS_USERAGENT, - CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], - CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], - //CURLOPT_FAILONERROR => true; - CURLOPT_MAXREDIRS => 4, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_ENCODING => '', //Enable all encodings - ]); - - curl_setopt_array($ch, FreshRSS_Context::$system_conf->curl_options); - - if (isset($attributes['curl_params']) && is_array($attributes['curl_params'])) { - curl_setopt_array($ch, $attributes['curl_params']); - } - - if (isset($attributes['ssl_verify'])) { - curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false); - if (!$attributes['ssl_verify']) { - curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1'); - } - } - $html = curl_exec($ch); - $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $c_error = curl_error($ch); - curl_close($ch); - - if ($c_status != 200 || $c_error != '') { - Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); - } - - if (is_string($html) && strlen($html) > 0) { + /** + * @param array $attributes + */ + public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string { + $html = getHtml($url, $attributes); + if (strlen($html) > 0) { require_once(LIB_PATH . 
'/lib_phpQuery.php'); /** * @var phpQueryObject @doc diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php index a10440edb..8f248e20f 100644 --- a/app/Models/EntryDAO.php +++ b/app/Models/EntryDAO.php @@ -164,7 +164,7 @@ INSERT IGNORE INTO `_entry` ( ) SELECT @rank:=@rank+1 AS id, guid, title, author, content_bin, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags FROM `_entrytmp` -ORDER BY date; +ORDER BY date, id; DELETE FROM `_entrytmp` WHERE id <= @rank; SQL; @@ -658,6 +658,7 @@ SQL; } } + /** @return FreshRSS_Entry|null */ public function searchByGuid($id_feed, $guid) { // un guid est unique pour un flux donné $sql = 'SELECT id, guid, title, author, ' @@ -669,9 +670,10 @@ SQL; $stm->bindParam(':guid', $guid); $stm->execute(); $res = $stm->fetchAll(PDO::FETCH_ASSOC); - return isset($res[0]) ? self::daoToEntry($res[0]) : null; + return isset($res[0]) ? FreshRSS_Entry::fromArray($res[0]) : null; } + /** @return FreshRSS_Entry|null */ public function searchById($id) { $sql = 'SELECT id, guid, title, author, ' . ($this->isCompressed() ? 'UNCOMPRESS(content_bin) AS content' : 'content') @@ -681,7 +683,7 @@ SQL; $stm->bindParam(':id', $id, PDO::PARAM_INT); $stm->execute(); $res = $stm->fetchAll(PDO::FETCH_ASSOC); - return isset($res[0]) ? self::daoToEntry($res[0]) : null; + return isset($res[0]) ? FreshRSS_Entry::fromArray($res[0]) : null; } public function searchIdByGuid($id_feed, $guid) { @@ -1061,7 +1063,7 @@ SQL; $stm = $this->listWhereRaw($type, $id, $state, $order, $limit, $firstId, $filters, $date_min); if ($stm) { while ($row = $stm->fetch(PDO::FETCH_ASSOC)) { - yield self::daoToEntry($row); + yield FreshRSS_Entry::fromArray($row); } } else { yield false; @@ -1092,7 +1094,7 @@ SQL; $stm = $this->pdo->prepare($sql); $stm->execute($ids); while ($row = $stm->fetch(PDO::FETCH_ASSOC)) { - yield self::daoToEntry($row); + yield FreshRSS_Entry::fromArray($row); } } @@ -1251,23 +1253,4 @@ SQL; $unread = empty($res[1]) ? 
0 : intval($res[1]); return array('all' => $all, 'unread' => $unread, 'read' => $all - $unread); } - - public static function daoToEntry($dao) { - $entry = new FreshRSS_Entry( - $dao['id_feed'], - $dao['guid'], - $dao['title'], - $dao['author'], - $dao['content'], - $dao['link'], - $dao['date'], - $dao['is_read'], - $dao['is_favorite'], - isset($dao['tags']) ? $dao['tags'] : '' - ); - if (isset($dao['id'])) { - $entry->_id($dao['id']); - } - return $entry; - } } diff --git a/app/Models/EntryDAOPGSQL.php b/app/Models/EntryDAOPGSQL.php index 7a46670fc..b97417a7c 100644 --- a/app/Models/EntryDAOPGSQL.php +++ b/app/Models/EntryDAOPGSQL.php @@ -45,13 +45,13 @@ rank bigint := (SELECT maxrank - COUNT(*) FROM `_entrytmp`); BEGIN INSERT INTO `_entry` (id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags) - (SELECT rank + row_number() OVER(ORDER BY date) AS id, guid, title, author, content, + (SELECT rank + row_number() OVER(ORDER BY date, id) AS id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags FROM `_entrytmp` AS etmp WHERE NOT EXISTS ( SELECT 1 FROM `_entry` AS ereal WHERE (etmp.id = ereal.id) OR (etmp.id_feed = ereal.id_feed AND etmp.guid = ereal.guid)) - ORDER BY date); + ORDER BY date, id); DELETE FROM `_entrytmp` WHERE id <= maxrank; END $$;'; $hadTransaction = $this->pdo->inTransaction(); diff --git a/app/Models/EntryDAOSQLite.php b/app/Models/EntryDAOSQLite.php index 8b0f2d252..16d15f899 100644 --- a/app/Models/EntryDAOSQLite.php +++ b/app/Models/EntryDAOSQLite.php @@ -41,13 +41,13 @@ DROP TABLE IF EXISTS `tmp`; CREATE TEMP TABLE `tmp` AS SELECT id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags FROM `_entrytmp` - ORDER BY date; + ORDER BY date, id; INSERT OR IGNORE INTO `_entry` (id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags) SELECT rowid + (SELECT MAX(id) - COUNT(*) 
FROM `tmp`) AS id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags FROM `tmp` - ORDER BY date; + ORDER BY date, id; DELETE FROM `_entrytmp` WHERE id <= (SELECT MAX(id) FROM `tmp`); DROP TABLE IF EXISTS `tmp`; '; diff --git a/app/Models/Feed.php b/app/Models/Feed.php index 3425f4bce..0e02194ef 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -1,6 +1,28 @@ $filterActions */ private $filterActions = null; public function __construct(string $url, bool $validate = true) { @@ -47,6 +86,9 @@ class FreshRSS_Feed extends Minz_Model { } } + /** + * @return FreshRSS_Feed + */ public static function example() { $f = new FreshRSS_Feed('http://example.net/', false); $f->faviconPrepare(); @@ -71,6 +113,9 @@ class FreshRSS_Feed extends Minz_Model { public function selfUrl(): string { return $this->selfUrl; } + public function kind(): int { + return $this->kind; + } public function hubUrl(): string { return $this->hubUrl; } @@ -200,6 +245,9 @@ class FreshRSS_Feed extends Minz_Model { } $this->url = $value; } + public function _kind($value) { + $this->kind = $value; + } public function _category($value) { $value = intval($value); $this->category = $value >= 0 ? 
$value : 0; @@ -267,7 +315,7 @@ class FreshRSS_Feed extends Minz_Model { * @return SimplePie|null */ public function load(bool $loadDetails = false, bool $noCache = false) { - if ($this->url !== null) { + if ($this->url != '') { // @phpstan-ignore-next-line if (CACHE_PATH === false) { throw new Minz_FileNotExistException( @@ -347,6 +395,7 @@ class FreshRSS_Feed extends Minz_Model { $guids = []; $hasBadGuids = $this->attributes('hasBadGuids'); + // TODO: Replace very slow $simplePie->get_item($i) by getting all items at once for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) { $item = $simplePie->get_item($i); if ($item == null) { @@ -375,6 +424,7 @@ class FreshRSS_Feed extends Minz_Model { $hasBadGuids = $this->attributes('hasBadGuids'); // We want chronological order and SimplePie uses reverse order. + // TODO: Replace very slow $simplePie->get_item($i) by getting all items at once for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) { $item = $simplePie->get_item($i); if ($item == null) { @@ -428,15 +478,18 @@ class FreshRSS_Feed extends Minz_Model { } elseif ($medium === 'audio' || strpos($mime, 'audio') === 0) { $enclosureContent .= '

💾

'; } elseif ($medium === 'video' || strpos($mime, 'video') === 0) { $enclosureContent .= '

💾

'; } else { //e.g. application, text, unknown - $enclosureContent .= '

💾

'; + $enclosureContent .= '

💾

'; } $thumbnailContent = ''; @@ -489,6 +542,97 @@ class FreshRSS_Feed extends Minz_Model { } } + /** + * @param array $attributes + * @return SimplePie|null + */ + public function loadHtmlXpath(bool $loadDetails = false, bool $noCache = false, array $attributes = []) { + if ($this->url == '') { + return null; + } + $feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES); + if ($this->httpAuth != '') { + $feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl); + } + + // Same naming conventions than https://github.com/RSS-Bridge/rss-bridge/wiki/XPathAbstract + // https://github.com/RSS-Bridge/rss-bridge/wiki/The-collectData-function + /** @var array */ + $xPathSettings = $this->attributes('xpath'); + $xPathFeedTitle = $xPathSettings['feedTitle'] ?? ''; + $xPathItem = $xPathSettings['item'] ?? ''; + $xPathItemTitle = $xPathSettings['itemTitle'] ?? ''; + $xPathItemContent = $xPathSettings['itemContent'] ?? ''; + $xPathItemUri = $xPathSettings['itemUri'] ?? ''; + $xPathItemAuthor = $xPathSettings['itemAuthor'] ?? ''; + $xPathItemTimestamp = $xPathSettings['itemTimestamp'] ?? ''; + $xPathItemThumbnail = $xPathSettings['itemThumbnail'] ?? ''; + $xPathItemCategories = $xPathSettings['itemCategories'] ?? ''; + if ($xPathItem == '') { + return null; + } + + $html = getHtml($feedSourceUrl, $attributes); + if (strlen($html) <= 0) { + return null; + } + + $view = new FreshRSS_View(); + $view->_path('index/rss.phtml'); + $view->internal_rendering = true; + $view->rss_url = $feedSourceUrl; + $view->entries = []; + + try { + $doc = new DOMDocument(); + $doc->recover = true; + $doc->strictErrorChecking = false; + $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); + $xpath = new DOMXPath($doc); + $view->rss_title = $xPathFeedTitle == '' ? '' : htmlspecialchars(@$xpath->evaluate('normalize-space(' . $xPathFeedTitle . 
')'), ENT_COMPAT, 'UTF-8'); + $view->rss_base = htmlspecialchars(trim($xpath->evaluate('normalize-space(//base/@href)')), ENT_COMPAT, 'UTF-8'); + $nodes = $xpath->query($xPathItem); + if (empty($nodes)) { + return null; + } + + foreach ($nodes as $node) { + $item = []; + $item['title'] = $xPathItemTitle == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemTitle . ')', $node); + $item['content'] = $xPathItemContent == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemContent . ')', $node); + $item['link'] = $xPathItemUri == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemUri . ')', $node); + $item['author'] = $xPathItemAuthor == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemAuthor . ')', $node); + $item['timestamp'] = $xPathItemTimestamp == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemTimestamp . ')', $node); + $item['thumbnail'] = $xPathItemThumbnail == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemThumbnail . ')', $node); + if ($xPathItemCategories != '') { + $itemCategories = @$xpath->query($xPathItemCategories); + if ($itemCategories) { + foreach ($itemCategories as $itemCategory) { + $item['categories'][] = $itemCategory->textContent; + } + } + } + if ($item['title'] . $item['content'] . $item['link'] != '') { + $item['guid'] = 'urn:sha1:' . sha1($item['title'] . $item['content'] . 
$item['link']); + $item = Minz_Helper::htmlspecialchars_utf8($item); + $view->entries[] = FreshRSS_Entry::fromArray($item); + } + } + } catch (Exception $ex) { + Minz_Log::warning($ex->getMessage()); + return null; + } + + if (count($view->entries) < 1) { + return null; + } + + $simplePie = customSimplePie(); + $simplePie->set_raw_data($view->renderToString()); + $simplePie->init(); + return $simplePie; + } + /** * To keep track of some new potentially unread articles since last commit+fetch from database */ @@ -532,18 +676,23 @@ class FreshRSS_Feed extends Minz_Model { return false; } - protected function cacheFilename(): string { - $simplePie = customSimplePie($this->attributes()); - $filename = $simplePie->get_cache_filename($this->url); - return CACHE_PATH . '/' . $filename . '.spc'; + public static function cacheFilename(string $url, array $attributes, int $kind = FreshRSS_Feed::KIND_RSS): string { + $simplePie = customSimplePie($attributes); + $filename = $simplePie->get_cache_filename($url); + if ($kind == FreshRSS_Feed::KIND_HTML_XPATH) { + return CACHE_PATH . '/' . $filename . '.html'; + } else { + return CACHE_PATH . '/' . $filename . 
'.spc'; + } } public function clearCache(): bool { - return @unlink($this->cacheFilename()); + return @unlink(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind)); } + /** @return int|false */ public function cacheModifiedTime() { - return @filemtime($this->cacheFilename()); + return @filemtime(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind)); } public function lock(): bool { @@ -567,7 +716,7 @@ class FreshRSS_Feed extends Minz_Model { * @return array */ public function filterActions(): array { - if ($this->filterActions == null) { + if (empty($this->filterActions)) { $this->filterActions = array(); $filters = $this->attributes('filters'); if (is_array($filters)) { @@ -582,6 +731,9 @@ class FreshRSS_Feed extends Minz_Model { return $this->filterActions; } + /** + * @param array $filterActions + */ private function _filterActions($filterActions) { $this->filterActions = $filterActions; if (is_array($this->filterActions) && !empty($this->filterActions)) { diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php index ab73b2ec2..c4a0b1429 100644 --- a/app/Models/FeedDAO.php +++ b/app/Models/FeedDAO.php @@ -5,7 +5,9 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { protected function addColumn(string $name) { Minz_Log::warning(__method__ . ': ' . 
$name); try { - if ($name === 'attributes') { //v1.11.0 + if ($name === 'kind') { //v1.20.0 + return $this->pdo->exec('ALTER TABLE `_feed` ADD COLUMN kind SMALLINT DEFAULT 0') !== false; + } elseif ($name === 'attributes') { //v1.11.0 return $this->pdo->exec('ALTER TABLE `_feed` ADD COLUMN attributes TEXT') !== false; } } catch (Exception $e) { @@ -17,7 +19,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { protected function autoUpdateDb(array $errorInfo) { if (isset($errorInfo[0])) { if ($errorInfo[0] === FreshRSS_DatabaseDAO::ER_BAD_FIELD_ERROR || $errorInfo[0] === FreshRSS_DatabaseDAOPGSQL::UNDEFINED_COLUMN) { - foreach (['attributes'] as $column) { + foreach (['attributes', 'kind'] as $column) { if (stripos($errorInfo[2], $column) !== false) { return $this->addColumn($column); } @@ -32,6 +34,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { INSERT INTO `_feed` ( url, + kind, category, name, website, @@ -45,7 +48,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { attributes ) VALUES - (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'; + (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'; $stm = $this->pdo->prepare($sql); $valuesTmp['url'] = safe_ascii($valuesTmp['url']); @@ -59,6 +62,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $values = array( substr($valuesTmp['url'], 0, 511), + $valuesTmp['kind'] ?? 
FreshRSS_Feed::KIND_RSS, $valuesTmp['category'], mb_strcut(trim($valuesTmp['name']), 0, FreshRSS_DatabaseDAO::LENGTH_INDEX_UNICODE, 'UTF-8'), substr($valuesTmp['website'], 0, 255), @@ -84,7 +88,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { } } - public function addFeedObject($feed): int { + public function addFeedObject(FreshRSS_Feed $feed): int { // TODO: not sure if we should write this method in DAO since DAO // should not be aware about feed class @@ -94,6 +98,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $values = array( 'id' => $feed->id(), 'url' => $feed->url(), + 'kind' => $feed->kind(), 'category' => $feed->category(), 'name' => $feed->name(), 'website' => $feed->website(), @@ -252,7 +257,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable { public function selectAll() { $sql = <<<'SQL' -SELECT id, url, category, name, website, description, `lastUpdate`, +SELECT id, url, kind, category, name, website, description, `lastUpdate`, priority, `pathEntries`, `httpAuth`, error, ttl, attributes FROM `_feed` SQL; @@ -346,7 +351,7 @@ SQL; */ public function listFeedsOrderUpdate(int $defaultCacheDuration = 3600, int $limit = 0) { $this->updateTTL(); - $sql = 'SELECT id, url, name, website, `lastUpdate`, `pathEntries`, `httpAuth`, ttl, attributes ' + $sql = 'SELECT id, url, kind, name, website, `lastUpdate`, `pathEntries`, `httpAuth`, ttl, attributes ' . 'FROM `_feed` ' . ($defaultCacheDuration < 0 ? '' : 'WHERE ttl >= ' . FreshRSS_Feed::TTL_DEFAULT . ' AND `lastUpdate` < (' . (time() + 60) @@ -557,20 +562,21 @@ SQL; $category = $catID; } - $myFeed = new FreshRSS_Feed(isset($dao['url']) ? $dao['url'] : '', false); + $myFeed = new FreshRSS_Feed($dao['url'] ?? '', false); + $myFeed->_kind($dao['kind'] ?? FreshRSS_Feed::KIND_RSS); $myFeed->_category($category); $myFeed->_name($dao['name']); - $myFeed->_website(isset($dao['website']) ? 
$dao['website'] : '', false); - $myFeed->_description(isset($dao['description']) ? $dao['description'] : ''); - $myFeed->_lastUpdate(isset($dao['lastUpdate']) ? $dao['lastUpdate'] : 0); - $myFeed->_priority(isset($dao['priority']) ? $dao['priority'] : 10); - $myFeed->_pathEntries(isset($dao['pathEntries']) ? $dao['pathEntries'] : ''); - $myFeed->_httpAuth(isset($dao['httpAuth']) ? base64_decode($dao['httpAuth']) : ''); - $myFeed->_error(isset($dao['error']) ? $dao['error'] : 0); - $myFeed->_ttl(isset($dao['ttl']) ? $dao['ttl'] : FreshRSS_Feed::TTL_DEFAULT); - $myFeed->_attributes('', isset($dao['attributes']) ? $dao['attributes'] : ''); - $myFeed->_nbNotRead(isset($dao['cache_nbUnreads']) ? $dao['cache_nbUnreads'] : 0); - $myFeed->_nbEntries(isset($dao['cache_nbEntries']) ? $dao['cache_nbEntries'] : 0); + $myFeed->_website($dao['website'] ?? '', false); + $myFeed->_description($dao['description'] ?? ''); + $myFeed->_lastUpdate($dao['lastUpdate'] ?? 0); + $myFeed->_priority($dao['priority'] ?? 10); + $myFeed->_pathEntries($dao['pathEntries'] ?? ''); + $myFeed->_httpAuth(base64_decode($dao['httpAuth'] ?? '')); + $myFeed->_error($dao['error'] ?? 0); + $myFeed->_ttl($dao['ttl'] ?? FreshRSS_Feed::TTL_DEFAULT); + $myFeed->_attributes('', $dao['attributes'] ?? ''); + $myFeed->_nbNotRead($dao['cache_nbUnreads'] ?? 0); + $myFeed->_nbEntries($dao['cache_nbEntries'] ?? 
0); if (isset($dao['id'])) { $myFeed->_id($dao['id']); } diff --git a/app/Models/FeedDAOSQLite.php b/app/Models/FeedDAOSQLite.php index 54146858b..a4432ea62 100644 --- a/app/Models/FeedDAOSQLite.php +++ b/app/Models/FeedDAOSQLite.php @@ -5,7 +5,7 @@ class FreshRSS_FeedDAOSQLite extends FreshRSS_FeedDAO { protected function autoUpdateDb(array $errorInfo) { if ($tableInfo = $this->pdo->query("PRAGMA table_info('feed')")) { $columns = $tableInfo->fetchAll(PDO::FETCH_COLUMN, 1); - foreach (['attributes'] as $column) { + foreach (['attributes', 'kind'] as $column) { if (!in_array($column, $columns)) { return $this->addColumn($column); } diff --git a/app/Models/View.php b/app/Models/View.php index e3a591155..365bfd261 100644 --- a/app/Models/View.php +++ b/app/Models/View.php @@ -7,12 +7,19 @@ class FreshRSS_View extends Minz_View { public $callbackBeforeFeeds; public $callbackBeforePagination; public $categories; + /** @var FreshRSS_Category|null */ public $category; + /** @var string */ public $current_user; + /** @var array */ public $entries; + /** @var FreshRSS_Entry */ public $entry; + /** @var FreshRSS_Feed|null */ public $feed; + /** @var array */ public $feeds; + /** @var int */ public $nbUnreadTags; public $tags; @@ -88,8 +95,14 @@ class FreshRSS_View extends Minz_View { public $nbPage; // RSS view - public $rss_title; - public $url; + /** @var string */ + public $rss_title = ''; + /** @var string */ + public $rss_url = ''; + /** @var string */ + public $rss_base = ''; + /** @var boolean */ + public $internal_rendering = false; // Content preview public $fatalError; diff --git a/app/SQL/install.sql.mysql.php b/app/SQL/install.sql.mysql.php index 1fed64fda..c52b58f65 100644 --- a/app/SQL/install.sql.mysql.php +++ b/app/SQL/install.sql.mysql.php @@ -16,6 +16,7 @@ ENGINE = INNODB; CREATE TABLE IF NOT EXISTS `_feed` ( `id` SMALLINT NOT NULL AUTO_INCREMENT, -- v0.7 `url` VARCHAR(511) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `kind` SMALLINT DEFAULT 0, -- 
v1.20.0 `category` SMALLINT DEFAULT 0, -- v0.7 `name` VARCHAR(191) NOT NULL, `website` VARCHAR(255) CHARACTER SET latin1 COLLATE latin1_bin, diff --git a/app/SQL/install.sql.pgsql.php b/app/SQL/install.sql.pgsql.php index 5b810deff..0a8298d29 100644 --- a/app/SQL/install.sql.pgsql.php +++ b/app/SQL/install.sql.pgsql.php @@ -13,6 +13,7 @@ CREATE TABLE IF NOT EXISTS `_category` ( CREATE TABLE IF NOT EXISTS `_feed` ( "id" SERIAL PRIMARY KEY, "url" VARCHAR(511) UNIQUE NOT NULL, + "kind" SMALLINT DEFAULT 0, -- v1.20.0 "category" SMALLINT DEFAULT 0, "name" VARCHAR(255) NOT NULL, "website" VARCHAR(255), diff --git a/app/SQL/install.sql.sqlite.php b/app/SQL/install.sql.sqlite.php index 74def4d98..44bf6fb33 100644 --- a/app/SQL/install.sql.sqlite.php +++ b/app/SQL/install.sql.sqlite.php @@ -14,6 +14,7 @@ CREATE TABLE IF NOT EXISTS `category` ( CREATE TABLE IF NOT EXISTS `feed` ( `id` INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, `url` VARCHAR(511) NOT NULL, + `kind` SMALLINT DEFAULT 0, -- v1.20.0 `category` SMALLINT DEFAULT 0, `name` VARCHAR(255) NOT NULL, `website` VARCHAR(255), diff --git a/app/i18n/cz/sub.php b/app/i18n/cz/sub.php index 8b896586d..2eff49030 100644 --- a/app/i18n/cz/sub.php +++ b/app/i18n/cz/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informace', 'keep_min' => 'Minimální počet článků pro ponechání', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Vymazat mezipaměť', 'clear_cache_help' => 'Vymazat mezipaměť pro tento kanál.', diff --git a/app/i18n/de/sub.php b/app/i18n/de/sub.php index 2b52b59ee..52323221d 100644 --- a/app/i18n/de/sub.php +++ b/app/i18n/de/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Information', // IGNORE 'keep_min' => 'Minimale Anzahl an Artikeln, die behalten wird', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Zwischenspeicher leeren', 'clear_cache_help' => 'Zwischenspeicher für diesen Feed leeren.', diff --git a/app/i18n/en-us/sub.php b/app/i18n/en-us/sub.php index fc1c8358e..41b8c377b 100644 --- a/app/i18n/en-us/sub.php +++ b/app/i18n/en-us/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Information', // IGNORE 'keep_min' => 'Minimum number of articles to keep', // IGNORE + 'kind' => array( + '_' => 'Type of feed source', // IGNORE + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // IGNORE + 'feed_title' => array( + '_' => 'feed title', // IGNORE + 'help' => 'Example: //title or a static string: "My custom feed"', // IGNORE + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // IGNORE + 'item' => array( + '_' => 'finding news items
(most important)', // IGNORE + 'help' => 'Example: //div[@class="news-item"]', // IGNORE + ), + 'item_author' => array( + '_' => 'item author', // IGNORE + 'help' => 'Can also be a static string. Example: "Anonymous"', // IGNORE + ), + 'item_categories' => 'items tags', // IGNORE + 'item_content' => array( + '_' => 'item content', // IGNORE + 'help' => 'Example to take the full item: .', // IGNORE + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // IGNORE + 'help' => 'Example: descendant::img/@src', // IGNORE + ), + 'item_timestamp' => array( + '_' => 'item date', // IGNORE + 'help' => 'The result will be parsed by strtotime()', // IGNORE + ), + 'item_title' => array( + '_' => 'item title', // IGNORE + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // IGNORE + ), + 'item_uri' => array( + '_' => 'item link (URL)', // IGNORE + 'help' => 'Example: descendant::a/@href', // IGNORE + ), + 'relative' => 'XPath (relative to item) for:', // IGNORE + 'xpath' => 'XPath for:', // IGNORE + ), + 'rss' => 'RSS / Atom (default)', // IGNORE + ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // IGNORE 'clear_cache_help' => 'Clear the cache for this feed.', // IGNORE diff --git a/app/i18n/en/sub.php b/app/i18n/en/sub.php index 2548916cf..902deb1b5 100644 --- a/app/i18n/en/sub.php +++ b/app/i18n/en/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Information', 'keep_min' => 'Minimum number of articles to keep', + 'kind' => array( + '_' => 'Type of feed source', + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', + 'feed_title' => array( + '_' => 'feed title', + 'help' => 'Example: //title or a static string: "My custom feed"', + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', + 'item' => array( + '_' => 'finding news items
(most important)', + 'help' => 'Example: //div[@class="news-item"]', + ), + 'item_author' => array( + '_' => 'item author', + 'help' => 'Can also be a static string. Example: "Anonymous"', + ), + 'item_categories' => 'items tags', + 'item_content' => array( + '_' => 'item content', + 'help' => 'Example to take the full item: .', + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', + 'help' => 'Example: descendant::img/@src', + ), + 'item_timestamp' => array( + '_' => 'item date', + 'help' => 'The result will be parsed by strtotime()', + ), + 'item_title' => array( + '_' => 'item title', + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', + ), + 'item_uri' => array( + '_' => 'item link (URL)', + 'help' => 'Example: descendant::a/@href', + ), + 'relative' => 'XPath (relative to item) for:', + 'xpath' => 'XPath for:', + ), + 'rss' => 'RSS / Atom (default)', + ), 'maintenance' => array( 'clear_cache' => 'Clear cache', 'clear_cache_help' => 'Clear the cache for this feed.', diff --git a/app/i18n/es/sub.php b/app/i18n/es/sub.php index ce29e369e..f55e0cbbb 100755 --- a/app/i18n/es/sub.php +++ b/app/i18n/es/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Información', 'keep_min' => 'Número mínimo de artículos a conservar', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Borrar caché', 'clear_cache_help' => 'Borrar la memoria caché de esta fuente.', diff --git a/app/i18n/fr/admin.php b/app/i18n/fr/admin.php index 706fa984d..4a628e2fe 100644 --- a/app/i18n/fr/admin.php +++ b/app/i18n/fr/admin.php @@ -72,8 +72,8 @@ return array( ), 'files' => 'Installation des fichiers', 'json' => array( - 'nok' => 'Vous ne disposez pas de l’extension recommendée JSON (paquet php-json).', - 'ok' => 'Vous disposez de l’extension recommendée JSON.', + 'nok' => 'Vous ne disposez pas de l’extension recommandée JSON (paquet php-json).', + 'ok' => 'Vous disposez de l’extension recommandée JSON.', ), 'mbstring' => array( 'nok' => 'Impossible de trouver la librairie recommandée mbstring pour Unicode.', @@ -199,7 +199,7 @@ return array( 'back_to_manage' => '← Revenir à la liste des utilisateurs', 'create' => 'Créer un nouvel utilisateur', 'database_size' => 'Volumétrie', - 'email' => 
'Adresse email', + 'email' => 'adresse électronique', 'enabled' => 'Actif', 'feed_count' => 'Flux', 'is_admin' => 'Admin', diff --git a/app/i18n/fr/conf.php b/app/i18n/fr/conf.php index 4ebf7895b..8d097fa59 100644 --- a/app/i18n/fr/conf.php +++ b/app/i18n/fr/conf.php @@ -73,7 +73,7 @@ return array( '_' => 'Suppression du compte', 'warn' => 'Le compte et toutes les données associées vont être supprimées.', ), - 'email' => 'Adresse email', + 'email' => 'adresse électronique', 'password_api' => 'Mot de passe API
(ex. : pour applis mobiles)', 'password_form' => 'Mot de passe
(pour connexion par formulaire)', 'password_format' => '7 caractères minimum', @@ -185,7 +185,7 @@ return array( 'email' => 'Courriel', 'facebook' => 'Facebook', // IGNORE 'more_information' => 'Plus d’informations', - 'print' => 'Print', // IGNORE + 'print' => 'Imprimer', 'raindrop' => 'Raindrop.io', // IGNORE 'remove' => 'Supprimer la méthode de partage', 'shaarli' => 'Shaarli', // IGNORE diff --git a/app/i18n/fr/install.php b/app/i18n/fr/install.php index b9157ff53..d27fa6049 100644 --- a/app/i18n/fr/install.php +++ b/app/i18n/fr/install.php @@ -71,8 +71,8 @@ return array( 'ok' => 'Vous disposez de fileinfo.', ), 'json' => array( - 'nok' => 'Vous ne disposez pas de l’extension recommendée JSON (paquet php-json).', - 'ok' => 'Vous disposez de l’extension recommendée JSON.', + 'nok' => 'Vous ne disposez pas de l’extension recommandée JSON (paquet php-json).', + 'ok' => 'Vous disposez de l’extension recommandée JSON.', ), 'mbstring' => array( 'nok' => 'Impossible de trouver la librairie recommandée mbstring pour Unicode.', @@ -124,7 +124,7 @@ return array( 'missing_applied_migrations' => 'Quelque chose s’est mal passé, vous devriez créer le fichier %s à la main.', 'ok' => 'L’installation s’est bien passée.', 'session' => array( - 'nok' => 'Le serveur Web semble mal configué pour les cookies nécessaires aux sessions PHP!', + 'nok' => 'Le serveur Web semble mal configuré pour les cookies nécessaires aux sessions PHP!', ), 'step' => 'étape %d', 'steps' => 'Étapes', diff --git a/app/i18n/fr/sub.php b/app/i18n/fr/sub.php index 710d75918..c8528504a 100644 --- a/app/i18n/fr/sub.php +++ b/app/i18n/fr/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informations', 'keep_min' => 'Nombre minimum d’articles à conserver', + 'kind' => array( + '_' => 'Type de source de flux', + 'html_xpath' => array( + '_' => 'HTML + XPath (Moissonnage du Web)', + 'feed_title' => array( + '_' => 'titre de flux', + 'help' => 'Exemple : //title ou un text statique : "Mon flux 
personnalisé"', + ), + 'help' => 'XPath 1.0 est un langage de requête pour les utilisateurs avancés, supporté par FreshRSS pour le moissonnage du Web (Web scraping).', + 'item' => array( + '_' => 'trouver les articles', + 'help' => 'Exemple : //div[@class="article"]', + ), + 'item_author' => array( + '_' => 'auteur de l’article', + 'help' => 'Peut aussi être une chaîne de texte statique. Exemple : "Anonyme"', + ), + 'item_categories' => 'catégories (tags) de l’article', + 'item_content' => array( + '_' => 'contenu de l’article', + 'help' => 'Exemple pour prendre l’article complet : .', + ), + 'item_thumbnail' => array( + '_' => 'miniature de l’article', + 'help' => 'Exemple : descendant::img/@src', + ), + 'item_timestamp' => array( + '_' => 'date de l’article', + 'help' => 'Le résultat sera passé à la fonction strtotime()', + ), + 'item_title' => array( + '_' => 'titre de l’article', + 'help' => 'Utiliser en particulier l’axe XPath descendant:: comme descendant::h2', + ), + 'item_uri' => array( + '_' => 'lien (URL) de l’article', + 'help' => 'Exemple : descendant::a/@href', + ), + 'relative' => 'XPath (relatif à l’article) pour :', + 'xpath' => 'XPath pour :', + ), + 'rss' => 'RSS / Atom (par défaut)', + ), 'maintenance' => array( 'clear_cache' => 'Vider le cache', 'clear_cache_help' => 'Supprime le cache de ce flux.', @@ -100,7 +143,7 @@ return array( 'ttl' => 'Ne pas automatiquement rafraîchir plus souvent que', 'url' => 'URL du flux', 'useragent' => 'Sélectionner l’agent utilisateur pour télécharger ce flux', - 'useragent_help' => 'Exemple: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0)', + 'useragent_help' => 'Exemple : Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0)', 'validator' => 'Vérifier la validité du flux', 'website' => 'URL du site', 'websub' => 'Notification instantanée par WebSub', diff --git a/app/i18n/fr/user.php b/app/i18n/fr/user.php index e51c2910b..dabc5fab6 100644 --- a/app/i18n/fr/user.php +++ b/app/i18n/fr/user.php @@ -13,28 +13,28 @@ 
return array( 'email' => array( 'feedback' => array( - 'invalid' => 'L’adresse email est invalide.', - 'required' => 'L’adresse email est requise.', + 'invalid' => 'L’adresse électronique est invalide.', + 'required' => 'L’adresse électronique est requise.', ), 'validation' => array( - 'change_email' => 'Vous pouvez changer votre adresse email dans votre profil.', + 'change_email' => 'Vous pouvez changer votre adresse électronique dans votre profil.', 'email_sent_to' => 'Nous venons d’envoyer un email à %s, veuillez suivre ses indications pour valider votre adresse.', 'feedback' => array( 'email_failed' => 'Nous n’avons pas pu vous envoyer d’email à cause d’une mauvaise configuration du serveur.', 'email_sent' => 'Un email a été envoyé à votre adresse.', - 'error' => 'L’adresse email n’a pas pu être validée.', - 'ok' => 'L’adresse email a été validée.', - 'unnecessary' => 'L’adresse email a déjà été validée.', - 'wrong_token' => 'L’adresse email n’a pas pu être validée à cause d’un mauvais token.', + 'error' => 'L’adresse électronique n’a pas pu être validée.', + 'ok' => 'L’adresse électronique a été validée.', + 'unnecessary' => 'L’adresse électronique a déjà été validée.', + 'wrong_token' => 'L’adresse électronique n’a pas pu être validée à cause d’un mauvais token.', ), - 'need_to' => 'Vous devez valider votre adresse email avant de pouvoir utiliser %s.', + 'need_to' => 'Vous devez valider votre adresse électronique avant de pouvoir utiliser %s.', 'resend_email' => 'Renvoyer l’email', - 'title' => 'Validation de l’adresse email', + 'title' => 'Validation de l’adresse électronique', ), ), 'mailer' => array( 'email_need_validation' => array( - 'body' => 'Vous venez de vous inscrire sur %s mais vous devez encore valider votre adresse email. Pour cela, veuillez cliquer sur ce lien :', + 'body' => 'Vous venez de vous inscrire sur %s mais vous devez encore valider votre adresse électronique. 
Pour cela, veuillez cliquer sur ce lien :', 'title' => 'Vous devez valider votre compte', 'welcome' => 'Bienvenue %s,', ), diff --git a/app/i18n/he/sub.php b/app/i18n/he/sub.php index 1f4dc019f..6068a63c0 100644 --- a/app/i18n/he/sub.php +++ b/app/i18n/he/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'מידע', 'keep_min' => 'מסםר מינימלי של מאמרים לשמור', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // TODO 'clear_cache_help' => 'Clear the cache for this feed.', // TODO diff --git a/app/i18n/it/sub.php b/app/i18n/it/sub.php index ac5080ffb..cab35180e 100644 --- a/app/i18n/it/sub.php +++ b/app/i18n/it/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informazioni', 'keep_min' => 'Numero minimo di articoli da mantenere', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Clear cache', // TODO 'clear_cache_help' => 'Clear the cache for this feed.', // TODO diff --git a/app/i18n/ja/sub.php b/app/i18n/ja/sub.php index 4b68e46fd..ba7fa23b1 100644 --- a/app/i18n/ja/sub.php +++ b/app/i18n/ja/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'インフォメーション', 'keep_min' => '最小数の記事は保持されます', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'キャッシュのクリア', 'clear_cache_help' => 'このフィードのキャッシュをクリアします。', diff --git a/app/i18n/ko/sub.php b/app/i18n/ko/sub.php index 27b1f8bfa..ff9af8c39 100644 --- a/app/i18n/ko/sub.php +++ b/app/i18n/ko/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => '정보', 'keep_min' => '최소 유지 글 개수', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => '캐쉬 지우기', 'clear_cache_help' => '이 피드의 캐쉬 지우기.', diff --git a/app/i18n/nl/sub.php b/app/i18n/nl/sub.php index 611e97497..b8439f0b5 100644 --- a/app/i18n/nl/sub.php +++ b/app/i18n/nl/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informatie', 'keep_min' => 'Minimum aantal artikelen om te houden', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Cache leegmaken', 'clear_cache_help' => 'Cache voor deze feed leegmaken.', diff --git a/app/i18n/oc/sub.php b/app/i18n/oc/sub.php index fe4b38776..5cc7c792a 100644 --- a/app/i18n/oc/sub.php +++ b/app/i18n/oc/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informacions', 'keep_min' => 'Nombre minimum d’articles de servar', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Escafar lo cache', 'clear_cache_help' => 'Escafar lo cache d’aqueste flux sul disc', diff --git a/app/i18n/pl/sub.php b/app/i18n/pl/sub.php index 3c2f7b815..204d9ffef 100644 --- a/app/i18n/pl/sub.php +++ b/app/i18n/pl/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informacja', 'keep_min' => 'Minimalna liczba wiadomości do do przechowywania', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Wyczyść pamięć podręczną', 'clear_cache_help' => 'Czyści pamięć podręczną tego kanału.', diff --git a/app/i18n/pt-br/sub.php b/app/i18n/pt-br/sub.php index bc512e867..25d76ad9f 100644 --- a/app/i18n/pt-br/sub.php +++ b/app/i18n/pt-br/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informações', 'keep_min' => 'Número mínimo de artigos para manter', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Limpar o cache', 'clear_cache_help' => 'Limpar o cache em disco deste feed', diff --git a/app/i18n/ru/sub.php b/app/i18n/ru/sub.php index e11404674..1be761ab6 100644 --- a/app/i18n/ru/sub.php +++ b/app/i18n/ru/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Информация', 'keep_min' => 'Оставлять статей не менее', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Очистить кэш', 'clear_cache_help' => 'Очистить кэш для этой ленты.', diff --git a/app/i18n/sk/sub.php b/app/i18n/sk/sub.php index 3da71a24c..ef6e037fb 100644 --- a/app/i18n/sk/sub.php +++ b/app/i18n/sk/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Informácia', 'keep_min' => 'Minimálny počet článkov na uchovanie', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Vymazať vyrovnáciu pamäť', 'clear_cache_help' => 'Vymazať vyrovnáciu pamäť pre tento kanál.', diff --git a/app/i18n/tr/sub.php b/app/i18n/tr/sub.php index 4704b401c..e9f58f895 100644 --- a/app/i18n/tr/sub.php +++ b/app/i18n/tr/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => 'Bilgi', 'keep_min' => 'En az tutulacak makale sayısı', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => 'Önbelleği temizle', 'clear_cache_help' => 'Bu akışın önbelleğini temizler.', diff --git a/app/i18n/zh-cn/sub.php b/app/i18n/zh-cn/sub.php index 3fcdbf5c8..d45ba91eb 100644 --- a/app/i18n/zh-cn/sub.php +++ b/app/i18n/zh-cn/sub.php @@ -61,6 +61,49 @@ return array( ), 'information' => '信息', 'keep_min' => '至少保存的文章数', + 'kind' => array( + '_' => 'Type of feed source', // TODO + 'html_xpath' => array( + '_' => 'HTML + XPath (Web scraping)', // TODO + 'feed_title' => array( + '_' => 'feed title', // TODO + 'help' => 'Example: //title or a static string: "My custom feed"', // TODO + ), + 'help' => 'XPath 1.0 is a standard query language for advanced users, and which FreshRSS supports to enable Web scraping.', // TODO + 'item' => array( + '_' => 'finding news items
(most important)', // TODO + 'help' => 'Example: //div[@class="news-item"]', // TODO + ), + 'item_author' => array( + '_' => 'item author', // TODO + 'help' => 'Can also be a static string. Example: "Anonymous"', // TODO + ), + 'item_categories' => 'items tags', // TODO + 'item_content' => array( + '_' => 'item content', // TODO + 'help' => 'Example to take the full item: .', // TODO + ), + 'item_thumbnail' => array( + '_' => 'item thumbnail', // TODO + 'help' => 'Example: descendant::img/@src', // TODO + ), + 'item_timestamp' => array( + '_' => 'item date', // TODO + 'help' => 'The result will be parsed by strtotime()', // TODO + ), + 'item_title' => array( + '_' => 'item title', // TODO + 'help' => 'Use in particular the XPath axis descendant:: like descendant::h2', // TODO + ), + 'item_uri' => array( + '_' => 'item link (URL)', // TODO + 'help' => 'Example: descendant::a/@href', // TODO + ), + 'relative' => 'XPath (relative to item) for:', // TODO + 'xpath' => 'XPath for:', // TODO + ), + 'rss' => 'RSS / Atom (default)', // TODO + ), 'maintenance' => array( 'clear_cache' => '清理缓存', 'clear_cache_help' => '清除该feed的缓存', diff --git a/app/layout/layout.phtml b/app/layout/layout.phtml index cb9b6c1ba..1d41cc690 100644 --- a/app/layout/layout.phtml +++ b/app/layout/layout.phtml @@ -31,7 +31,7 @@ if (_t('gen.dir') === 'rtl') { rss_title)) { + if ($this->rss_title != '') { $url_rss = $url_base; $url_rss['a'] = 'rss'; if (FreshRSS_Context::$user_conf->since_hours_posts_per_rss) { diff --git a/app/views/helpers/export/articles.phtml b/app/views/helpers/export/articles.phtml index c131b8474..ad5210968 100644 --- a/app/views/helpers/export/articles.phtml +++ b/app/views/helpers/export/articles.phtml @@ -22,7 +22,7 @@ foreach ($this->entriesRaw as $entryRaw) { if ($entryRaw == null) { continue; } - $entry = FreshRSS_EntryDAO::daoToEntry($entryRaw); + $entry = FreshRSS_Entry::fromArray($entryRaw); if (!isset($this->feed)) { $feed = 
FreshRSS_CategoryDAO::findFeed($this->categories, $entry->feed()); if ($feed === null) { diff --git a/app/views/helpers/feed/update.phtml b/app/views/helpers/feed/update.phtml index 264881f77..f71be5135 100644 --- a/app/views/helpers/feed/update.phtml +++ b/app/views/helpers/feed/update.phtml @@ -373,6 +373,110 @@ + +
+ +
+ +
+
+ +
+ feed->attributes('xpath')); + ?> +

+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +
+
+
+
+
+ + +
+
+
diff --git a/app/views/index/normal.phtml b/app/views/index/normal.phtml index 5dde2a171..06323dcb0 100644 --- a/app/views/index/normal.phtml +++ b/app/views/index/normal.phtml @@ -21,14 +21,17 @@ $today = @strtotime('today');
entries as $item): $lastEntry = $item; $nbEntries++; ob_flush(); - $this->entry = Minz_ExtensionManager::callHook('entry_before_display', $item); - if ($this->entry == null) { + /** @var FreshRSS_Entry */ + $item = Minz_ExtensionManager::callHook('entry_before_display', $item); + if ($item == null) { continue; } + $this->entry = $item; // We most likely already have the feed object in cache $this->feed = FreshRSS_CategoryDAO::findFeed($this->categories, $this->entry->feed()); diff --git a/app/views/index/reader.phtml b/app/views/index/reader.phtml index e4fb74708..b408e3480 100644 --- a/app/views/index/reader.phtml +++ b/app/views/index/reader.phtml @@ -15,10 +15,12 @@ $content_width = FreshRSS_Context::$user_conf->content_width; entries as $item): $lastEntry = $item; $nbEntries++; ob_flush(); + /** @var FreshRSS_Entry */ $item = Minz_ExtensionManager::callHook('entry_before_display', $item); if ($item == null) { continue; diff --git a/app/views/index/rss.phtml b/app/views/index/rss.phtml index eedb31fa4..0b07a02f3 100755 --- a/app/views/index/rss.phtml +++ b/app/views/index/rss.phtml @@ -1,15 +1,26 @@ '; ?> - +rss_base == '' ? '' : ' xml:base="' . $this->rss_base . '"' ?> +> <?= $this->rss_title ?> - + internal_rendering ? $this->rss_url : Minz_Url::display('', 'html', true) ?> rss_title) ?> GMT - + entries as $item) { + if (!$this->internal_rendering) { + /** @var FreshRSS_Entry */ + $item = Minz_ExtensionManager::callHook('entry_before_display', $item); + if ($item == null) { + continue; + } + } ?> <?= $item->title() ?> @@ -27,12 +38,23 @@ foreach ($this->entries as $item) { echo "\t\t\t" , '', $category, '', "\n"; } } + $enclosures = $item->enclosures(false); + if (is_array($enclosures)) { + foreach ($enclosures as $enclosure) { + // https://www.rssboard.org/media-rss + echo "\t\t\t" , '', "\n"; + } + } ?> content(); ?>]]> date(true)) ?> - id() ?> + id() > 0 ? 
$item->id() : $item->guid() ?> diff --git a/app/views/subscription/add.phtml b/app/views/subscription/add.phtml index 380f5434f..344e25ade 100644 --- a/app/views/subscription/add.phtml +++ b/app/views/subscription/add.phtml @@ -51,6 +51,97 @@ +
+ + + + +
+ +
+ +
+
+ +
+

+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +
+
+
+
+
diff --git a/data/cache/.gitignore b/data/cache/.gitignore index 0307e6493..6c43765c7 100644 --- a/data/cache/.gitignore +++ b/data/cache/.gitignore @@ -1 +1,3 @@ -*.spc \ No newline at end of file +*.spc +*.html +!index.html diff --git a/lib/Minz/Url.php b/lib/Minz/Url.php index be3184b40..40cadb49a 100644 --- a/lib/Minz/Url.php +++ b/lib/Minz/Url.php @@ -121,7 +121,8 @@ class Minz_Url { /** * @param string $controller * @param string $action - * @param string ...$args + * @param string|int ...$args + * @return string|false */ function _url ($controller, $action, ...$args) { $nb_args = count($args); @@ -132,8 +133,8 @@ function _url ($controller, $action, ...$args) { $params = array (); for ($i = 0; $i < $nb_args; $i += 2) { - $arg = $args[$i]; - $params[$arg] = $args[$i + 1]; + $arg = '' . $args[$i]; + $params[$arg] = '' . $args[$i + 1]; } return Minz_Url::display (array ('c' => $controller, 'a' => $action, 'params' => $params)); diff --git a/lib/Minz/View.php b/lib/Minz/View.php index 431a8b700..6cf811bff 100644 --- a/lib/Minz/View.php +++ b/lib/Minz/View.php @@ -112,6 +112,12 @@ class Minz_View { } } + public function renderToString(): string { + ob_start(); + $this->render(); + return ob_get_clean(); + } + /** * Ajoute un élément du layout * @param string $part l'élément partial à ajouter diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php index b0e973e83..bf4a66bb4 100644 --- a/lib/SimplePie/SimplePie.php +++ b/lib/SimplePie/SimplePie.php @@ -2275,7 +2275,7 @@ class SimplePie */ public function get_base($element = array()) { - if (!($this->get_type() & SIMPLEPIE_TYPE_RSS_SYNDICATION) && !empty($element['xml_base_explicit']) && isset($element['xml_base'])) + if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { return $element['xml_base']; } diff --git a/lib/lib_phpQuery.php b/lib/lib_phpQuery.php index 411aa120c..1fabfcb6d 100644 --- a/lib/lib_phpQuery.php +++ b/lib/lib_phpQuery.php @@ -436,7 +436,8 @@ class 
DOMDocumentWrapper { } protected function isXML($markup) { // return strpos($markup, 'set_cache_name_function('sha1'); $simplePie->set_cache_location(CACHE_PATH); $simplePie->set_cache_duration($limits['cache_duration']); + $simplePie->enable_order_by_date(false); $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']); $simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']); @@ -290,7 +291,10 @@ function customSimplePie($attributes = array()): SimplePie { return $simplePie; } -function sanitizeHTML($data, $base = '', $maxLength = false) { +/** + * @param int|false $maxLength + */ +function sanitizeHTML($data, string $base = '', $maxLength = false) { if (!is_string($data) || ($maxLength !== false && $maxLength <= 0)) { return ''; } @@ -311,6 +315,127 @@ function sanitizeHTML($data, $base = '', $maxLength = false) { return $result; } +function cleanCache(int $hours = 720) { + $files = glob(CACHE_PATH . '/*.{html,spc}', GLOB_BRACE | GLOB_NOSORT); + foreach ($files as $file) { + if (substr($file, -10) === 'index.html') { + continue; + } + $cacheMtime = @filemtime($file); + if ($cacheMtime !== false && $cacheMtime < time() - (3600 * $hours)) { + unlink($file); + } + } +} + +/** + * Set an XML preamble to enforce the HTML content type charset received by HTTP. + * @param string $html the row downloaded HTML content + * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8' + * @return string an HTML string with XML encoding information for DOMDocument::loadHTML() + */ +function enforceHttpEncoding(string $html, string $contentType = ''): string { + $httpCharset = preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $matches) === false ? '' : $matches[1] ?? 
''; + if ($httpCharset == '') { + // No charset defined by HTTP, do nothing + return $html; + } + $httpCharsetNormalized = SimplePie_Misc::encoding($httpCharset); + if ($httpCharsetNormalized === 'windows-1252') { + // Default charset for HTTP, do nothing + return $html; + } + if (substr($html, 0, 3) === "\xEF\xBB\xBF" || // UTF-8 BOM + substr($html, 0, 2) === "\xFF\xFE" || // UTF-16 Little Endian BOM + substr($html, 0, 2) === "\xFE\xFF" || // UTF-16 Big Endian BOM + substr($html, 0, 4) === "\xFF\xFE\x00\x00" || // UTF-32 Little Endian BOM + substr($html, 0, 4) === "\x00\x00\xFE\xFF") { // UTF-32 Big Endian BOM + // Existing byte order mark, do nothing + return $html; + } + if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) { + // Existing XML declaration, do nothing + return $html; + } + return '<' . '?xml version="1.0" encoding="' . $httpCharsetNormalized . '" ?' . ">\n" . $html; +} + +/** + * @param array $attributes + */ +function getHtml(string $url, array $attributes = []): string { + $limits = FreshRSS_Context::$system_conf->limits; + $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']); + + $cachePath = FreshRSS_Feed::cacheFilename($url, $attributes, FreshRSS_Feed::KIND_HTML_XPATH); + $cacheMtime = @filemtime($cachePath); + if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) { + $html = @file_get_contents($cachePath); + if ($html != '') { + syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . SimplePie_Misc::url_remove_credentials($url)); + return $html; + } + } + + if (mt_rand(0, 30) === 1) { // Remove old entries once in a while + cleanCache(); + } + + if (FreshRSS_Context::$system_conf->simplepie_syslog_enabled) { + syslog(LOG_INFO, 'FreshRSS GET ' . 
SimplePie_Misc::url_remove_credentials($url)); + } + + // TODO: Implement HTTP 1.1 conditional GET If-Modified-Since + $ch = curl_init(); + curl_setopt_array($ch, [ + CURLOPT_URL => $url, + CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url), + CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'), + CURLOPT_USERAGENT => FRESHRSS_USERAGENT, + CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'], + //CURLOPT_FAILONERROR => true; + CURLOPT_MAXREDIRS => 4, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_ENCODING => '', //Enable all encodings + ]); + + curl_setopt_array($ch, FreshRSS_Context::$system_conf->curl_options); + + if (isset($attributes['curl_params']) && is_array($attributes['curl_params'])) { + curl_setopt_array($ch, $attributes['curl_params']); + } + + if (isset($attributes['ssl_verify'])) { + curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0); + curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false); + if (!$attributes['ssl_verify']) { + curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1'); + } + } + $html = curl_exec($ch); + $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $c_content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); //TODO: Check if that may be null + $c_error = curl_error($ch); + curl_close($ch); + + if ($c_status != 200 || $c_error != '' || $html === false) { + Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . 
$url); + } + if ($html == false) { + $html = ''; + } else { + $html = enforceHttpEncoding($html, $c_content_type); + } + + if (file_put_contents($cachePath, $html) === false) { + Minz_Log::warning("Error saving cache $cachePath for $url"); + } + + return $html; +} + /** * Validate an email address, supports internationalized addresses. * diff --git a/p/api/fever.php b/p/api/fever.php index beb0883e4..139cd658a 100644 --- a/p/api/fever.php +++ b/p/api/fever.php @@ -114,7 +114,7 @@ class FeverDAO extends Minz_ModelPdo $entries = array(); foreach ($result as $dao) { - $entries[] = FreshRSS_EntryDAO::daoToEntry($dao); + $entries[] = FreshRSS_Entry::fromArray($dao); } return $entries; diff --git a/p/api/greader.php b/p/api/greader.php index 7c4aba9ea..43e3647d1 100644 --- a/p/api/greader.php +++ b/p/api/greader.php @@ -536,6 +536,7 @@ function entriesToArray($entries) { $items = array(); foreach ($entries as $item) { + /** @var FreshRSS_Entry $entry */ $entry = Minz_ExtensionManager::callHook('entry_before_display', $item); if ($entry == null) { continue; diff --git a/p/scripts/extra.js b/p/scripts/extra.js index 00a460917..505b05110 100644 --- a/p/scripts/extra.js +++ b/p/scripts/extra.js @@ -213,6 +213,49 @@ function init_configuration_alert() { }; } +/** + * Allow a