aboutsummaryrefslogtreecommitdiff
path: root/app/Models
diff options
context:
space:
mode:
author Alexandre Alapetite <alexandre@alapetite.fr> 2022-02-28 20:22:43 +0100
committer GitHub <noreply@github.com> 2022-02-28 20:22:43 +0100
commit 1fe66ad020ca8f0560bb9c6e311852ed77228f78 (patch)
tree df78da3f33a9f13a9d6ba3f2744c369bd6e313a6 /app/Models
parent fa23ae76ea46b329fb65329081df95e864b03b23 (diff)
Implement Web scraping "HTML + XPath" (#4220)
* More PHP type hints for Fever Follow-up of https://github.com/FreshRSS/FreshRSS/pull/4201 Related to https://github.com/FreshRSS/FreshRSS/issues/4200 * Detail * Draft * Progress * More draft * Fix thumbnail PHP type hint https://github.com/FreshRSS/FreshRSS/issues/4215 * More types * A bit more * Refactor FreshRSS_Entry::fromArray * Progress * Starts to work * Categories * Functional * Layout update * Fix relative URLs * Cache system * Forgotten files * Remove a debug line * Automatic form validation of XPath expressions * data-leave-validation * Fix reload action * Simpler examples * Fix column type for PostgreSQL * Enforce HTTP encoding * Readme * Fix get full content * target="_blank" * gitignore * htmlspecialchars_utf8 * Implement HTML <base> And fix/revert `xml:base` support in SimplePie https://github.com/simplepie/simplepie/commit/e49c578817aa504d8d05cd7f33857aeda9d41908 * SimplePie upstream PR merged https://github.com/simplepie/simplepie/pull/723
Diffstat (limited to 'app/Models')
-rw-r--r-- app/Models/Entry.php 96
-rw-r--r-- app/Models/EntryDAO.php 31
-rw-r--r-- app/Models/EntryDAOPGSQL.php 4
-rw-r--r-- app/Models/EntryDAOSQLite.php 4
-rw-r--r-- app/Models/Feed.php 188
-rw-r--r-- app/Models/FeedDAO.php 42
-rw-r--r-- app/Models/FeedDAOSQLite.php 2
-rw-r--r-- app/Models/View.php 17
8 files changed, 270 insertions, 114 deletions
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index a190e505d..ab88d777a 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -59,6 +59,38 @@ class FreshRSS_Entry extends Minz_Model {
$this->_guid($guid);
}
+ /** @param array<string,mixed> $dao */
+ public static function fromArray(array $dao): FreshRSS_Entry {
+ if (!isset($dao['content'])) {
+ $dao['content'] = '';
+ }
+ if (isset($dao['thumbnail'])) {
+ $dao['content'] .= '<p class="enclosure-content"><img src="' . $dao['thumbnail'] . '" alt="" /></p>';
+ }
+ $entry = new FreshRSS_Entry(
+ $dao['id_feed'] ?? 0,
+ $dao['guid'] ?? '',
+ $dao['title'] ?? '',
+ $dao['author'] ?? '',
+ $dao['content'] ?? '',
+ $dao['link'] ?? '',
+ $dao['date'] ?? 0,
+ $dao['is_read'] ?? false,
+ $dao['is_favorite'] ?? false,
+ $dao['tags'] ?? ''
+ );
+ if (isset($dao['id'])) {
+ $entry->_id($dao['id']);
+ }
+ if (!empty($dao['timestamp'])) {
+ $entry->_date(strtotime($dao['timestamp']));
+ }
+ if (!empty($dao['categories'])) {
+ $entry->_tags($dao['categories']);
+ }
+ return $entry;
+ }
+
public function id(): string {
return $this->id;
}
@@ -83,6 +115,7 @@ class FreshRSS_Entry extends Minz_Model {
return $this->content;
}
+ /** @return array<array<string,string>> */
public function enclosures(bool $searchBodyImages = false): array {
$results = [];
try {
@@ -97,11 +130,20 @@ class FreshRSS_Entry extends Minz_Model {
if ($searchEnclosures) {
$enclosures = $xpath->query('//div[@class="enclosure"]/p[@class="enclosure-content"]/*[@src]');
foreach ($enclosures as $enclosure) {
- $results[] = [
+ $result = [
'url' => $enclosure->getAttribute('src'),
'type' => $enclosure->getAttribute('data-type'),
+ 'medium' => $enclosure->getAttribute('data-medium'),
'length' => $enclosure->getAttribute('data-length'),
];
+ if (empty($result['medium'])) {
+ switch (strtolower($enclosure->nodeName)) {
+ case 'img': $result['medium'] = 'image'; break;
+ case 'video': $result['medium'] = 'video'; break;
+ case 'audio': $result['medium'] = 'audio'; break;
+ }
+ }
+ $results[] = $result;
}
}
if ($searchBodyImages) {
@@ -432,52 +474,12 @@ class FreshRSS_Entry extends Minz_Model {
}
}
- public static function getContentByParsing(string $url, string $path, array $attributes = array(), int $maxRedirs = 3): string {
- $limits = FreshRSS_Context::$system_conf->limits;
- $feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
-
- if (FreshRSS_Context::$system_conf->simplepie_syslog_enabled) {
- syslog(LOG_INFO, 'FreshRSS GET ' . SimplePie_Misc::url_remove_credentials($url));
- }
-
- $ch = curl_init();
- curl_setopt_array($ch, [
- CURLOPT_URL => $url,
- CURLOPT_REFERER => SimplePie_Misc::url_remove_credentials($url),
- CURLOPT_HTTPHEADER => array('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
- CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
- CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
- CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
- //CURLOPT_FAILONERROR => true;
- CURLOPT_MAXREDIRS => 4,
- CURLOPT_RETURNTRANSFER => true,
- CURLOPT_FOLLOWLOCATION => true,
- CURLOPT_ENCODING => '', //Enable all encodings
- ]);
-
- curl_setopt_array($ch, FreshRSS_Context::$system_conf->curl_options);
-
- if (isset($attributes['curl_params']) && is_array($attributes['curl_params'])) {
- curl_setopt_array($ch, $attributes['curl_params']);
- }
-
- if (isset($attributes['ssl_verify'])) {
- curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $attributes['ssl_verify'] ? 2 : 0);
- curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $attributes['ssl_verify'] ? true : false);
- if (!$attributes['ssl_verify']) {
- curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1');
- }
- }
- $html = curl_exec($ch);
- $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
- $c_error = curl_error($ch);
- curl_close($ch);
-
- if ($c_status != 200 || $c_error != '') {
- Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
- }
-
- if (is_string($html) && strlen($html) > 0) {
+ /**
+ * @param array<string,mixed> $attributes
+ */
+ public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string {
+ $html = getHtml($url, $attributes);
+ if (strlen($html) > 0) {
require_once(LIB_PATH . '/lib_phpQuery.php');
/**
* @var phpQueryObject @doc
diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php
index a10440edb..8f248e20f 100644
--- a/app/Models/EntryDAO.php
+++ b/app/Models/EntryDAO.php
@@ -164,7 +164,7 @@ INSERT IGNORE INTO `_entry` (
)
SELECT @rank:=@rank+1 AS id, guid, title, author, content_bin, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags
FROM `_entrytmp`
-ORDER BY date;
+ORDER BY date, id;
DELETE FROM `_entrytmp` WHERE id <= @rank;
SQL;
@@ -658,6 +658,7 @@ SQL;
}
}
+ /** @return FreshRSS_Entry|null */
public function searchByGuid($id_feed, $guid) {
// un guid est unique pour un flux donné
$sql = 'SELECT id, guid, title, author, '
@@ -669,9 +670,10 @@ SQL;
$stm->bindParam(':guid', $guid);
$stm->execute();
$res = $stm->fetchAll(PDO::FETCH_ASSOC);
- return isset($res[0]) ? self::daoToEntry($res[0]) : null;
+ return isset($res[0]) ? FreshRSS_Entry::fromArray($res[0]) : null;
}
+ /** @return FreshRSS_Entry|null */
public function searchById($id) {
$sql = 'SELECT id, guid, title, author, '
. ($this->isCompressed() ? 'UNCOMPRESS(content_bin) AS content' : 'content')
@@ -681,7 +683,7 @@ SQL;
$stm->bindParam(':id', $id, PDO::PARAM_INT);
$stm->execute();
$res = $stm->fetchAll(PDO::FETCH_ASSOC);
- return isset($res[0]) ? self::daoToEntry($res[0]) : null;
+ return isset($res[0]) ? FreshRSS_Entry::fromArray($res[0]) : null;
}
public function searchIdByGuid($id_feed, $guid) {
@@ -1061,7 +1063,7 @@ SQL;
$stm = $this->listWhereRaw($type, $id, $state, $order, $limit, $firstId, $filters, $date_min);
if ($stm) {
while ($row = $stm->fetch(PDO::FETCH_ASSOC)) {
- yield self::daoToEntry($row);
+ yield FreshRSS_Entry::fromArray($row);
}
} else {
yield false;
@@ -1092,7 +1094,7 @@ SQL;
$stm = $this->pdo->prepare($sql);
$stm->execute($ids);
while ($row = $stm->fetch(PDO::FETCH_ASSOC)) {
- yield self::daoToEntry($row);
+ yield FreshRSS_Entry::fromArray($row);
}
}
@@ -1251,23 +1253,4 @@ SQL;
$unread = empty($res[1]) ? 0 : intval($res[1]);
return array('all' => $all, 'unread' => $unread, 'read' => $all - $unread);
}
-
- public static function daoToEntry($dao) {
- $entry = new FreshRSS_Entry(
- $dao['id_feed'],
- $dao['guid'],
- $dao['title'],
- $dao['author'],
- $dao['content'],
- $dao['link'],
- $dao['date'],
- $dao['is_read'],
- $dao['is_favorite'],
- isset($dao['tags']) ? $dao['tags'] : ''
- );
- if (isset($dao['id'])) {
- $entry->_id($dao['id']);
- }
- return $entry;
- }
}
diff --git a/app/Models/EntryDAOPGSQL.php b/app/Models/EntryDAOPGSQL.php
index 7a46670fc..b97417a7c 100644
--- a/app/Models/EntryDAOPGSQL.php
+++ b/app/Models/EntryDAOPGSQL.php
@@ -45,13 +45,13 @@ rank bigint := (SELECT maxrank - COUNT(*) FROM `_entrytmp`);
BEGIN
INSERT INTO `_entry`
(id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags)
- (SELECT rank + row_number() OVER(ORDER BY date) AS id, guid, title, author, content,
+ (SELECT rank + row_number() OVER(ORDER BY date, id) AS id, guid, title, author, content,
link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags
FROM `_entrytmp` AS etmp
WHERE NOT EXISTS (
SELECT 1 FROM `_entry` AS ereal
WHERE (etmp.id = ereal.id) OR (etmp.id_feed = ereal.id_feed AND etmp.guid = ereal.guid))
- ORDER BY date);
+ ORDER BY date, id);
DELETE FROM `_entrytmp` WHERE id <= maxrank;
END $$;';
$hadTransaction = $this->pdo->inTransaction();
diff --git a/app/Models/EntryDAOSQLite.php b/app/Models/EntryDAOSQLite.php
index 8b0f2d252..16d15f899 100644
--- a/app/Models/EntryDAOSQLite.php
+++ b/app/Models/EntryDAOSQLite.php
@@ -41,13 +41,13 @@ DROP TABLE IF EXISTS `tmp`;
CREATE TEMP TABLE `tmp` AS
SELECT id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags
FROM `_entrytmp`
- ORDER BY date;
+ ORDER BY date, id;
INSERT OR IGNORE INTO `_entry`
(id, guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags)
SELECT rowid + (SELECT MAX(id) - COUNT(*) FROM `tmp`) AS id,
guid, title, author, content, link, date, `lastSeen`, hash, is_read, is_favorite, id_feed, tags
FROM `tmp`
- ORDER BY date;
+ ORDER BY date, id;
DELETE FROM `_entrytmp` WHERE id <= (SELECT MAX(id) FROM `tmp`);
DROP TABLE IF EXISTS `tmp`;
';
diff --git a/app/Models/Feed.php b/app/Models/Feed.php
index 3425f4bce..0e02194ef 100644
--- a/app/Models/Feed.php
+++ b/app/Models/Feed.php
@@ -1,6 +1,28 @@
<?php
class FreshRSS_Feed extends Minz_Model {
+
+ /**
+ * Normal RSS or Atom feed
+ * @var int
+ */
+ const KIND_RSS = 0;
+ /**
+ * Invalid RSS or Atom feed
+ * @var int
+ */
+ const KIND_RSS_FORCED = 2;
+ /**
+ * Normal HTML with XPath scraping
+ * @var int
+ */
+ const KIND_HTML_XPATH = 10;
+ /**
+ * Normal JSON with XPath scraping
+ * @var int
+ */
+ const KIND_JSON_XPATH = 20;
+
const PRIORITY_MAIN_STREAM = 10;
const PRIORITY_NORMAL = 0;
const PRIORITY_ARCHIVED = -10;
@@ -10,33 +32,50 @@ class FreshRSS_Feed extends Minz_Model {
const ARCHIVING_RETENTION_COUNT_LIMIT = 10000;
const ARCHIVING_RETENTION_PERIOD = 'P3M';
- /**
- * @var int
- */
+ /** @var int */
private $id = 0;
- private $url;
- /**
- * @var int
- */
+ /** @var string */
+ private $url = '';
+ /** @var int */
+ private $kind = 0;
+ /** @var int */
private $category = 1;
+ /** @var int */
private $nbEntries = -1;
+ /** @var int */
private $nbNotRead = -1;
+ /** @var int */
private $nbPendingNotRead = 0;
+ /** @var string */
private $name = '';
+ /** @var string */
private $website = '';
+ /** @var string */
private $description = '';
+ /** @var int */
private $lastUpdate = 0;
+ /** @var int */
private $priority = self::PRIORITY_MAIN_STREAM;
+ /** @var string */
private $pathEntries = '';
+ /** @var string */
private $httpAuth = '';
+ /** @var bool */
private $error = false;
+ /** @var int */
private $ttl = self::TTL_DEFAULT;
private $attributes = [];
+ /** @var bool */
private $mute = false;
+ /** @var string */
private $hash = '';
+ /** @var string */
private $lockPath = '';
+ /** @var string */
private $hubUrl = '';
+ /** @var string */
private $selfUrl = '';
+ /** @var array<FreshRSS_FilterAction> $filterActions */
private $filterActions = null;
public function __construct(string $url, bool $validate = true) {
@@ -47,6 +86,9 @@ class FreshRSS_Feed extends Minz_Model {
}
}
+ /**
+ * @return FreshRSS_Feed
+ */
public static function example() {
$f = new FreshRSS_Feed('http://example.net/', false);
$f->faviconPrepare();
@@ -71,6 +113,9 @@ class FreshRSS_Feed extends Minz_Model {
public function selfUrl(): string {
return $this->selfUrl;
}
+ public function kind(): int {
+ return $this->kind;
+ }
public function hubUrl(): string {
return $this->hubUrl;
}
@@ -200,6 +245,9 @@ class FreshRSS_Feed extends Minz_Model {
}
$this->url = $value;
}
+ public function _kind($value) {
+ $this->kind = $value;
+ }
public function _category($value) {
$value = intval($value);
$this->category = $value >= 0 ? $value : 0;
@@ -267,7 +315,7 @@ class FreshRSS_Feed extends Minz_Model {
* @return SimplePie|null
*/
public function load(bool $loadDetails = false, bool $noCache = false) {
- if ($this->url !== null) {
+ if ($this->url != '') {
// @phpstan-ignore-next-line
if (CACHE_PATH === false) {
throw new Minz_FileNotExistException(
@@ -347,6 +395,7 @@ class FreshRSS_Feed extends Minz_Model {
$guids = [];
$hasBadGuids = $this->attributes('hasBadGuids');
+ // TODO: Replace very slow $simplePie->get_item($i) by getting all items at once
for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) {
$item = $simplePie->get_item($i);
if ($item == null) {
@@ -375,6 +424,7 @@ class FreshRSS_Feed extends Minz_Model {
$hasBadGuids = $this->attributes('hasBadGuids');
// We want chronological order and SimplePie uses reverse order.
+ // TODO: Replace very slow $simplePie->get_item($i) by getting all items at once
for ($i = $simplePie->get_item_quantity() - 1; $i >= 0; $i--) {
$item = $simplePie->get_item($i);
if ($item == null) {
@@ -428,15 +478,18 @@ class FreshRSS_Feed extends Minz_Model {
} elseif ($medium === 'audio' || strpos($mime, 'audio') === 0) {
$enclosureContent .= '<p class="enclosure-content"><audio preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
- . '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8')
+ . ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls"></audio> <a download="" href="' . $elink . '">💾</a></p>';
} elseif ($medium === 'video' || strpos($mime, 'video') === 0) {
$enclosureContent .= '<p class="enclosure-content"><video preload="none" src="' . $elink
. ($length == null ? '' : '" data-length="' . intval($length))
- . '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8')
+ . ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
. '" controls="controls"></video> <a download="" href="' . $elink . '">💾</a></p>';
} else { //e.g. application, text, unknown
- $enclosureContent .= '<p class="enclosure-content"><a download="" href="' . $elink . '">💾</a></p>';
+ $enclosureContent .= '<p class="enclosure-content"><a download="" href="' . $elink
+ . ($mime == '' ? '' : '" data-type="' . htmlspecialchars($mime, ENT_COMPAT, 'UTF-8'))
+ . ($medium == '' ? '' : '" data-medium="' . htmlspecialchars($medium, ENT_COMPAT, 'UTF-8'))
+ . '">💾</a></p>';
}
$thumbnailContent = '';
@@ -490,6 +543,97 @@ class FreshRSS_Feed extends Minz_Model {
}
/**
+ * @param array<string,mixed> $attributes
+ * @return SimplePie|null
+ */
+ public function loadHtmlXpath(bool $loadDetails = false, bool $noCache = false, array $attributes = []) {
+ if ($this->url == '') {
+ return null;
+ }
+ $feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES);
+ if ($this->httpAuth != '') {
+ $feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl);
+ }
+
+ // Same naming conventions as https://github.com/RSS-Bridge/rss-bridge/wiki/XPathAbstract
+ // https://github.com/RSS-Bridge/rss-bridge/wiki/The-collectData-function
+ /** @var array<string,string> */
+ $xPathSettings = $this->attributes('xpath');
+ $xPathFeedTitle = $xPathSettings['feedTitle'] ?? '';
+ $xPathItem = $xPathSettings['item'] ?? '';
+ $xPathItemTitle = $xPathSettings['itemTitle'] ?? '';
+ $xPathItemContent = $xPathSettings['itemContent'] ?? '';
+ $xPathItemUri = $xPathSettings['itemUri'] ?? '';
+ $xPathItemAuthor = $xPathSettings['itemAuthor'] ?? '';
+ $xPathItemTimestamp = $xPathSettings['itemTimestamp'] ?? '';
+ $xPathItemThumbnail = $xPathSettings['itemThumbnail'] ?? '';
+ $xPathItemCategories = $xPathSettings['itemCategories'] ?? '';
+ if ($xPathItem == '') {
+ return null;
+ }
+
+ $html = getHtml($feedSourceUrl, $attributes);
+ if (strlen($html) <= 0) {
+ return null;
+ }
+
+ $view = new FreshRSS_View();
+ $view->_path('index/rss.phtml');
+ $view->internal_rendering = true;
+ $view->rss_url = $feedSourceUrl;
+ $view->entries = [];
+
+ try {
+ $doc = new DOMDocument();
+ $doc->recover = true;
+ $doc->strictErrorChecking = false;
+ $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
+ $xpath = new DOMXPath($doc);
+ $view->rss_title = $xPathFeedTitle == '' ? '' : htmlspecialchars(@$xpath->evaluate('normalize-space(' . $xPathFeedTitle . ')'), ENT_COMPAT, 'UTF-8');
+ $view->rss_base = htmlspecialchars(trim($xpath->evaluate('normalize-space(//base/@href)')), ENT_COMPAT, 'UTF-8');
+ $nodes = $xpath->query($xPathItem);
+ if (empty($nodes)) {
+ return null;
+ }
+
+ foreach ($nodes as $node) {
+ $item = [];
+ $item['title'] = $xPathItemTitle == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemTitle . ')', $node);
+ $item['content'] = $xPathItemContent == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemContent . ')', $node);
+ $item['link'] = $xPathItemUri == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemUri . ')', $node);
+ $item['author'] = $xPathItemAuthor == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemAuthor . ')', $node);
+ $item['timestamp'] = $xPathItemTimestamp == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemTimestamp . ')', $node);
+ $item['thumbnail'] = $xPathItemThumbnail == '' ? '' : @$xpath->evaluate('normalize-space(' . $xPathItemThumbnail . ')', $node);
+ if ($xPathItemCategories != '') {
+ $itemCategories = @$xpath->query($xPathItemCategories);
+ if ($itemCategories) {
+ foreach ($itemCategories as $itemCategory) {
+ $item['categories'][] = $itemCategory->textContent;
+ }
+ }
+ }
+ if ($item['title'] . $item['content'] . $item['link'] != '') {
+ $item['guid'] = 'urn:sha1:' . sha1($item['title'] . $item['content'] . $item['link']);
+ $item = Minz_Helper::htmlspecialchars_utf8($item);
+ $view->entries[] = FreshRSS_Entry::fromArray($item);
+ }
+ }
+ } catch (Exception $ex) {
+ Minz_Log::warning($ex->getMessage());
+ return null;
+ }
+
+ if (count($view->entries) < 1) {
+ return null;
+ }
+
+ $simplePie = customSimplePie();
+ $simplePie->set_raw_data($view->renderToString());
+ $simplePie->init();
+ return $simplePie;
+ }
+
+ /**
* To keep track of some new potentially unread articles since last commit+fetch from database
*/
public function incPendingUnread(int $n = 1) {
@@ -532,18 +676,23 @@ class FreshRSS_Feed extends Minz_Model {
return false;
}
- protected function cacheFilename(): string {
- $simplePie = customSimplePie($this->attributes());
- $filename = $simplePie->get_cache_filename($this->url);
- return CACHE_PATH . '/' . $filename . '.spc';
+ public static function cacheFilename(string $url, array $attributes, int $kind = FreshRSS_Feed::KIND_RSS): string {
+ $simplePie = customSimplePie($attributes);
+ $filename = $simplePie->get_cache_filename($url);
+ if ($kind == FreshRSS_Feed::KIND_HTML_XPATH) {
+ return CACHE_PATH . '/' . $filename . '.html';
+ } else {
+ return CACHE_PATH . '/' . $filename . '.spc';
+ }
}
public function clearCache(): bool {
- return @unlink($this->cacheFilename());
+ return @unlink(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind));
}
+ /** @return int|false */
public function cacheModifiedTime() {
- return @filemtime($this->cacheFilename());
+ return @filemtime(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind));
}
public function lock(): bool {
@@ -567,7 +716,7 @@ class FreshRSS_Feed extends Minz_Model {
* @return array<FreshRSS_FilterAction>
*/
public function filterActions(): array {
- if ($this->filterActions == null) {
+ if (empty($this->filterActions)) {
$this->filterActions = array();
$filters = $this->attributes('filters');
if (is_array($filters)) {
@@ -582,6 +731,9 @@ class FreshRSS_Feed extends Minz_Model {
return $this->filterActions;
}
+ /**
+ * @param array<FreshRSS_FilterAction> $filterActions
+ */
private function _filterActions($filterActions) {
$this->filterActions = $filterActions;
if (is_array($this->filterActions) && !empty($this->filterActions)) {
diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php
index ab73b2ec2..c4a0b1429 100644
--- a/app/Models/FeedDAO.php
+++ b/app/Models/FeedDAO.php
@@ -5,7 +5,9 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
protected function addColumn(string $name) {
Minz_Log::warning(__method__ . ': ' . $name);
try {
- if ($name === 'attributes') { //v1.11.0
+ if ($name === 'kind') { //v1.20.0
+ return $this->pdo->exec('ALTER TABLE `_feed` ADD COLUMN kind SMALLINT DEFAULT 0') !== false;
+ } elseif ($name === 'attributes') { //v1.11.0
return $this->pdo->exec('ALTER TABLE `_feed` ADD COLUMN attributes TEXT') !== false;
}
} catch (Exception $e) {
@@ -17,7 +19,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
protected function autoUpdateDb(array $errorInfo) {
if (isset($errorInfo[0])) {
if ($errorInfo[0] === FreshRSS_DatabaseDAO::ER_BAD_FIELD_ERROR || $errorInfo[0] === FreshRSS_DatabaseDAOPGSQL::UNDEFINED_COLUMN) {
- foreach (['attributes'] as $column) {
+ foreach (['attributes', 'kind'] as $column) {
if (stripos($errorInfo[2], $column) !== false) {
return $this->addColumn($column);
}
@@ -32,6 +34,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
INSERT INTO `_feed`
(
url,
+ kind,
category,
name,
website,
@@ -45,7 +48,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
attributes
)
VALUES
- (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)';
+ (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)';
$stm = $this->pdo->prepare($sql);
$valuesTmp['url'] = safe_ascii($valuesTmp['url']);
@@ -59,6 +62,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
$values = array(
substr($valuesTmp['url'], 0, 511),
+ $valuesTmp['kind'] ?? FreshRSS_Feed::KIND_RSS,
$valuesTmp['category'],
mb_strcut(trim($valuesTmp['name']), 0, FreshRSS_DatabaseDAO::LENGTH_INDEX_UNICODE, 'UTF-8'),
substr($valuesTmp['website'], 0, 255),
@@ -84,7 +88,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
}
}
- public function addFeedObject($feed): int {
+ public function addFeedObject(FreshRSS_Feed $feed): int {
// TODO: not sure if we should write this method in DAO since DAO
// should not be aware about feed class
@@ -94,6 +98,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
$values = array(
'id' => $feed->id(),
'url' => $feed->url(),
+ 'kind' => $feed->kind(),
'category' => $feed->category(),
'name' => $feed->name(),
'website' => $feed->website(),
@@ -252,7 +257,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
public function selectAll() {
$sql = <<<'SQL'
-SELECT id, url, category, name, website, description, `lastUpdate`,
+SELECT id, url, kind, category, name, website, description, `lastUpdate`,
priority, `pathEntries`, `httpAuth`, error, ttl, attributes
FROM `_feed`
SQL;
@@ -346,7 +351,7 @@ SQL;
*/
public function listFeedsOrderUpdate(int $defaultCacheDuration = 3600, int $limit = 0) {
$this->updateTTL();
- $sql = 'SELECT id, url, name, website, `lastUpdate`, `pathEntries`, `httpAuth`, ttl, attributes '
+ $sql = 'SELECT id, url, kind, name, website, `lastUpdate`, `pathEntries`, `httpAuth`, ttl, attributes '
. 'FROM `_feed` '
. ($defaultCacheDuration < 0 ? '' : 'WHERE ttl >= ' . FreshRSS_Feed::TTL_DEFAULT
. ' AND `lastUpdate` < (' . (time() + 60)
@@ -557,20 +562,21 @@ SQL;
$category = $catID;
}
- $myFeed = new FreshRSS_Feed(isset($dao['url']) ? $dao['url'] : '', false);
+ $myFeed = new FreshRSS_Feed($dao['url'] ?? '', false);
+ $myFeed->_kind($dao['kind'] ?? FreshRSS_Feed::KIND_RSS);
$myFeed->_category($category);
$myFeed->_name($dao['name']);
- $myFeed->_website(isset($dao['website']) ? $dao['website'] : '', false);
- $myFeed->_description(isset($dao['description']) ? $dao['description'] : '');
- $myFeed->_lastUpdate(isset($dao['lastUpdate']) ? $dao['lastUpdate'] : 0);
- $myFeed->_priority(isset($dao['priority']) ? $dao['priority'] : 10);
- $myFeed->_pathEntries(isset($dao['pathEntries']) ? $dao['pathEntries'] : '');
- $myFeed->_httpAuth(isset($dao['httpAuth']) ? base64_decode($dao['httpAuth']) : '');
- $myFeed->_error(isset($dao['error']) ? $dao['error'] : 0);
- $myFeed->_ttl(isset($dao['ttl']) ? $dao['ttl'] : FreshRSS_Feed::TTL_DEFAULT);
- $myFeed->_attributes('', isset($dao['attributes']) ? $dao['attributes'] : '');
- $myFeed->_nbNotRead(isset($dao['cache_nbUnreads']) ? $dao['cache_nbUnreads'] : 0);
- $myFeed->_nbEntries(isset($dao['cache_nbEntries']) ? $dao['cache_nbEntries'] : 0);
+ $myFeed->_website($dao['website'] ?? '', false);
+ $myFeed->_description($dao['description'] ?? '');
+ $myFeed->_lastUpdate($dao['lastUpdate'] ?? 0);
+ $myFeed->_priority($dao['priority'] ?? 10);
+ $myFeed->_pathEntries($dao['pathEntries'] ?? '');
+ $myFeed->_httpAuth(base64_decode($dao['httpAuth'] ?? ''));
+ $myFeed->_error($dao['error'] ?? 0);
+ $myFeed->_ttl($dao['ttl'] ?? FreshRSS_Feed::TTL_DEFAULT);
+ $myFeed->_attributes('', $dao['attributes'] ?? '');
+ $myFeed->_nbNotRead($dao['cache_nbUnreads'] ?? 0);
+ $myFeed->_nbEntries($dao['cache_nbEntries'] ?? 0);
if (isset($dao['id'])) {
$myFeed->_id($dao['id']);
}
diff --git a/app/Models/FeedDAOSQLite.php b/app/Models/FeedDAOSQLite.php
index 54146858b..a4432ea62 100644
--- a/app/Models/FeedDAOSQLite.php
+++ b/app/Models/FeedDAOSQLite.php
@@ -5,7 +5,7 @@ class FreshRSS_FeedDAOSQLite extends FreshRSS_FeedDAO {
protected function autoUpdateDb(array $errorInfo) {
if ($tableInfo = $this->pdo->query("PRAGMA table_info('feed')")) {
$columns = $tableInfo->fetchAll(PDO::FETCH_COLUMN, 1);
- foreach (['attributes'] as $column) {
+ foreach (['attributes', 'kind'] as $column) {
if (!in_array($column, $columns)) {
return $this->addColumn($column);
}
diff --git a/app/Models/View.php b/app/Models/View.php
index e3a591155..365bfd261 100644
--- a/app/Models/View.php
+++ b/app/Models/View.php
@@ -7,12 +7,19 @@ class FreshRSS_View extends Minz_View {
public $callbackBeforeFeeds;
public $callbackBeforePagination;
public $categories;
+ /** @var FreshRSS_Category|null */
public $category;
+ /** @var string */
public $current_user;
+ /** @var array<FreshRSS_Entry> */
public $entries;
+ /** @var FreshRSS_Entry */
public $entry;
+ /** @var FreshRSS_Feed|null */
public $feed;
+ /** @var array<FreshRSS_Feed> */
public $feeds;
+ /** @var int */
public $nbUnreadTags;
public $tags;
@@ -88,8 +95,14 @@ class FreshRSS_View extends Minz_View {
public $nbPage;
// RSS view
- public $rss_title;
- public $url;
+ /** @var string */
+ public $rss_title = '';
+ /** @var string */
+ public $rss_url = '';
+ /** @var string */
+ public $rss_base = '';
+ /** @var boolean */
+ public $internal_rendering = false;
// Content preview
public $fatalError;