From 1fe66ad020ca8f0560bb9c6e311852ed77228f78 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 28 Feb 2022 20:22:43 +0100 Subject: Implement Web scraping "HTML + XPath" (#4220) * More PHP type hints for Fever Follow-up of https://github.com/FreshRSS/FreshRSS/pull/4201 Related to https://github.com/FreshRSS/FreshRSS/issues/4200 * Detail * Draft * Progress * More draft * Fix thumbnail PHP type hint https://github.com/FreshRSS/FreshRSS/issues/4215 * More types * A bit more * Refactor FreshRSS_Entry::fromArray * Progress * Starts to work * Categories * Fonctional * Layout update * Fix relative URLs * Cache system * Forgotten files * Remove a debug line * Automatic form validation of XPath expressions * data-leave-validation * Fix reload action * Simpler examples * Fix column type for PostgreSQL * Enforce HTTP encoding * Readme * Fix get full content * target="_blank" * gitignore * htmlspecialchars_utf8 * Implement HTML And fix/revert `xml:base` support in SimplePie https://github.com/simplepie/simplepie/commit/e49c578817aa504d8d05cd7f33857aeda9d41908 * SimplePie upstream PR merged https://github.com/simplepie/simplepie/pull/723 --- app/views/helpers/export/articles.phtml | 2 +- app/views/helpers/feed/update.phtml | 104 ++++++++++++++++++++++++++++++++ app/views/index/normal.phtml | 7 ++- app/views/index/reader.phtml | 2 + app/views/index/rss.phtml | 30 +++++++-- app/views/subscription/add.phtml | 91 ++++++++++++++++++++++++++++ 6 files changed, 229 insertions(+), 7 deletions(-) (limited to 'app/views') diff --git a/app/views/helpers/export/articles.phtml b/app/views/helpers/export/articles.phtml index c131b8474..ad5210968 100644 --- a/app/views/helpers/export/articles.phtml +++ b/app/views/helpers/export/articles.phtml @@ -22,7 +22,7 @@ foreach ($this->entriesRaw as $entryRaw) { if ($entryRaw == null) { continue; } - $entry = FreshRSS_EntryDAO::daoToEntry($entryRaw); + $entry = FreshRSS_Entry::fromArray($entryRaw); if (!isset($this->feed)) { $feed = FreshRSS_CategoryDAO::findFeed($this->categories, $entry->feed()); if ($feed === null) { diff --git a/app/views/helpers/feed/update.phtml b/app/views/helpers/feed/update.phtml index 264881f77..f71be5135 100644 --- a/app/views/helpers/feed/update.phtml +++ b/app/views/helpers/feed/update.phtml @@ -373,6 +373,110 @@ + +
+ +
+ +
+
+ +
+ feed->attributes('xpath')); + ?> +

+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +
+
+
+
+
+ + +
+
+
diff --git a/app/views/index/normal.phtml b/app/views/index/normal.phtml index 5dde2a171..06323dcb0 100644 --- a/app/views/index/normal.phtml +++ b/app/views/index/normal.phtml @@ -21,14 +21,17 @@ $today = @strtotime('today');
entries as $item): $lastEntry = $item; $nbEntries++; ob_flush(); - $this->entry = Minz_ExtensionManager::callHook('entry_before_display', $item); - if ($this->entry == null) { + /** @var FreshRSS_Entry */ + $item = Minz_ExtensionManager::callHook('entry_before_display', $item); + if ($item == null) { continue; } + $this->entry = $item; // We most likely already have the feed object in cache $this->feed = FreshRSS_CategoryDAO::findFeed($this->categories, $this->entry->feed()); diff --git a/app/views/index/reader.phtml b/app/views/index/reader.phtml index e4fb74708..b408e3480 100644 --- a/app/views/index/reader.phtml +++ b/app/views/index/reader.phtml @@ -15,10 +15,12 @@ $content_width = FreshRSS_Context::$user_conf->content_width; entries as $item): $lastEntry = $item; $nbEntries++; ob_flush(); + /** @var FreshRSS_Entry */ $item = Minz_ExtensionManager::callHook('entry_before_display', $item); if ($item == null) { continue; diff --git a/app/views/index/rss.phtml b/app/views/index/rss.phtml index eedb31fa4..0b07a02f3 100755 --- a/app/views/index/rss.phtml +++ b/app/views/index/rss.phtml @@ -1,15 +1,26 @@ '; ?> - +rss_base == '' ? '' : ' xml:base="' . $this->rss_base . '"' ?> +> <?= $this->rss_title ?> - + internal_rendering ? $this->rss_url : Minz_Url::display('', 'html', true) ?> rss_title) ?> GMT - + entries as $item) { + if (!$this->internal_rendering) { + /** @var FreshRSS_Entry */ + $item = Minz_ExtensionManager::callHook('entry_before_display', $item); + if ($item == null) { + continue; + } + } ?> <?= $item->title() ?> @@ -27,12 +38,23 @@ foreach ($this->entries as $item) { echo "\t\t\t" , '', $category, '', "\n"; } } + $enclosures = $item->enclosures(false); + if (is_array($enclosures)) { + foreach ($enclosures as $enclosure) { + // https://www.rssboard.org/media-rss + echo "\t\t\t" , '', "\n"; + } + } ?> content(); ?>]]> date(true)) ?> - id() ?> + id() > 0 ? $item->id() : $item->guid() ?> diff --git a/app/views/subscription/add.phtml b/app/views/subscription/add.phtml index 380f5434f..344e25ade 100644 --- a/app/views/subscription/add.phtml +++ b/app/views/subscription/add.phtml @@ -51,6 +51,97 @@ +
+ + + + +
+ +
+ +
+
+ +
+

+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +

+
+
+
+ +
+ +
+
+
+
+
-- cgit v1.2.3