From 8f9c4143fcc133f28db4c3f618649fb1170e33b4 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 6 Jan 2023 19:53:43 +0100 Subject: Better enclosures (#4944) * Better enclosures #fix https://github.com/FreshRSS/FreshRSS/issues/4702 Improvement of https://github.com/FreshRSS/FreshRSS/pull/2898 * A few fixes * Better enclosure titles * Improve thumbnails * Implement thumbnail for HTML+XPath * Avoid duplicate enclosures #fix https://github.com/FreshRSS/FreshRSS/issues/1668 * Fix regex * Add basic support for media:credit And use <figure>
for enclosures * Fix link encoding + simplify code * Fix some SimplePie bugs Encoding errors in enclosure links * Remove debugging syslog * Remove debugging syslog * SimplePie fix multiple RSS2 enclosures #fix https://github.com/FreshRSS/FreshRSS/issues/4974 * Improve thumbnails * Performance with yield Avoid generating all enclosures if not used * API keep providing enclosures inside content Clients are typically not showing the enclosures to the users (tested with News+, FeedMe, Readrops, Fluent Reader Lite) * Lint * Fix API output enclosure * Fix API content strcut * API tolerate enclosures without a type --- app/Models/Entry.php | 167 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 140 insertions(+), 27 deletions(-) (limited to 'app/Models/Entry.php') diff --git a/app/Models/Entry.php b/app/Models/Entry.php index 47fcf3b4a..ec7629253 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -67,7 +67,9 @@ class FreshRSS_Entry extends Minz_Model { $dao['content'] = ''; } if (!empty($dao['thumbnail'])) { - $dao['content'] .= '

'; + $dao['attributes']['thumbnail'] = [ + 'url' => $dao['thumbnail'], + ]; } $entry = new FreshRSS_Entry( $dao['id_feed'] ?? 0, @@ -116,15 +118,117 @@ class FreshRSS_Entry extends Minz_Model { return $this->authors; } } - public function content(): string { - return $this->content; + + /** + * Basic test without ambition to catch all cases such as unquoted addresses, variants of entities, HTML comments, etc. + */ + private static function containsLink(string $html, string $link): bool { + return preg_match('/(?P[\'"])' . preg_quote($link, '/') . '(?P=delim)/', $html) == 1; + } + + private static function enclosureIsImage(array $enclosure): bool { + $elink = $enclosure['url'] ?? ''; + $length = $enclosure['length'] ?? 0; + $medium = $enclosure['medium'] ?? ''; + $mime = $enclosure['type'] ?? ''; + + return $elink != '' && $medium === 'image' || strpos($mime, 'image') === 0 || + ($mime == '' && $length == 0 && preg_match('/[.](avif|gif|jpe?g|png|svg|webp)$/i', $elink)); } - /** @return array> */ - public function enclosures(bool $searchBodyImages = false): array { - $results = []; + /** + * @param bool $withEnclosures Set to true to include the enclosures in the returned HTML, false otherwise. + * @param bool $allowDuplicateEnclosures Set to false to remove obvious enclosure duplicates (based on simple string comparison), true otherwise. + * @return string HTML content + */ + public function content(bool $withEnclosures = true, bool $allowDuplicateEnclosures = false): string { + if (!$withEnclosures) { + return $this->content; + } + + $content = $this->content; + + $thumbnail = $this->attributes('thumbnail'); + if (!empty($thumbnail['url'])) { + $elink = $thumbnail['url']; + if ($allowDuplicateEnclosures || !self::containsLink($content, $elink)) { + $content .= << +

+ +

+
+HTML; + } + } + + $attributeEnclosures = $this->attributes('enclosures'); + if (empty($attributeEnclosures)) { + return $content; + } + + foreach ($attributeEnclosures as $enclosure) { + $elink = $enclosure['url'] ?? ''; + if ($elink == '') { + continue; + } + if (!$allowDuplicateEnclosures && self::containsLink($content, $elink)) { + continue; + } + $credit = $enclosure['credit'] ?? ''; + $description = $enclosure['description'] ?? ''; + $length = $enclosure['length'] ?? 0; + $medium = $enclosure['medium'] ?? ''; + $mime = $enclosure['type'] ?? ''; + $thumbnails = $enclosure['thumbnails'] ?? []; + $etitle = $enclosure['title'] ?? ''; + + $content .= '
'; + + foreach ($thumbnails as $thumbnail) { + $content .= '

'; + } + + if (self::enclosureIsImage($enclosure)) { + $content .= '

'; + } elseif ($medium === 'audio' || strpos($mime, 'audio') === 0) { + $content .= '

💾

'; + } elseif ($medium === 'video' || strpos($mime, 'video') === 0) { + $content .= '

💾

'; + } else { //e.g. application, text, unknown + $content .= '

💾

'; + } + + if ($credit != '') { + $content .= '

© ' . $credit . '

'; + } + if ($description != '') { + $content .= '
' . $description . '
'; + } + $content .= "
\n"; + } + + return $content; + } + + /** @return iterable> */ + public function enclosures(bool $searchBodyImages = false) { + $attributeEnclosures = $this->attributes('enclosures'); + if (is_array($attributeEnclosures)) { + // FreshRSS 1.20.1+: The enclosures are saved as attributes + yield from $attributeEnclosures; + } try { - $searchEnclosures = strpos($this->content, '

content, 'query('//div[@class="enclosure"]/p[@class="enclosure-content"]/*[@src]'); foreach ($enclosures as $enclosure) { $result = [ @@ -148,7 +253,7 @@ class FreshRSS_Entry extends Minz_Model { case 'audio': $result['medium'] = 'audio'; break; } } - $results[] = $result; + yield Minz_Helper::htmlspecialchars_utf8($result); } } if ($searchBodyImages) { @@ -159,26 +264,31 @@ class FreshRSS_Entry extends Minz_Model { $src = $img->getAttribute('data-src'); } if ($src != null) { - $results[] = [ + $result = [ 'url' => $src, - 'alt' => $img->getAttribute('alt'), ]; + yield Minz_Helper::htmlspecialchars_utf8($result); } } } - return $results; } catch (Exception $ex) { - return $results; + Minz_Log::debug(__METHOD__ . ' ' . $ex->getMessage()); } } /** * @return array|null */ - public function thumbnail() { - foreach ($this->enclosures(true) as $enclosure) { - if (!empty($enclosure['url']) && empty($enclosure['type'])) { - return $enclosure; + public function thumbnail(bool $searchEnclosures = true) { + $thumbnail = $this->attributes('thumbnail'); + if (!empty($thumbnail['url'])) { + return $thumbnail; + } + if ($searchEnclosures) { + foreach ($this->enclosures(true) as $enclosure) { + if (self::enclosureIsImage($enclosure)) { + return $enclosure; + } } } return null; @@ -587,7 +697,7 @@ class FreshRSS_Entry extends Minz_Model { if ($entry) { // l’article existe déjà en BDD, en se contente de recharger ce contenu - $this->content = $entry->content(); + $this->content = $entry->content(false); } else { try { // The article is not yet in the database, so let’s fetch it @@ -629,7 +739,7 @@ class FreshRSS_Entry extends Minz_Model { 'guid' => $this->guid(), 'title' => $this->title(), 'author' => $this->authors(true), - 'content' => $this->content(), + 'content' => $this->content(false), 'link' => $this->link(), 'date' => $this->date(true), 'hash' => $this->hash(), @@ -677,7 +787,6 @@ class FreshRSS_Entry extends Minz_Model { 'published' => $this->date(true), // 'updated' => 
$this->date(true), 'title' => $this->title(), - 'summary' => ['content' => $this->content()], 'canonical' => [ ['href' => htmlspecialchars_decode($this->link(), ENT_QUOTES)], ], @@ -697,13 +806,16 @@ class FreshRSS_Entry extends Minz_Model { if ($mode === 'compat') { $item['title'] = escapeToUnicodeAlternative($this->title(), false); unset($item['alternate'][0]['type']); - if (mb_strlen($this->content(), 'UTF-8') > self::API_MAX_COMPAT_CONTENT_LENGTH) { - $item['summary']['content'] = mb_strcut($this->content(), 0, self::API_MAX_COMPAT_CONTENT_LENGTH, 'UTF-8'); - } - } elseif ($mode === 'freshrss') { + $item['summary'] = [ + 'content' => mb_strcut($this->content(true), 0, self::API_MAX_COMPAT_CONTENT_LENGTH, 'UTF-8'), + ]; + } else { + $item['content'] = [ + 'content' => $this->content(false), + ]; + } + if ($mode === 'freshrss') { $item['guid'] = $this->guid(); - unset($item['summary']); - $item['content'] = ['content' => $this->content()]; } if ($category != null && $mode !== 'freshrss') { $item['categories'][] = 'user/-/label/' . htmlspecialchars_decode($category->name(), ENT_QUOTES); @@ -718,10 +830,11 @@ class FreshRSS_Entry extends Minz_Model { } } foreach ($this->enclosures() as $enclosure) { - if (!empty($enclosure['url']) && !empty($enclosure['type'])) { + if (!empty($enclosure['url'])) { $media = [ 'href' => $enclosure['url'], - 'type' => $enclosure['type'], + 'type' => $enclosure['type'] ?? $enclosure['medium'] ?? + (self::enclosureIsImage($enclosure) ? 'image' : ''), ]; if (!empty($enclosure['length'])) { $media['length'] = intval($enclosure['length']); -- cgit v1.2.3