diff options
| -rw-r--r-- | app/Controllers/feedController.php | 9 | ||||
| -rw-r--r-- | app/Models/Category.php | 15 | ||||
| -rw-r--r-- | app/Models/Entry.php | 29 | ||||
| -rw-r--r-- | app/Models/Feed.php | 44 | ||||
| -rw-r--r-- | lib/lib_rss.php | 11 |
5 files changed, 56 insertions, 52 deletions
diff --git a/app/Controllers/feedController.php b/app/Controllers/feedController.php index 4ec4b5a55..122807cf0 100644 --- a/app/Controllers/feedController.php +++ b/app/Controllers/feedController.php @@ -1141,16 +1141,9 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController { return; } - $attributes = $feed->attributes(); - $attributes['path_entries_filter'] = Minz_Request::paramString('selector_filter', true); - //Fetch & select content. try { - $fullContent = FreshRSS_Entry::getContentByParsing( - htmlspecialchars_decode($entry->link(), ENT_QUOTES), - htmlspecialchars_decode($content_selector, ENT_QUOTES), - $attributes - ); + $fullContent = $entry->getContentByParsing(); if ($fullContent != '') { $this->view->selectorSuccess = true; diff --git a/app/Models/Category.php b/app/Models/Category.php index 6674b4e72..13bb184d5 100644 --- a/app/Models/Category.php +++ b/app/Models/Category.php @@ -94,6 +94,11 @@ class FreshRSS_Category extends Minz_Model { return $this->nbNotRead; } + /** @return array<int,mixed> */ + public function curlOptions(): array { + return []; // TODO (e.g., credentials for Dynamic OPML) + } + /** * @return array<int,FreshRSS_Feed> * @throws Minz_ConfigurationNamespaceException @@ -158,11 +163,10 @@ class FreshRSS_Category extends Minz_Model { } /** - * @param array<string> $attributes * @throws FreshRSS_Context_Exception */ - public static function cacheFilename(string $url, array $attributes): string { - $simplePie = customSimplePie($attributes); + public function cacheFilename(string $url): string { + $simplePie = customSimplePie($this->attributes(), $this->curlOptions()); $filename = $simplePie->get_cache_filename($url); return CACHE_PATH . '/' . $filename . '.opml.xml'; } @@ -173,9 +177,8 @@ class FreshRSS_Category extends Minz_Model { return false; } $ok = true; - $attributes = []; //TODO - $cachePath = self::cacheFilename($url, $attributes); - $opml = httpGet($url, $cachePath, 'opml', $attributes); + $cachePath = $this->cacheFilename($url); + $opml = httpGet($url, $cachePath, 'opml', $this->attributes(), $this->curlOptions()); if ($opml == '') { Minz_Log::warning('Error getting dynamic OPML for category ' . $this->id() . '! ' . SimplePie_Misc::url_remove_credentials($url)); diff --git a/app/Models/Entry.php b/app/Models/Entry.php index c782f4c94..e5e859a1d 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -690,12 +690,18 @@ HTML; } /** - * @param array<string,mixed> $attributes + * @param string $url Overridden URL. Will default to the entry URL. * @throws Minz_Exception */ - public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string { - $cachePath = FreshRSS_Feed::cacheFilename($url, $attributes, FreshRSS_Feed::KIND_HTML_XPATH); - $html = httpGet($url, $cachePath, 'html', $attributes); + public function getContentByParsing(string $url = '', int $maxRedirs = 3): string { + $url = $url ?: htmlspecialchars_decode($this->link(), ENT_QUOTES); + $feed = $this->feed(); + if ($url === '' || $feed === null || $feed->pathEntries() === '') { + return ''; + } + + $cachePath = $feed->cacheFilename($url . '#' . $feed->pathEntries()); + $html = httpGet($url, $cachePath, 'html'); if (strlen($html) > 0) { $doc = new DOMDocument(); $doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING); @@ -709,7 +715,7 @@ HTML; $refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content'))); $refresh = SimplePie_Misc::absolutize_url($refresh, $url); if ($refresh != false && $refresh !== $url) { - return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1); + return $this->getContentByParsing($refresh, $maxRedirs - 1); } } } @@ -724,11 +730,12 @@ HTML; } $content = ''; - $nodes = $xpath->query((new Gt\CssXPath\Translator($path))->asXPath()); + $nodes = $xpath->query((new Gt\CssXPath\Translator($feed->pathEntries()))->asXPath()); if ($nodes != false) { + $path_entries_filter = $feed->attributeString('path_entries_filter'); foreach ($nodes as $node) { - if (!empty($attributes['path_entries_filter'])) { - $filterednodes = $xpath->query((new Gt\CssXPath\Translator($attributes['path_entries_filter']))->asXPath(), $node) ?: []; + if ($path_entries_filter != null) { + $filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter))->asXPath(), $node) ?: []; foreach ($filterednodes as $filterednode) { if ($filterednode->parentNode === null) { continue; @@ -760,11 +767,7 @@ HTML; } else { try { // The article is not yet in the database, so let’s fetch it - $fullContent = self::getContentByParsing( - htmlspecialchars_decode($this->link(), ENT_QUOTES), - htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES), - $feed->attributes() - ); + $fullContent = $this->getContentByParsing(); if ('' !== $fullContent) { $fullContent = "<!-- FULLCONTENT start //-->{$fullContent}<!-- FULLCONTENT end //-->"; $originalContent = $this->originalContent(); diff --git a/app/Models/Feed.php b/app/Models/Feed.php index b8425e86b..a957c8d10 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -174,6 +174,16 @@ class FreshRSS_Feed extends Minz_Model { ]; } } + + /** @return array<int,mixed> */ + public function curlOptions(): array { + $curl_options = []; + if ($this->httpAuth !== '') { + $curl_options[CURLOPT_USERPWD] = htmlspecialchars_decode($this->httpAuth, ENT_QUOTES); + } + return $curl_options; + } + public function inError(): bool { return $this->error; } @@ -348,11 +358,8 @@ class FreshRSS_Feed extends Minz_Model { Minz_Exception::ERROR ); } else { + $simplePie = customSimplePie($this->attributes(), $this->curlOptions()); $url = htmlspecialchars_decode($this->url, ENT_QUOTES); - if ($this->httpAuth != '') { - $url = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $url) ?? ''; - } - $simplePie = customSimplePie($this->attributes()); if (substr($url, -11) === '#force_feed') { $simplePie->force_feed(true); $url = substr($url, 0, -11); @@ -636,16 +643,12 @@ class FreshRSS_Feed extends Minz_Model { return null; } $feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES); - if ($this->httpAuth != '') { - $feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl); - } if ($feedSourceUrl == null) { return null; } - $cachePath = FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), $this->kind()); $httpAccept = 'json'; - $json = httpGet($feedSourceUrl, $cachePath, $httpAccept, $this->attributes()); + $json = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); if (strlen($json) <= 0) { return null; } @@ -672,9 +675,6 @@ class FreshRSS_Feed extends Minz_Model { return null; } $feedSourceUrl = htmlspecialchars_decode($this->url, ENT_QUOTES); - if ($this->httpAuth != '') { - $feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl); - } if ($feedSourceUrl == null) { return null; } @@ -698,9 +698,8 @@ class FreshRSS_Feed extends Minz_Model { return null; } - $cachePath = FreshRSS_Feed::cacheFilename($feedSourceUrl, $this->attributes(), $this->kind()); - $html = httpGet($feedSourceUrl, $cachePath, - $this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html', $this->attributes()); + $httpAccept = $this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html'; + $html = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions()); if (strlen($html) <= 0) { return null; } @@ -892,15 +891,16 @@ class FreshRSS_Feed extends Minz_Model { } /** - * @param array<string,mixed> $attributes + * @param string $url Overridden URL. Will default to the feed URL. * @throws FreshRSS_Context_Exception */ - public static function cacheFilename(string $url, array $attributes, int $kind = FreshRSS_Feed::KIND_RSS): string { - $simplePie = customSimplePie($attributes); + public function cacheFilename(string $url = ''): string { + $simplePie = customSimplePie($this->attributes(), $this->curlOptions()); + $url = $url ?: htmlspecialchars_decode($this->url); $filename = $simplePie->get_cache_filename($url); - if ($kind === FreshRSS_Feed::KIND_HTML_XPATH) { + if ($this->kind === FreshRSS_Feed::KIND_HTML_XPATH) { return CACHE_PATH . '/' . $filename . '.html'; - } elseif ($kind === FreshRSS_Feed::KIND_XML_XPATH) { + } elseif ($this->kind === FreshRSS_Feed::KIND_XML_XPATH) { return CACHE_PATH . '/' . $filename . '.xml'; } else { return CACHE_PATH . '/' . $filename . '.spc'; @@ -908,12 +908,12 @@ class FreshRSS_Feed extends Minz_Model { } public function clearCache(): bool { - return @unlink(FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind)); + return @unlink($this->cacheFilename()); } /** @return int|false */ public function cacheModifiedTime() { - $filename = FreshRSS_Feed::cacheFilename($this->url, $this->attributes(), $this->kind); + $filename = $this->cacheFilename(); clearstatcache(true, $filename); return @filemtime($filename); } diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 880b4e65a..6669b36dc 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -303,9 +303,10 @@ function sensitive_log($log) { /** * @param array<string,mixed> $attributes + * @param array<int,mixed> $curl_options * @throws FreshRSS_Context_Exception */ -function customSimplePie(array $attributes = array()): SimplePie { +function customSimplePie(array $attributes = [], array $curl_options = []): SimplePie { $limits = FreshRSS_Context::systemConf()->limits; $simplePie = new SimplePie(); $simplePie->set_useragent(FRESHRSS_USERAGENT); @@ -318,7 +319,7 @@ function customSimplePie(array $attributes = array()): SimplePie { $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : (int)$attributes['timeout']; $simplePie->set_timeout($feed_timeout > 0 ? $feed_timeout : $limits['timeout']); - $curl_options = FreshRSS_Context::systemConf()->curl_options; + $curl_options = array_replace(FreshRSS_Context::systemConf()->curl_options, $curl_options); if (isset($attributes['ssl_verify'])) { $curl_options[CURLOPT_SSL_VERIFYHOST] = $attributes['ssl_verify'] ? 2 : 0; $curl_options[CURLOPT_SSL_VERIFYPEER] = (bool)$attributes['ssl_verify']; @@ -482,8 +483,9 @@ function enforceHttpEncoding(string $html, string $contentType = ''): string { /** * @param string $type {html,json,opml,xml} * @param array<string,mixed> $attributes + * @param array<int,mixed> $curl_options */ -function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = []): string { +function httpGet(string $url, string $cachePath, string $type = 'html', array $attributes = [], array $curl_options = []): string { $limits = FreshRSS_Context::systemConf()->limits; $feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']); @@ -548,6 +550,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1'); } } + + curl_setopt_array($ch, $curl_options); + $body = curl_exec($ch); $c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE); $c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE); |
