aboutsummaryrefslogtreecommitdiff
path: root/app
diff options
context:
space:
mode:
authorGravatar Inverle <inverle@proton.me> 2025-06-22 00:09:18 +0200
committerGravatar GitHub <noreply@github.com> 2025-06-22 00:09:18 +0200
commit 18b5c8ec6da64da90706643ffa52736a85a2ca59 (patch)
tree 0b7ac0082c0a0aa86c0f7a4caccb616011576023 /app
parent a6948218fb1c66fe146c7651555e5a1f791c8112 (diff)
Handle redirects when scraping feed from HTML (#7654)
* Handle redirects when scraping feed from HTML
* pass codesniffer
* pass PHPStan
* Optimize
* Another approach relying on HTML base
  Standard way to save an HTML document with relative references
* Fix case of existing HTML base which should not be overridden
---------
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Diffstat (limited to 'app')
-rw-r--r-- app/Controllers/extensionController.php 2
-rw-r--r-- app/Models/Category.php 2
-rw-r--r-- app/Models/Entry.php 10
-rw-r--r-- app/Models/Feed.php 4
4 files changed, 11 insertions, 7 deletions
diff --git a/app/Controllers/extensionController.php b/app/Controllers/extensionController.php
index 5afa43bd8..ef63c38d2 100644
--- a/app/Controllers/extensionController.php
+++ b/app/Controllers/extensionController.php
@@ -48,7 +48,7 @@ class FreshRSS_extension_Controller extends FreshRSS_ActionController {
$cacheFile = CACHE_PATH . '/extension_list.json';
if (FreshRSS_Context::userConf()->retrieve_extension_list === true) {
if (!file_exists($cacheFile) || (time() - (filemtime($cacheFile) ?: 0) > 86400)) {
- $json = httpGet($extensionListUrl, $cacheFile, 'json');
+ $json = httpGet($extensionListUrl, $cacheFile, 'json')['body'];
} else {
$json = @file_get_contents($cacheFile) ?: '';
}
diff --git a/app/Models/Category.php b/app/Models/Category.php
index e883a99cf..554e002fb 100644
--- a/app/Models/Category.php
+++ b/app/Models/Category.php
@@ -188,7 +188,7 @@ class FreshRSS_Category extends Minz_Model {
}
$ok = true;
$cachePath = $this->cacheFilename($url);
- $opml = httpGet($url, $cachePath, 'opml', $this->attributes(), $this->curlOptions());
+ $opml = httpGet($url, $cachePath, 'opml', $this->attributes(), $this->curlOptions())['body'];
if ($opml == '') {
Minz_Log::warning('Error getting dynamic OPML for category ' . $this->id() . '! ' .
\SimplePie\Misc::url_remove_credentials($url));
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index bc5ed2279..66c05a830 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -845,7 +845,7 @@ HTML;
* @param string $url Overridden URL. Will default to the entry URL.
* @throws Minz_Exception
*/
- public function getContentByParsing(string $url = '', int $maxRedirs = 3): string {
+ public function getContentByParsing(string $url = '', int $maxRedirs = 4): string {
$url = $url ?: htmlspecialchars_decode($this->link(), ENT_QUOTES);
$feed = $this->feed();
if ($url === '' || $feed === null || $feed->pathEntries() === '') {
@@ -872,12 +872,16 @@ HTML;
}
$cachePath = $feed->cacheFilename($url . '#' . $feed->pathEntries());
- $html = httpGet($url, $cachePath, 'html', $feed->attributes(), $feed->curlOptions());
- if (strlen($html) > 0) {
+ $response = httpGet($url, $cachePath, 'html', $feed->attributes(), $feed->curlOptions());
+ $html = $response['body'];
+ if ($html !== '') {
$doc = new DOMDocument();
$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
$xpath = new DOMXPath($doc);
+ // Account for HTTP redirections
+ $url = $response['effective_url'] ?: $url;
+ $maxRedirs -= $response['redirect_count'];
if ($maxRedirs > 0) {
//Follow any HTML redirection
$metas = $xpath->query('//meta[@content]') ?: [];
diff --git a/app/Models/Feed.php b/app/Models/Feed.php
index fc17c875f..3c5fed507 100644
--- a/app/Models/Feed.php
+++ b/app/Models/Feed.php
@@ -788,7 +788,7 @@ class FreshRSS_Feed extends Minz_Model {
}
$httpAccept = $this->kind() === FreshRSS_Feed::KIND_HTML_XPATH_JSON_DOTNOTATION ? 'html' : 'json';
- $content = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
+ $content = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions())['body'];
if (strlen($content) <= 0) {
return null;
}
@@ -846,7 +846,7 @@ class FreshRSS_Feed extends Minz_Model {
}
$httpAccept = $this->kind() === FreshRSS_Feed::KIND_XML_XPATH ? 'xml' : 'html';
- $html = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions());
+ $html = httpGet($feedSourceUrl, $this->cacheFilename(), $httpAccept, $this->attributes(), $this->curlOptions())['body'];
if (strlen($html) <= 0) {
return null;
}