aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexandre Alapetite <alexandre@alapetite.fr> 2020-05-21 11:56:55 +0200
committer GitHub <noreply@github.com> 2020-05-21 11:56:55 +0200
commit b906d79d61beaeed7474f33fb6dac01a63ca90b9 (patch)
tree 346ad2c52d21d93b29f32a3f81e62c5728e643e3
parent d9a2889c3fa31315f2535ebd1b787d9c1f5fb2f8 (diff)
getContentByParsing follow HTML redirections (#2985)
* getContentByParsing follow HTML redirections

Add the ability to follow HTML redirections in getContentByParsing:

```html
<meta http-equiv="Refresh" content="1; url=https://example.net/article123.html" />
```

* Better regex

* Trim http-equiv
-rw-r--r-- app/Models/Entry.php 17
1 file changed, 16 insertions, 1 deletion
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 2ce44c73d..b5328df90 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -352,7 +352,7 @@ class FreshRSS_Entry extends Minz_Model {
}
}
- public static function getContentByParsing($url, $path, $attributes = array()) {
+ public static function getContentByParsing($url, $path, $attributes = array(), $maxRedirs = 3) {
$system_conf = Minz_Configuration::get('system');
$limits = $system_conf->limits;
$feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
@@ -392,6 +392,21 @@ class FreshRSS_Entry extends Minz_Model {
if ($html) {
require_once(LIB_PATH . '/lib_phpQuery.php');
$doc = phpQuery::newDocument($html);
+
+ if ($maxRedirs > 0) {
+ //Follow any HTML redirection
+ $metas = $doc->find('meta[http-equiv][content]');
+ foreach ($metas as $meta) {
+ if (strtolower(trim($meta->getAttribute('http-equiv'))) === 'refresh') {
+ $refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
+ $refresh = SimplePie_Misc::absolutize_url($refresh, $url);
+ if ($refresh != false && $refresh !== $url) {
+ return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
+ }
+ }
+ }
+ }
+
$content = $doc->find($path);
return trim(sanitizeHTML($content->__toString(), $url));
} else {