diff options
| author | 2020-05-21 11:56:55 +0200 | |
|---|---|---|
| committer | 2020-05-21 11:56:55 +0200 | |
| commit | b906d79d61beaeed7474f33fb6dac01a63ca90b9 (patch) | |
| tree | 346ad2c52d21d93b29f32a3f81e62c5728e643e3 /app | |
| parent | d9a2889c3fa31315f2535ebd1b787d9c1f5fb2f8 (diff) | |
getContentByParsing follow HTML redirections (#2985)
* getContentByParsing follow HTML redirections
Add the ability to follow HTML redirections in getContentByParsing:
```html
<meta http-equiv="Refresh" content="1; url=https://example.net/article123.html" />
```
* Better regex
* Trim http-equiv
Diffstat (limited to 'app')
| -rw-r--r-- | app/Models/Entry.php | 17 |
1 file changed, 16 insertions, 1 deletion
```diff
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 2ce44c73d..b5328df90 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -352,7 +352,7 @@ class FreshRSS_Entry extends Minz_Model {
 		}
 	}
 
-	public static function getContentByParsing($url, $path, $attributes = array()) {
+	public static function getContentByParsing($url, $path, $attributes = array(), $maxRedirs = 3) {
 		$system_conf = Minz_Configuration::get('system');
 		$limits = $system_conf->limits;
 		$feed_timeout = empty($attributes['timeout']) ? 0 : intval($attributes['timeout']);
@@ -392,6 +392,21 @@ class FreshRSS_Entry extends Minz_Model {
 		if ($html) {
 			require_once(LIB_PATH . '/lib_phpQuery.php');
 			$doc = phpQuery::newDocument($html);
+
+			if ($maxRedirs > 0) {
+				//Follow any HTML redirection
+				$metas = $doc->find('meta[http-equiv][content]');
+				foreach ($metas as $meta) {
+					if (strtolower(trim($meta->getAttribute('http-equiv'))) === 'refresh') {
+						$refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
+						$refresh = SimplePie_Misc::absolutize_url($refresh, $url);
+						if ($refresh != false && $refresh !== $url) {
+							return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
+						}
+					}
+				}
+			}
+
 			$content = $doc->find($path);
 			return trim(sanitizeHTML($content->__toString(), $url));
 		} else {
```
