diff options
| author | 2022-08-12 13:06:04 +0200 | |
|---|---|---|
| committer | 2022-08-12 13:06:04 +0200 | |
| commit | 96e0efa6f09271f34941f7078179921e194395ab (patch) | |
| tree | 76fe3f3aa014c1141c2ef7a46142958f788c5e8a /app/Models/Entry.php | |
| parent | aaf15fba7d3b4334e938956c1d1260a7cf76a4fa (diff) | |
Get content by parsing: fix base for protocol-relative URLs (#4500)
* Get content by parsing: fix base for protocol-relative URLs
* Guess missing URL scheme for base
* Light refactoring
Diffstat (limited to 'app/Models/Entry.php')
| -rw-r--r-- | app/Models/Entry.php | 10 |
1 file changed, 7 insertions, 3 deletions
```diff
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index e383f9060..0be353c28 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -547,9 +547,13 @@ class FreshRSS_Entry extends Minz_Model {
 			}
 
 			$base = $xpath->evaluate('normalize-space(//base/@href)');
-			if ($base != false && is_string($base)) {
-				$url = $base;
+			if ($base == false || !is_string($base)) {
+				$base = $url;
+			} elseif (substr($base, 0, 2) === '//') {
+				//Protocol-relative URLs "//www.example.net"
+				$base = (parse_url($url, PHP_URL_SCHEME) ?? 'https') . ':' . $base;
 			}
+
 			$content = '';
 			$nodes = $xpath->query(new Gt\CssXPath\Translator($path));
 			if ($nodes != false) {
@@ -557,7 +561,7 @@ class FreshRSS_Entry extends Minz_Model {
 					$content .= $doc->saveHtml($node) . "\n";
 				}
 			}
-			$html = trim(sanitizeHTML($content, $url));
+			$html = trim(sanitizeHTML($content, $base));
 			return $html;
 		} else {
 			throw new Exception();
```
