diff options
| author | 2025-11-02 00:28:35 +0100 | |
|---|---|---|
| committer | 2025-11-02 00:28:35 +0100 | |
| commit | 500d05f3c5ec3a3dffa7791f7447bc0d31d6f7e0 (patch) | |
| tree | a59a20582ecfa1ba6fc8d3df74b8d9edd3b901d8 /lib/simplepie | |
| parent | baf84575d4aa3fa7a73950cd2e91059b5f651906 (diff) | |
Implement whitelist for SimplePie sanitizer (#7924)
* Implement whitelist for SimplePie sanitizer
ref: https://github.com/FreshRSS/FreshRSS/pull/7770#issuecomment-3140334326
https://github.com/FreshRSS/simplepie/pull/53
https://github.com/simplepie/simplepie/pull/947
* Remove `<plaintext>` from whitelist
* Improve order
* Remove some tags from whitelist
* Revert partially
* sync
* Display contents of `<noscript>` and `<noembed>`
* sync
* Allow use of `<track>`
* sync again
* Sync to SimplePie fork
https://github.com/FreshRSS/simplepie/pull/53
* Alphabetic order
* Reduce list of stripped attributes
* Temporarily strip some attributes
---------
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
Diffstat (limited to 'lib/simplepie')
| -rw-r--r-- | lib/simplepie/simplepie/CHANGELOG.md | 14 | ||||
| -rw-r--r-- | lib/simplepie/simplepie/phpcs.xml | 2 | ||||
| -rw-r--r-- | lib/simplepie/simplepie/src/Sanitize.php | 115 | ||||
| -rw-r--r-- | lib/simplepie/simplepie/src/SimplePie.php | 67 |
4 files changed, 191 insertions, 7 deletions
diff --git a/lib/simplepie/simplepie/CHANGELOG.md b/lib/simplepie/simplepie/CHANGELOG.md index 4d51eecca..9e0aeca55 100644 --- a/lib/simplepie/simplepie/CHANGELOG.md +++ b/lib/simplepie/simplepie/CHANGELOG.md @@ -7,8 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://github.com/simplepie/simplepie/compare/1.9.0...master) +### Added + Nothing yet. +### Changed + +Nothing yet. + +### Fixed + +Nothing yet. + +### Deprecated + +- `SimplePie\SimplePie::set_item_limit()` is deprecated since it only affects multi-feed mode, which has been deprecated in SimplePie 1.9 (by @jtojnar in [#954](https://github.com/simplepie/simplepie/pull/954)) + ## [1.9.0](https://github.com/simplepie/simplepie/compare/1.8.1...1.9.0) - 2025-09-12 ### Added diff --git a/lib/simplepie/simplepie/phpcs.xml b/lib/simplepie/simplepie/phpcs.xml index 6ec1797bf..0dfa9d9cb 100644 --- a/lib/simplepie/simplepie/phpcs.xml +++ b/lib/simplepie/simplepie/phpcs.xml @@ -23,7 +23,7 @@ <rule ref="PSR1.Classes.ClassDeclaration.MissingNamespace"> <exclude-pattern>./library/</exclude-pattern> </rule> - <rule ref="Squiz.Classes.ValidClassName.NotCamelCaps"> + <rule ref="Squiz.Classes.ValidClassName.NotPascalCase"> <exclude-pattern>./library/</exclude-pattern> </rule> </ruleset> diff --git a/lib/simplepie/simplepie/src/Sanitize.php b/lib/simplepie/simplepie/src/Sanitize.php index c8aa2dce6..df1176b75 100644 --- a/lib/simplepie/simplepie/src/Sanitize.php +++ b/lib/simplepie/simplepie/src/Sanitize.php @@ -50,6 +50,14 @@ class Sanitize implements RegistryAware public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc']; /** @var string[] */ public $rename_attributes = []; + /** @var array<string, string[]> */ + public $allowed_html_elements_with_attributes = []; + /** @var string[] */ + public $allowed_html_attributes = []; + /** @var bool */ + public $allow_data_attr = true; + /** @var bool */ + public $allow_aria_attr = true; /** @var array<string, array<string, string>> */ public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']]; /** @var bool */ @@ -237,6 +245,42 @@ class Sanitize implements RegistryAware } /** + * @param array<string,string[]> $tags Set array of allowed HTML elements with their allowed attributes. + * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed. + * Preferred over {@see Sanitize::strip_htmltags()}. + */ + public function allowed_html_elements_with_attributes(array $tags = []): void + { + $this->strip_htmltags = []; + $this->strip_attributes = []; + $this->allowed_html_elements_with_attributes = $tags; + } + + /** + * @param string[] $attrs Set default array of allowed HTML attributes. + */ + public function allowed_html_attributes(array $attrs = []): void + { + $this->allowed_html_attributes = $attrs; + } + + /** + * @param bool $allow Whether data-* should be allowed or not + */ + public function allow_data_attr(bool $allow = true): void + { + $this->allow_data_attr = $allow; + } + + /** + * @param bool $allow Whether aria-* should be allowed or not + */ + public function allow_aria_attr(bool $allow = true): void + { + $this->allow_aria_attr = $allow; + } + + /** * @return void */ public function encode_instead_of_strip(bool $encode = false) @@ -460,6 +504,16 @@ class Sanitize implements RegistryAware } } + if (!empty($this->rename_attributes)) { + foreach ($this->rename_attributes as $attrib) { + $this->rename_attr($attrib, $xpath); + } + } + + if (!empty($this->allowed_html_elements_with_attributes)) { + $this->enforce_allowed_html_nodes($document, $this->allow_data_attr, $this->allow_aria_attr); + } + // Strip out HTML tags and attributes that might cause various security problems. // Based on recommendations by Mark Pilgrim at: // https://web.archive.org/web/20110902041826/http://diveintomark.org:80/archives/2003/06/12/how_to_consume_rss_safely @@ -469,12 +523,6 @@ class Sanitize implements RegistryAware } } - if ($this->rename_attributes) { - foreach ($this->rename_attributes as $attrib) { - $this->rename_attr($attrib, $xpath); - } - } - if ($this->strip_attributes) { foreach ($this->strip_attributes as $attrib) { $this->strip_attr($attrib, $xpath); @@ -640,6 +688,61 @@ class Sanitize implements RegistryAware } /** + * Keep only allowed HTML elements (tags) and their allowed attributes. + */ + protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_data_attr = true, bool $allow_aria_attr = true): void + { + if ($element instanceof \DOMElement) { + $tag = $element->tagName; + $parent = $element->parentNode; + if (!in_array($tag, ['html', 'head', 'body', 'div'], true) + && !isset($this->allowed_html_elements_with_attributes[$tag])) { + if (!in_array($tag, ['script', 'style', 'svg', 'math'], true)) { + // Preserve children inside the disallowed element + for ($i = $element->childNodes->length - 1; $i >= 0; $i--) { + $child = $element->childNodes->item($i); + if ($child === null) { + continue; + } + if ($child instanceof \DOMText) { + $child->nodeValue = htmlspecialchars($child->nodeValue ?? '', ENT_QUOTES, 'UTF-8'); + } + if ($parent !== null) { + $parent->insertBefore($child, $element); + } + $this->enforce_allowed_html_nodes($child, $allow_data_attr, $allow_aria_attr); + } + } + if ($parent !== null) { + $parent->removeChild($element); + return; + } + } + $allowed_attrs = array_merge($this->allowed_html_elements_with_attributes[$tag] ?? [], $this->allowed_html_attributes); + for ($i = $element->attributes->length - 1; $i >= 0; $i--) { + $attr = $element->attributes[$i]->nodeName; + // Skip data-*, aria-* if allowed + if (($allow_data_attr && str_starts_with($attr, 'data-')) + || ($allow_aria_attr && str_starts_with($attr, 'aria-'))) { + continue; + } + + if (!in_array($attr, $allowed_attrs, true)) { + $element->removeAttributeNode($element->attributes[$i]); + } + } + } + if ($element instanceof \DOMElement || $element instanceof \DOMDocument) { + for ($i = $element->childNodes->length - 1; $i >= 0; $i--) { + $child = $element->childNodes->item($i); + if ($child !== null) { + $this->enforce_allowed_html_nodes($child, $allow_data_attr, $allow_aria_attr); + } + } + } + } + + /** * @param int-mask-of<SimplePie::CONSTRUCT_*> $type * @return void */ diff --git a/lib/simplepie/simplepie/src/SimplePie.php b/lib/simplepie/simplepie/src/SimplePie.php index 35064ec70..38c61e841 100644 --- a/lib/simplepie/simplepie/src/SimplePie.php +++ b/lib/simplepie/simplepie/src/SimplePie.php @@ -664,6 +664,35 @@ class SimplePie public $rename_attributes = []; /** + * @var array<string,string[]> Stores allowed tags and attributes. + * Preferred over $strip_htmltags and $strip_attributes. + * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed. + * @see SimplePie::allowed_html_elements_with_attributes() + * @access private + */ + public $allowed_html_elements_with_attributes = []; + + /** + * @var string[] Stores array of default allowed attributes. + * @see SimplePie::allowed_html_attributes() + * @access private + */ + public $allowed_html_attributes = []; + + /** + * @var bool Whether `data-*` attributes should be allowed or not + * @see SimplePie::allow_data_attr() + * @access private + */ + public $allow_data_attr = true; + /** + * @var bool Whether `aria-*` attributes should be allowed or not + * @see SimplePie::allow_aria_attr() + * @access private + */ + public $allow_aria_attr = true; + + /** * @var bool Should we throw exceptions, or use the old-style error property? * @access private */ @@ -1525,6 +1554,42 @@ class SimplePie } /** + * @param array<string,string[]> $tags Set array of allowed tags and attributes. + * Note that `<html>`, `<head>`, `<body>`, `<div>` are always allowed. + * Preferred over {@see SimplePie::allowed_html_attributes()} and {@see SimplePie::strip_attributes()}. + * + * Example: `['a' => ['href', 'title'], 'img' => ['src', 'alt']]` + */ + public function allowed_html_elements_with_attributes(array $tags = []): void + { + $this->sanitize->allowed_html_elements_with_attributes($tags); + } + + /** + * @param string[] $attrs Set default array of allowed attributes. + */ + public function allowed_html_attributes(array $attrs = []): void + { + $this->sanitize->allowed_html_attributes($attrs); + } + + /** + * @param bool $allow Whether `data-*` attributes should be allowed or not + */ + public function allow_data_attr(bool $allow = true): void + { + $this->sanitize->allow_data_attr($allow); + } + + /** + * @param bool $allow Whether `aria-*` attributes should be allowed or not + */ + public function allow_aria_attr(bool $allow = true): void + { + $this->sanitize->allow_aria_attr($allow); + } + + /** * @return void */ public function encode_instead_of_strip(bool $enable = true) @@ -1651,6 +1716,8 @@ class SimplePie /** * Set the limit for items returned per-feed with multifeeds * + * @deprecated since SimplePie 1.10.0, this does nothing outside multifeeds. + * * @param int $limit The maximum number of items to return. * @return void */ |
