From 0e6ad01dbfc396bee63f2e100df75bcd6488ed16 Mon Sep 17 00:00:00 2001 From: Kiblyn11 <4669951+Kiblyn11@users.noreply.github.com> Date: Wed, 17 Feb 2021 21:50:25 +0100 Subject: Fix: handle very big feed (#3416) * fix: handle big xml files which cause out of memory exceptions by working with chunks in cleanMd5 function (because of preg_replace) and parse (because of xml_parse) * Review * Fixes in error handling (case of the last call to xml_parse, case of error during fopen, break in case of XML error...) * Takes advantage of the chunking for computing the cache hash * Larger chunks of 1MB Co-authored-by: e Co-authored-by: Alexandre Alapetite --- lib/SimplePie/SimplePie.php | 24 ++++++++++++++++++------ lib/SimplePie/SimplePie/Parser.php | 23 ++++++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/SimplePie/SimplePie.php b/lib/SimplePie/SimplePie.php index bddf67645..c5a036c2d 100644 --- a/lib/SimplePie/SimplePie.php +++ b/lib/SimplePie/SimplePie.php @@ -1322,12 +1322,24 @@ class SimplePie function cleanMd5($rss) { - return md5(preg_replace(array( - '#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+#', - '#<(media:starRating|media:statistics) [^/<>]+/>#', - '#<!--.+?-->#s', - ), '', $rss)); - + //Process by chunks not to use too much memory + if (($stream = fopen('php://temp', 'r+')) && + fwrite($stream, $rss) && + rewind($stream)) + { + $ctx = hash_init('md5'); + while ($stream_data = fread($stream, 1048576)) + { + hash_update($ctx, preg_replace([ + '#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+#', + '#<(media:starRating|media:statistics) [^/<>]+/>#', + '#<!--.+?-->#s', + ], '', $stream_data)); + } + fclose($stream); + return hash_final($ctx); + } + return ''; } /** diff --git a/lib/SimplePie/SimplePie/Parser.php b/lib/SimplePie/SimplePie/Parser.php index ddaba46fd..fca10335c 100644 --- a/lib/SimplePie/SimplePie/Parser.php +++ b/lib/SimplePie/SimplePie/Parser.php @@ -181,12 +181,29 @@ 
class SimplePie_Parser xml_set_element_handler($xml, 'tag_open', 'tag_close'); // Parse! - if (!xml_parse($xml, $data, true)) + if (($stream = fopen('php://temp', 'r+')) && + fwrite($stream, $data) && + rewind($stream)) + { + //Parse by chunks not to use too much memory + do + { + $stream_data = fread($stream, 1048576); + if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream))) + { + $this->error_code = xml_get_error_code($xml); + $this->error_string = xml_error_string($this->error_code); + $return = false; + break; + } + } while (!feof($stream)); + fclose($stream); + } + else { - $this->error_code = xml_get_error_code($xml); - $this->error_string = xml_error_string($this->error_code); $return = false; } + $this->current_line = xml_get_current_line_number($xml); $this->current_column = xml_get_current_column_number($xml); $this->current_byte = xml_get_current_byte_index($xml); -- cgit v1.2.3