summary | refs | log | tree | commit | diff
path: root/app/Utils/httpUtil.php
diff options
context:
space:
mode:
author: Alexandre Alapetite <alexandre@alapetite.fr> 2025-12-08 23:18:33 +0100
committer: GitHub <noreply@github.com> 2025-12-08 23:18:33 +0100
commit ec2663812470820dc802628f9cb1b768f4f26fc6 (patch)
tree 8454f8fef024db4bc5513897d75acca132845a10 /app/Utils/httpUtil.php
parent fd9e2843f10058147d26d510471e5420ac8101a1 (diff)
Fix saveHTML() scrambling encoding in newer libxml2 (#8296)
fix https://github.com/FreshRSS/FreshRSS/pull/8279#issuecomment-3620674818
Diffstat (limited to 'app/Utils/httpUtil.php')
-rw-r--r-- app/Utils/httpUtil.php 18
1 files changed, 15 insertions, 3 deletions
diff --git a/app/Utils/httpUtil.php b/app/Utils/httpUtil.php
index e43891f34..f5cd95738 100644
--- a/app/Utils/httpUtil.php
+++ b/app/Utils/httpUtil.php
@@ -203,8 +203,8 @@ final class FreshRSS_http_Util {
}
}
if ($httpCharsetNormalized === 'UTF-8') {
- // Save encoding information as XML declaration
- return '<' . '?xml version="1.0" encoding="' . $httpCharsetNormalized . '" ?' . ">\n" . $html;
+ // Save encoding information as Unicode BOM
+ return "\xEF\xBB\xBF" . $html;
}
// Give up
return $html;
@@ -241,7 +241,19 @@ final class FreshRSS_http_Util {
$doc->documentElement->insertBefore($base, $doc->documentElement->firstChild);
}
}
- return $doc->saveHTML() ?: $html;
+
+ // Save the start of HTML because libxml2 saveHTML() risks scrambling it
+ $htmlPos = stripos($html, '<html');
+ $htmlStart = $htmlPos === false || $htmlPos > 512 ? '' : substr($html, 0, $htmlPos);
+
+ $html = $doc->saveHTML() ?: $html;
+ if ($htmlStart !== '' && !str_starts_with($html, $htmlStart)) {
+ // libxml2 saveHTML() risks removing Unicode BOM and XML declaration,
+ // which affects future detection of charset encoding, so manually restore it
+ $htmlPos = stripos($html, '<html');
+ $html = $htmlPos === false || $htmlPos > 512 ? $html : $htmlStart . substr($html, $htmlPos);
+ }
+ return $html;
}
/**