aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2026-01-10 23:38:00 +0100
committerGravatar GitHub <noreply@github.com> 2026-01-10 23:38:00 +0100
commit7573fee4f0949cce53af376a97837c32e50a5568 (patch)
tree96c6b6885f4ef8594b271b5461bcfe78554ba3da
parenta5bbd679d9716dc0a2f4892efc46c403590845c0 (diff)
Fix MySQL transliterator_transliterate fallback (#8427)
The string syntax of `strtr()` cannot handle mutibytes characters, so need to be rewritten using an array map approach. Extend the fallback replacements to include the Windows/ISO charsets of the latin languages for which we have a translation
-rw-r--r--app/Models/DatabaseDAO.php42
-rw-r--r--tests/app/Models/DatabaseDAOTest.php116
2 files changed, 154 insertions, 4 deletions
diff --git a/app/Models/DatabaseDAO.php b/app/Models/DatabaseDAO.php
index e419e5892..f98803537 100644
--- a/app/Models/DatabaseDAO.php
+++ b/app/Models/DatabaseDAO.php
@@ -495,10 +495,44 @@ SQL;
return $transliterated;
}
}
- return strtolower(strtr($str,
- 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ',
- 'AAAAAAaaaaaaOOOOOOooooooEEEEeeeeCcIIIIiiiiUUUUuuuuyNn'
- ));
+ // Fallback covering only Latin: Windows-1252 / ISO-8859-15 / ISO-8859-1, Windows-1250 / ISO-8859-2, Windows-1257 / ISO-8859-13, Windows-1254 / ISO-8859-9
+ // phpcs:disable PSR12.Operators.OperatorSpacing.NoSpaceBefore, PSR12.Operators.OperatorSpacing.NoSpaceAfter, Squiz.WhiteSpace.OperatorSpacing.NoSpaceBefore, Squiz.WhiteSpace.OperatorSpacing.NoSpaceAfter
+ $replacements = [
+ 'A' => 'a', 'À'=>'a', 'Á'=>'a', 'Â'=>'a', 'Ä'=>'a', 'Ã'=>'a', 'Å'=>'a', 'Ă'=>'a', 'Ą'=>'a', 'Ā'=>'a',
+ 'à'=>'a', 'á'=>'a', 'â'=>'a', 'ä'=>'a', 'ã'=>'a', 'å'=>'a', 'ă'=>'a', 'ą'=>'a', 'ā'=>'a',
+ 'B' => 'b',
+ 'C' => 'c', 'Ç'=>'c', 'Ć'=>'c', 'Č'=>'c', 'ç'=>'c', 'ć'=>'c', 'č'=>'c',
+ 'D' => 'd', 'Ď'=>'d', 'Đ'=>'d', 'ď'=>'d', 'đ'=>'d',
+ 'E' => 'e', 'È'=>'e', 'É'=>'e', 'Ê'=>'e', 'Ë'=>'e', 'Ę'=>'e', 'Ě'=>'e', 'Ē'=>'e', 'Ė'=>'e',
+ 'è'=>'e', 'é'=>'e', 'ê'=>'e', 'ë'=>'e', 'ę'=>'e', 'ě'=>'e', 'ē'=>'e', 'ė'=>'e',
+ 'F' => 'f',
+ 'G' => 'g', 'Ğ'=>'g', 'Ģ'=>'g', 'ğ'=>'g', 'ģ'=>'g',
+ 'H' => 'h',
+ 'I' => 'i', 'Ì'=>'i', 'Í'=>'i', 'Î'=>'i', 'Ï'=>'i', 'İ'=>'i', 'Ī'=>'i', 'Į'=>'i',
+ 'ì'=>'i', 'í'=>'i', 'î'=>'i', 'ï'=>'i', 'ı'=>'i', 'ī'=>'i', 'į'=>'i',
+ 'J' => 'j',
+ 'K' => 'k', 'Ķ'=>'k', 'ķ'=>'k',
+ 'L' => 'l', 'Ĺ'=>'l', 'Ľ'=>'l', 'Ł'=>'l', 'Ļ'=>'l', 'ĺ'=>'l', 'ľ'=>'l', 'ł'=>'l', 'ļ'=>'l',
+ 'M' => 'm',
+ 'N' => 'n', 'Ñ'=>'n', 'Ń'=>'n', 'Ň'=>'n', 'Ņ'=>'n', 'ñ'=>'n', 'ń'=>'n', 'ň'=>'n', 'ņ'=>'n',
+ 'O' => 'o', 'Ò'=>'o', 'Ó'=>'o', 'Ô'=>'o', 'Ö'=>'o', 'Õ'=>'o', 'Ø'=>'o', 'Ő'=>'o', 'ò'=>'o', 'ó'=>'o', 'ô'=>'o', 'ö'=>'o', 'õ'=>'o', 'ø'=>'o', 'ő'=>'o',
+ 'P' => 'p',
+ 'Q' => 'q',
+ 'R' => 'r', 'Ŕ'=>'r', 'Ř'=>'r', 'ŕ'=>'r', 'ř'=>'r',
+ 'S' => 's', 'Ś'=>'s', 'Š'=>'s', 'Ş'=>'s', 'ß'=>'ss', 'ś'=>'s', 'š'=>'s', 'ş'=>'s',
+ 'T' => 't', 'Ť'=>'t', 'Ţ'=>'t', 'ť'=>'t', 'ţ'=>'t',
+ 'U' => 'u', 'Ù'=>'u', 'Ú'=>'u', 'Û'=>'u', 'Ü'=>'u', 'Ů'=>'u', 'Ű'=>'u', 'Ū'=>'u', 'Ų'=>'u',
+ 'ù'=>'u', 'ú'=>'u', 'û'=>'u', 'ü'=>'u', 'ů'=>'u', 'ű'=>'u', 'ū'=>'u', 'ų'=>'u',
+ 'V' => 'v',
+ 'W' => 'w',
+ 'X' => 'x',
+ 'Y' => 'y', 'Ý'=>'y', 'Ÿ'=>'y', 'ý'=>'y', 'ÿ'=>'y',
+ 'Z' => 'z', 'Ź'=>'z', 'Ż'=>'z', 'Ž'=>'z', 'ź'=>'z', 'ż'=>'z', 'ž'=>'z',
+ 'Æ'=>'ae', 'æ'=>'ae',
+ 'Œ'=>'oe', 'œ'=>'oe',
+ ];
+ // phpcs:enable PSR12.Operators.OperatorSpacing.NoSpaceBefore, PSR12.Operators.OperatorSpacing.NoSpaceAfter, Squiz.WhiteSpace.OperatorSpacing.NoSpaceBefore, Squiz.WhiteSpace.OperatorSpacing.NoSpaceAfter
+ return strtr($str, $replacements);
}
/**
diff --git a/tests/app/Models/DatabaseDAOTest.php b/tests/app/Models/DatabaseDAOTest.php
new file mode 100644
index 000000000..1c619aa5f
--- /dev/null
+++ b/tests/app/Models/DatabaseDAOTest.php
@@ -0,0 +1,116 @@
+<?php
+declare(strict_types=1);
+
+use PHPUnit\Framework\Attributes\DataProvider;
+
+final class DatabaseDAOTest extends \PHPUnit\Framework\TestCase {
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeCommon(): array {
+ return [
+ ['abc', 'abc', false, true],
+ ['AbC', 'aBc', false, true],
+ ['zabc', 'abc', false, false],
+ ['abcd', 'abc', false, false],
+ ['aéc', 'ac', false, false],
+ ['abcd', 'bc', true, true],
+ ['abcd', 'BC', true, true],
+ ['aßc', 'ß', true, true],
+ ['aéc', 'é', true, true],
+ ['Été', 'Ét', true, true],
+ ['aßc', 'ac', true, false],
+ ['ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz', false, true],
+ ['abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', true, true],
+ ];
+ }
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeAccents(): array {
+ return [
+ ['café', 'cafè', false, false],
+ ['Été', 'Eté', false, false],
+ ['Été', 'Et', true, false],
+ ];
+ }
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeNoAccents(): array {
+ return [
+ ['café', 'cafè', false, true],
+ ['Été', 'Eté', false, true],
+ ['Été', 'Et', true, true],
+ ];
+ }
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeAccentsCasing(): array {
+ return [
+ ['Été', 'été', false, true],
+ ['AÎNÉE', 'aîné', true, true],
+ ['AÎNÉ', 'aine', false, false],
+ ['AÎNÉE', 'aine', true, false],
+ ];
+ }
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeUnicodeCasing(): array {
+ return [
+ ['ČĆĐŠŽ', 'čćđšž', false, true], // Croatian
+ ['ÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ', 'áčďéěíňóřšťúůýž', false, true], // Czech
+ ['ÆØÅ', 'æøå', false, true], // Danish
+ ['ŠŽÕÄÖÜ', 'šžõäöü', false, true], // Estonian
+ ['ÄÖ', 'äö', false, true], // Finnish
+ ['ÀÂÆÇÈÉÊËÎÏÔŒÙÛÜŸ', 'àâæçèéêëîïôœùûüÿ', false, true], // French
+ ['ÄÖÜ', 'äöü', false, true], // German
+ ['ΑΆΒΓΔΕΈΖΗΉΘΙΊΪΚΛΜΝΞΟΌΠΡΣΤΥΎΫΦΧΨΩΏ', 'αάβγδεέζηήθιίϊκλμνξοόπρστυύϋφχψωώ', false, true], // Greek
+ ['ÁÉÍÓÖŐÚÜŰ', 'áéíóöőúüű', false, true], // Hungarian
+ ['ÁÉÍÓÚ', 'áéíóú', false, true], // Irish
+ ['ÀÈÉÌÒÓÙ', 'àèéìòóù', false, true], // Italian
+ ['ĀČĒĢĪĶĻŅŠŪŽ', 'āčēģīķļņšūž', false, true], // Latvian
+ ['ĄČĘĖĮŠŲŪŽ', 'ąčęėįšųūž', false, true], // Lithuanian
+ ['ĊĠĦŻ', 'ċġħż', false, true], // Maltese
+ ['ĄĆĘŁŃÓŚŹŻ', 'ąćęłńóśźż', false, true], // Polish
+ ['ÁÂÃÇÉÍÓÕÚ', 'áâãçéíóõú', false, true], // Portuguese
+ ['ĂÂÎȘȚ', 'ăâîșț', false, true], // Romanian
+ ['ÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ', 'áäčďéíĺľňóôŕšťúýž', false, true], // Slovak
+ ['ČŠŽ', 'čšž', false, true], // Slovenian
+ ['ÁÉÍÑÓÚÜ', 'áéíñóúü', false, true], // Spanish
+ ['ÅÄÖ', 'åäö', false, true], // Swedish
+ ];
+ }
+
+ /** @return list<array{string,string,bool,bool}> */
+ public static function provideStrilikeNoUnicodeCasing(): array {
+ return [
+ ['café', 'cafè', false, false],
+ ['café', 'Café', true, true],
+ ['Été', 'été', true, false],
+ ];
+ }
+
+ #[DataProvider('provideStrilikeCommon')]
+ #[DataProvider('provideStrilikeNoAccents')]
+ #[DataProvider('provideStrilikeUnicodeCasing')]
+ public static function test_strilike_MySQL(string $haystack, string $needle, bool $contains, bool $expected): void {
+ if (!function_exists('transliterator_transliterate') && str_contains($haystack, 'α')) {
+ self::markTestSkipped('transliterator_transliterate function not available to handle e.g. Greek.');
+ return; // @phpstan-ignore deadCode.unreachable
+ }
+ self::assertSame($expected, FreshRSS_DatabaseDAO::strilike($haystack, $needle, $contains));
+ }
+
+ #[DataProvider('provideStrilikeCommon')]
+ #[DataProvider('provideStrilikeAccents')]
+ #[DataProvider('provideStrilikeAccentsCasing')]
+ #[DataProvider('provideStrilikeUnicodeCasing')]
+ public static function test_strilike_PGSQL(string $haystack, string $needle, bool $contains, bool $expected): void {
+ self::assertSame($expected, FreshRSS_DatabaseDAOPGSQL::strilike($haystack, $needle, $contains));
+ }
+
+ #[DataProvider('provideStrilikeCommon')]
+ #[DataProvider('provideStrilikeAccents')]
+ #[DataProvider('provideStrilikeNoUnicodeCasing')]
+ public static function test_strilike_SQLite(string $haystack, string $needle, bool $contains, bool $expected): void {
+ self::assertSame($expected, FreshRSS_DatabaseDAOSQLite::strilike($haystack, $needle, $contains));
+ }
+}