From 1a552bd60eab4a4b940d3896376b599e155d7da0 Mon Sep 17 00:00:00 2001
From: Alexandre Alapetite
Date: Fri, 6 Sep 2024 09:35:58 +0200
Subject: Regex search (#6706)

* Regex search
fix https://github.com/FreshRSS/FreshRSS/issues/3549

* Fix PHPStan

* Fix escape

* Fix ungreedy

* Initial support for regex search in PostgreSQL and MySQL

* Improvements, support MySQL

* Fix multiline

* Add support for SQLite

* A few tests

* Added author: and inurl: support, documentation

* author example

* Remove \b for now

* Disable regex sanitization for now

* Fix getInurlRegex

* getNotInurlRegex

* Quotes for inurl:

* Fix test

* Fix quoted tags + regex for tags
https://github.com/FreshRSS/FreshRSS/issues/6761

* Fix wrong regex detection

* Add MariaDB

* Fix logic

* Increase requirements for MySQL and MariaDB
Check support for multiline mode in MySQL

* Remove sanitizeRegexes()

* Allow searching HTML code
Allow searching for instance `/
/`
Fix https://github.com/FreshRSS/FreshRSS/issues/6775#issuecomment-2331769883

* Doc regex search HTML

* Fix Doctype
---
 tests/app/Models/SearchTest.php | 174 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 171 insertions(+), 3 deletions(-)

(limited to 'tests/app')

diff --git a/tests/app/Models/SearchTest.php b/tests/app/Models/SearchTest.php
index 27943cdb2..a25adc160 100644
--- a/tests/app/Models/SearchTest.php
+++ b/tests/app/Models/SearchTest.php
@@ -124,10 +124,10 @@ class SearchTest extends PHPUnit\Framework\TestCase {
 	public static function provideInurlSearch(): array {
 		return [
 			['inurl:word1', ['word1'], null],
-			['inurl: word1', [], ['word1']],
+			['inurl: word1', null, ['word1']],
 			['inurl:123', ['123'], null],
 			['inurl:word1 word2', ['word1'], ['word2']],
-			['inurl:"word1 word2"', ['"word1'], ['word2"']],
+			['inurl:"word1 word2"', ['word1 word2'], null],
 			['inurl:word1 word2 inurl:word3', ['word1', 'word3'], ['word2']],
 			["inurl:word1 'word2 word3' word4", ['word1'], ['word2 word3', 'word4']],
 			['inurl:word1+word2', ['word1+word2'], null],
@@ -196,7 +196,7 @@ class SearchTest extends PHPUnit\Framework\TestCase {
 			['# word1', null, ['#', 'word1']],
 			['#123', ['123'], null],
 			['#word1 word2', ['word1'], ['word2']],
-			['#"word1 word2"', ['"word1'], ['word2"'],],
+			['#"word1 word2"', ['word1 word2'], null],
 			['#word1 #word2', ['word1', 'word2'], null],
 			["#word1 'word2 word3' word4", ['word1'], ['word2 word3', 'word4']],
 			['#word1+word2', ['word1 word2'], null]
@@ -442,4 +442,172 @@ class SearchTest extends PHPUnit\Framework\TestCase {
 			],
 		];
 	}
+
+	/** Checks the SQL fragment and parameter values produced for regex searches by the PostgreSQL entry DAO.
+	 * @dataProvider provideRegexPostreSQL
+	 * @param array<string> $values Expected parameter values returned alongside the SQL fragment.
+	 */
+	public function test__regex_postgresql(string $input, string $sql, array $values): void {
+		[$filterValues, $filterSearch] = FreshRSS_EntryDAOPGSQL::sqlBooleanSearch('e.', new FreshRSS_BooleanSearch($input));
+		self::assertEquals(trim($sql), trim($filterSearch));	// surrounding whitespace is ignored on both sides
+		self::assertEquals($values, $filterValues);
+	}
+
+	/** @return list<array{string,string,list<string>}> Datasets: search input, expected SQL, expected values. */
+	public static function provideRegexPostreSQL(): array {
+		return [
+			[	// No flag: ~ operator, regex delimiters removed from the value
+				'intitle:/^ab$/',
+				'(e.title ~ ? )',
+				['^ab$']
+			],
+			[	// i flag: expressed through the ~* operator, value unchanged
+				'intitle:/^ab$/i',
+				'(e.title ~* ? )',
+				['^ab$']
+			],
+			[	// m flag: expressed as an inline (?m) prefix in the value
+				'intitle:/^ab$/m',
+				'(e.title ~ ? )',
+				['(?m)^ab$']
+			],
+			[	// Backslash escape (\M) is expected to reach the value unchanged
+				'intitle:/^ab\\M/',
+				'(e.title ~ ? )',
+				['^ab\\M']
+			],
+			[	// author: the ';' separators in e.author are replaced by newlines before matching
+				'author:/^ab$/',
+				"(REPLACE(e.author, ';', '\n') ~ ? )",
+				['^ab$']
+			],
+			[	// inurl: matches against e.link
+				'inurl:/^ab$/',
+				'(e.link ~ ? )',
+				['^ab$']
+			],
+			[	// Bare regex: title OR content, one value per placeholder
+				'/^ab$/',
+				'((e.title ~ ? OR e.content ~ ?) )',
+				['^ab$', '^ab$']
+			],
+			[	// Negated bare regex: neither title nor content may match
+				'!/^ab$/',
+				'(NOT e.title ~ ? AND NOT e.content ~ ? )',
+				['^ab$', '^ab$']
+			],
+			[	// Not a regex
+				'inurl:https://example.net/test/',
+				'(e.link LIKE ? )',
+				['%https://example.net/test/%']
+			],
+			[	// Not a regex
+				'https://example.net/test/',
+				'((e.title LIKE ? OR e.content LIKE ?) )',
+				['%https://example.net/test/%', '%https://example.net/test/%']
+			],
+		];
+	}
+
+	/** Checks the SQL fragment and parameter values produced for regex searches with a MariaDB version string.
+	 * @dataProvider provideRegexMariaDB
+	 * @param array<string> $values Expected parameter values returned alongside the SQL fragment.
+	 */
+	public function test__regex_mariadb(string $input, string $sql, array $values): void {
+		FreshRSS_DatabaseDAO::$dummyConnection = true;	// presumably avoids needing a live database — TODO confirm
+		FreshRSS_DatabaseDAO::setStaticVersion('11.4.3-MariaDB-ubu2404');	// static state: not reset after the test
+		[$filterValues, $filterSearch] = FreshRSS_EntryDAO::sqlBooleanSearch('e.', new FreshRSS_BooleanSearch($input));
+		self::assertEquals(trim($sql), trim($filterSearch));	// surrounding whitespace is ignored on both sides
+		self::assertEquals($values, $filterValues);
+	}
+
+	/** @return list<array{string,string,list<string>}> Datasets: search input, expected SQL, expected values. */
+	public static function provideRegexMariaDB(): array {
+		return [
+			[	// No flag: value is prefixed with inline (?-i)
+				'intitle:/^ab$/',
+				"(e.title REGEXP ? )",
+				['(?-i)^ab$']
+			],
+			[	// i flag: value is prefixed with inline (?i)
+				'intitle:/^ab$/i',
+				"(e.title REGEXP ? )",
+				['(?i)^ab$']
+			],
+			[	// m flag: (?m) is added after the (?-i) prefix
+				'intitle:/^ab$/m',
+				"(e.title REGEXP ? )",
+				['(?-i)(?m)^ab$']
+			],
+		];
+	}
+
+	/** Checks the SQL fragment and parameter values produced for regex searches with a MySQL version string.
+	 * @dataProvider provideRegexMySQL
+	 * @param array<string> $values Expected parameter values returned alongside the SQL fragment.
+	 */
+	public function test__regex_mysql(string $input, string $sql, array $values): void {
+		FreshRSS_DatabaseDAO::$dummyConnection = true;	// presumably avoids needing a live database — TODO confirm
+		FreshRSS_DatabaseDAO::setStaticVersion('9.0.1');	// static state: not reset after the test
+		[$filterValues, $filterSearch] = FreshRSS_EntryDAO::sqlBooleanSearch('e.', new FreshRSS_BooleanSearch($input));
+		self::assertEquals(trim($sql), trim($filterSearch));	// surrounding whitespace is ignored on both sides
+		self::assertEquals($values, $filterValues);
+	}
+
+	/** @return list<array{string,string,list<string>}> Datasets: search input, expected SQL, expected values. */
+	public static function provideRegexMySQL(): array {
+		return [
+			[	// No flag: REGEXP_LIKE match type 'c' (case-sensitive)
+				'intitle:/^ab$/',
+				"(REGEXP_LIKE(e.title,?,'c') )",
+				['^ab$']
+			],
+			[	// i flag: REGEXP_LIKE match type 'i' (case-insensitive)
+				'intitle:/^ab$/i',
+				"(REGEXP_LIKE(e.title,?,'i') )",
+				['^ab$']
+			],
+			[	// m flag: match type 'mc' (multiple-line mode, case-sensitive)
+				'intitle:/^ab$/m',
+				"(REGEXP_LIKE(e.title,?,'mc') )",
+				['^ab$']
+			],
+		];
+	}
+
+	/** Checks the SQL fragment and parameter values produced for regex searches by the SQLite entry DAO.
+	 * @dataProvider provideRegexSQLite
+	 * @param array<string> $values Expected parameter values returned alongside the SQL fragment.
+	 */
+	public function test__regex_sqlite(string $input, string $sql, array $values): void {
+		[$filterValues, $filterSearch] = FreshRSS_EntryDAOSQLite::sqlBooleanSearch('e.', new FreshRSS_BooleanSearch($input));
+		self::assertEquals(trim($sql), trim($filterSearch));	// surrounding whitespace is ignored on both sides
+		self::assertEquals($values, $filterValues);
+	}
+
+	/** @return list<array{string,string,list<string>}> Datasets: search input, expected SQL, expected values. */
+	public static function provideRegexSQLite(): array {
+		return [
+			[	// The value keeps the /.../ delimiters
+				'intitle:/^ab$/',
+				"(e.title REGEXP ? )",
+				['/^ab$/']
+			],
+			[	// i flag stays inside the value, SQL is unchanged
+				'intitle:/^ab$/i',
+				"(e.title REGEXP ? )",
+				['/^ab$/i']
+			],
+			[	// m flag stays inside the value, SQL is unchanged
+				'intitle:/^ab$/m',
+				"(e.title REGEXP ? )",
+				['/^ab$/m']
+			],
+			[	// Backslash escape (\b) is expected to reach the value unchanged
+				'intitle:/^ab\\b/',
+				'(e.title REGEXP ? )',
+				['/^ab\\b/']
+			],
+		];
+	}
 }
-- 
cgit v1.2.3