aboutsummaryrefslogtreecommitdiff
path: root/app/Models/EntryDAO.php
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2024-09-06 09:35:58 +0200
committerGravatar GitHub <noreply@github.com> 2024-09-06 09:35:58 +0200
commit1a552bd60eab4a4b940d3896376b599e155d7da0 (patch)
treee3872dfc1bc51ed52a88bc22fc7582e858314a71 /app/Models/EntryDAO.php
parent35a7634e68d87ad4da46c96ee8066e8c681f8d18 (diff)
Regex search (#6706)
* Regex search fix https://github.com/FreshRSS/FreshRSS/issues/3549 * Fix PHPStan * Fix escape * Fix ungreedy * Initial support for regex search in PostgreSQL and MySQL * Improvements, support MySQL * Fix multiline * Add support for SQLite * A few tests * Added author: and inurl: support, documentation * author example * Remove \b for now * Disable regex sanitization for now * Fix getInurlRegex * getNotInurlRegex * Quotes for inurl: * Fix test * Fix quoted tags + regex for tags https://github.com/FreshRSS/FreshRSS/issues/6761 * Fix wrong regex detection * Add MariaDB * Fix logic * Increase requirements for MySQL and MariaDB Check support for multiline mode in MySQL * Remove sanitizeRegexes() * Allow searching HTML code Allow searching for instance `/<pre>/` Fix https://github.com/FreshRSS/FreshRSS/issues/6775#issuecomment-2331769883 * Doc regex search HTML * Fix Doctype
Diffstat (limited to 'app/Models/EntryDAO.php')
-rw-r--r--app/Models/EntryDAO.php116
1 files changed, 114 insertions, 2 deletions
diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php
index 175df15c3..6a00e2108 100644
--- a/app/Models/EntryDAO.php
+++ b/app/Models/EntryDAO.php
@@ -27,6 +27,55 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo {
return str_replace('INSERT INTO ', 'INSERT IGNORE INTO ', $sql);
}
+ /** @return array{pattern?:string,matchType?:string} */
+ protected static function regexToSql(string $regex): array {
+ if (preg_match('#^/(?P<pattern>.*)/(?P<matchType>[im]*)$#', $regex, $matches)) {
+ return $matches;
+ }
+ return [];
+ }
+
+ /** @param array<int|string> $values */
+ protected static function sqlRegex(string $expression, string $regex, array &$values): string {
+ // The implementation of this function is solely for MySQL and MariaDB
+ static $databaseDAOMySQL = null;
+ if ($databaseDAOMySQL === null) {
+ $databaseDAOMySQL = new FreshRSS_DatabaseDAO();
+ }
+
+ $matches = static::regexToSql($regex);
+ if (isset($matches['pattern'])) {
+ $matchType = $matches['matchType'] ?? '';
+ if ($databaseDAOMySQL->isMariaDB()) {
+ if (str_contains($matchType, 'm')) {
+ // multiline mode
+ $matches['pattern'] = '(?m)' . $matches['pattern'];
+ }
+ if (str_contains($matchType, 'i')) {
+ // case-insensitive match
+ $matches['pattern'] = '(?i)' . $matches['pattern'];
+ } else {
+ $matches['pattern'] = '(?-i)' . $matches['pattern'];
+ }
+ $values[] = $matches['pattern'];
+ return "{$expression} REGEXP ?";
+ } else { // MySQL
+ if (!str_contains($matchType, 'i')) {
+ // Case-sensitive matching
+ $matchType .= 'c';
+ }
+ $values[] = $matches['pattern'];
+ return "REGEXP_LIKE({$expression},?,'{$matchType}')";
+ }
+ }
+ return '';
+ }
+
+ /** Register any needed SQL function for the query, e.g. application-defined functions for SQLite */
+ protected function registerSqlFunctions(string $sql): void {
+ // Nothing to do for MySQL
+ }
+
private function updateToMediumBlob(): bool {
if ($this->pdo->dbType() !== 'mysql') {
return false;
@@ -910,24 +959,44 @@ SQL;
$values[] = "%{$author}%";
}
}
+ if ($filter->getAuthorRegex() !== null) {
+ foreach ($filter->getAuthorRegex() as $author) {
+ $sub_search .= 'AND ' . static::sqlRegex("REPLACE({$alias}author, ';', '\n')", $author, $values) . ' ';
+ }
+ }
if ($filter->getIntitle() !== null) {
foreach ($filter->getIntitle() as $title) {
$sub_search .= 'AND ' . $alias . 'title LIKE ? ';
$values[] = "%{$title}%";
}
}
+ if ($filter->getIntitleRegex() !== null) {
+ foreach ($filter->getIntitleRegex() as $title) {
+ $sub_search .= 'AND ' . static::sqlRegex($alias . 'title', $title, $values) . ' ';
+ }
+ }
if ($filter->getTags() !== null) {
foreach ($filter->getTags() as $tag) {
$sub_search .= 'AND ' . static::sqlConcat('TRIM(' . $alias . 'tags) ', " ' #'") . ' LIKE ? ';
$values[] = "%{$tag} #%";
}
}
+ if ($filter->getTagsRegex() !== null) {
+ foreach ($filter->getTagsRegex() as $tag) {
+ $sub_search .= 'AND ' . static::sqlRegex("REPLACE(REPLACE({$alias}tags, ' #', '#'), '#', '\n')", $tag, $values) . ' ';
+ }
+ }
if ($filter->getInurl() !== null) {
foreach ($filter->getInurl() as $url) {
$sub_search .= 'AND ' . $alias . 'link LIKE ? ';
$values[] = "%{$url}%";
}
}
+ if ($filter->getInurlRegex() !== null) {
+ foreach ($filter->getInurlRegex() as $url) {
+ $sub_search .= 'AND ' . static::sqlRegex($alias . 'link', $url, $values) . ' ';
+ }
+ }
if ($filter->getNotAuthor() !== null) {
foreach ($filter->getNotAuthor() as $author) {
@@ -935,29 +1004,49 @@ SQL;
$values[] = "%{$author}%";
}
}
+ if ($filter->getNotAuthorRegex() !== null) {
+ foreach ($filter->getNotAuthorRegex() as $author) {
+ $sub_search .= 'AND NOT ' . static::sqlRegex("REPLACE({$alias}author, ';', '\n')", $author, $values) . ' ';
+ }
+ }
if ($filter->getNotIntitle() !== null) {
foreach ($filter->getNotIntitle() as $title) {
$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? ';
$values[] = "%{$title}%";
}
}
+ if ($filter->getNotIntitleRegex() !== null) {
+ foreach ($filter->getNotIntitleRegex() as $title) {
+ $sub_search .= 'AND NOT ' . static::sqlRegex($alias . 'title', $title, $values) . ' ';
+ }
+ }
if ($filter->getNotTags() !== null) {
foreach ($filter->getNotTags() as $tag) {
$sub_search .= 'AND ' . static::sqlConcat('TRIM(' . $alias . 'tags) ', " ' #'") . ' NOT LIKE ? ';
$values[] = "%{$tag} #%";
}
}
+ if ($filter->getNotTagsRegex() !== null) {
+ foreach ($filter->getNotTagsRegex() as $tag) {
+ $sub_search .= 'AND NOT ' . static::sqlRegex("REPLACE(REPLACE({$alias}tags, ' #', '#'), '#', '\n')", $tag, $values) . ' ';
+ }
+ }
if ($filter->getNotInurl() !== null) {
foreach ($filter->getNotInurl() as $url) {
$sub_search .= 'AND ' . $alias . 'link NOT LIKE ? ';
$values[] = "%{$url}%";
}
}
+ if ($filter->getNotInurlRegex() !== null) {
+ foreach ($filter->getNotInurlRegex() as $url) {
+ $sub_search .= 'AND NOT ' . static::sqlRegex($alias . 'link', $url, $values) . ' ';
+ }
+ }
if ($filter->getSearch() !== null) {
foreach ($filter->getSearch() as $search_value) {
if (static::isCompressed()) { // MySQL-only
- $sub_search .= 'AND CONCAT(' . $alias . 'title, UNCOMPRESS(' . $alias . 'content_bin)) LIKE ? ';
+ $sub_search .= "AND CONCAT({$alias}title, '\\n', UNCOMPRESS({$alias}content_bin)) LIKE ? ";
$values[] = "%{$search_value}%";
} else {
$sub_search .= 'AND (' . $alias . 'title LIKE ? OR ' . $alias . 'content LIKE ?) ';
@@ -966,10 +1055,21 @@ SQL;
}
}
}
+ if ($filter->getSearchRegex() !== null) {
+ foreach ($filter->getSearchRegex() as $search_value) {
+ if (static::isCompressed()) { // MySQL-only
+ $sub_search .= 'AND (' . static::sqlRegex($alias . 'title', $search_value, $values) .
+ ' OR ' . static::sqlRegex("UNCOMPRESS({$alias}content_bin)", $search_value, $values) . ') ';
+ } else {
+ $sub_search .= 'AND (' . static::sqlRegex($alias . 'title', $search_value, $values) .
+ ' OR ' . static::sqlRegex($alias . 'content', $search_value, $values) . ') ';
+ }
+ }
+ }
if ($filter->getNotSearch() !== null) {
foreach ($filter->getNotSearch() as $search_value) {
if (static::isCompressed()) { // MySQL-only
- $sub_search .= 'AND CONCAT(' . $alias . 'title, UNCOMPRESS(' . $alias . 'content_bin)) NOT LIKE ? ';
+ $sub_search .= "AND CONCAT({$alias}title, '\\n', UNCOMPRESS({$alias}content_bin)) NOT LIKE ? ";
$values[] = "%{$search_value}%";
} else {
$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? AND ' . $alias . 'content NOT LIKE ? ';
@@ -978,6 +1078,17 @@ SQL;
}
}
}
+ if ($filter->getNotSearchRegex() !== null) {
+ foreach ($filter->getNotSearchRegex() as $search_value) {
+ if (static::isCompressed()) { // MySQL-only
+ $sub_search .= 'AND NOT ' . static::sqlRegex($alias . 'title', $search_value, $values) .
+ ' ANT NOT ' . static::sqlRegex("UNCOMPRESS({$alias}content_bin)", $search_value, $values) . ' ';
+ } else {
+ $sub_search .= 'AND NOT ' . static::sqlRegex($alias . 'title', $search_value, $values) .
+ ' AND NOT ' . static::sqlRegex($alias . 'content', $search_value, $values) . ' ';
+ }
+ }
+ }
if ($sub_search != '') {
if ($isOpen) {
@@ -1039,6 +1150,7 @@ SQL;
if ($filterSearch !== '') {
$search .= 'AND (' . $filterSearch . ') ';
$values = array_merge($values, $filterValues);
+ $this->registerSqlFunctions($search);
}
}
return [$values, $search];