diff options
| author | 2022-08-18 12:06:31 +0200 | |
|---|---|---|
| committer | 2022-08-18 12:06:31 +0200 | |
| commit | 4f111c5b305078a641d13ac41ce7d798e3cc19ce (patch) | |
| tree | eff91f43111d31fb8c6837fa084a2debd9407e57 | |
| parent | 2acf3a4dd87af5c09fcf513517cb852ff6e909ce (diff) | |
Easier full-text search possibility (#4505)
* Easier full-text search possibility
Contributes to https://github.com/FreshRSS/FreshRSS/issues/1331
Avoid concats in searches to make text indexes easier to build
* Fix tests
* Documentation
| -rw-r--r-- | app/Models/EntryDAO.php | 24 | ||||
| -rw-r--r-- | app/Models/EntryDAOSQLite.php | 4 | ||||
| -rw-r--r-- | docs/en/admins/01_Index.md | 1 | ||||
| -rw-r--r-- | docs/en/admins/DatabaseConfig.md | 33 | ||||
| -rw-r--r-- | docs/en/developers/03_Backend/01_Database_schema.md | 4 | ||||
| -rw-r--r-- | tests/app/Models/SearchTest.php | 10 |
6 files changed, 54 insertions, 22 deletions
diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php index 02552affe..7cefc8c0e 100644 --- a/app/Models/EntryDAO.php +++ b/app/Models/EntryDAO.php @@ -10,10 +10,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable { return true; } - protected static function sqlConcat($s1, $s2) { - return 'CONCAT(' . $s1 . ',' . $s2 . ')'; //MySQL - } - public static function sqlHexDecode(string $x): string { return 'unhex(' . $x . ')'; } @@ -950,47 +946,49 @@ SQL; } if ($filter->getInurl()) { foreach ($filter->getInurl() as $url) { - $sub_search .= 'AND ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ? '; + $sub_search .= 'AND ' . $alias . 'link LIKE ? '; $values[] = "%{$url}%"; } } if ($filter->getNotAuthor()) { foreach ($filter->getNotAuthor() as $author) { - $sub_search .= 'AND (NOT ' . $alias . 'author LIKE ?) '; + $sub_search .= 'AND ' . $alias . 'author NOT LIKE ? '; $values[] = "%{$author}%"; } } if ($filter->getNotIntitle()) { foreach ($filter->getNotIntitle() as $title) { - $sub_search .= 'AND (NOT ' . $alias . 'title LIKE ?) '; + $sub_search .= 'AND ' . $alias . 'title NOT LIKE ? '; $values[] = "%{$title}%"; } } if ($filter->getNotTags()) { foreach ($filter->getNotTags() as $tag) { - $sub_search .= 'AND (NOT ' . $alias . 'tags LIKE ?) '; + $sub_search .= 'AND ' . $alias . 'tags NOT LIKE ? '; $values[] = "%{$tag}%"; } } if ($filter->getNotInurl()) { foreach ($filter->getNotInurl() as $url) { - $sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ?) '; + $sub_search .= 'AND ' . $alias . 'link NOT LIKE ? '; $values[] = "%{$url}%"; } } if ($filter->getSearch()) { foreach ($filter->getSearch() as $search_value) { - $sub_search .= 'AND ' . static::sqlConcat($alias . 'title', - static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? '; + $sub_search .= 'AND (' . $alias . 'title LIKE ? OR ' . + (static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) '; + $values[] = "%{$search_value}%"; $values[] = "%{$search_value}%"; } } if ($filter->getNotSearch()) { foreach ($filter->getNotSearch() as $search_value) { - $sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'title', - static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) '; + $sub_search .= 'AND ' . $alias . 'title NOT LIKE ? AND ' . + (static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' NOT LIKE ? '; + $values[] = "%{$search_value}%"; $values[] = "%{$search_value}%"; } } diff --git a/app/Models/EntryDAOSQLite.php b/app/Models/EntryDAOSQLite.php index 27a7c3560..b1ab6601e 100644 --- a/app/Models/EntryDAOSQLite.php +++ b/app/Models/EntryDAOSQLite.php @@ -10,10 +10,6 @@ class FreshRSS_EntryDAOSQLite extends FreshRSS_EntryDAO { return false; } - protected static function sqlConcat($s1, $s2) { - return $s1 . '||' . $s2; - } - public static function sqlHexDecode(string $x): string { return $x; } diff --git a/docs/en/admins/01_Index.md b/docs/en/admins/01_Index.md index 749d44af3..792434389 100644 --- a/docs/en/admins/01_Index.md +++ b/docs/en/admins/01_Index.md @@ -18,6 +18,7 @@ Learn how to install, update, and backup FreshRSS, as well as how to use the com * [Setting Up Automatic Feed Updating](08_FeedUpdates.md) * [Access Control](09_AccessControl.md) * [Apache/Nginx configuration files](10_ServerConfig.md) +* [Database configuration](DatabaseConfig.md) * [Using the command line interface (CLI)](https://github.com/FreshRSS/FreshRSS/tree/edge/cli) * [Configuring the email address validation](05_Configuring_email_validation.md) * [Frequently asked questions](04_Frequently_Asked_Questions.md) diff --git a/docs/en/admins/DatabaseConfig.md b/docs/en/admins/DatabaseConfig.md new file mode 100644 index 000000000..aa80a3975 --- /dev/null +++ b/docs/en/admins/DatabaseConfig.md @@ -0,0 +1,33 @@ +# Database configuration + +FreshRSS supports the databases SQLite (built-in), PostgreSQL, MySQL / MariaDB. + +While the default installation should be fine for most cases, additional tuning can be made. + +## Full-text search optimisation in PostgreSQL + +Without changing anything in FreshRSS’ code (which is using [`ILIKE`](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE)), it is possible to make text searches much faster by adding some indexes in PostgreSQL 9.1+ (at the cost of more disc space and slower insertions): + +```sql +CREATE EXTENSION pg_trgm; +CREATE INDEX gin_trgm_index_title ON freshrss_entry USING gin(title gin_trgm_ops); +CREATE INDEX gin_trgm_index_content ON freshrss_entry USING gin(content gin_trgm_ops); +``` + +Where `freshrss_entry` needs to be adapted to the name of the *entry* of a given use, e.g., `freshrss_alice_entry`. + +Such an index on the `entry.title` column makes searches such as `intitle:Hello` much faster. +General searches such as `Something` search both on `entry.title` and `entry.content` and therefore require the two indexes shown above. + +Likewise, if you wanted to speed up searches on the authors (`author:Alice`), you would add another index: + +```sql +CREATE INDEX gin_trgm_index_author ON freshrss_entry USING gin(author gin_trgm_ops); +``` + +Etc. for other text fields. The list of fields can be seen in [`CREATE TABLE _entry` section](https://github.com/FreshRSS/FreshRSS/blob/edge/app/SQL/install.sql.pgsql.php). + +### References + +* [GIN: Generalized Inverted Index](https://www.postgresql.org/docs/current/gin-intro.html) +* [`pg_trgm` module for fast text search](https://www.postgresql.org/docs/current/pgtrgm.html#id-1.11.7.42.8) diff --git a/docs/en/developers/03_Backend/01_Database_schema.md b/docs/en/developers/03_Backend/01_Database_schema.md index b2206e46a..280f3079b 100644 --- a/docs/en/developers/03_Backend/01_Database_schema.md +++ b/docs/en/developers/03_Backend/01_Database_schema.md @@ -1,3 +1,7 @@ # Database Schema > **TODO** + +## See also + +* [Database configuration](../../admins/DatabaseConfig.md) diff --git a/tests/app/Models/SearchTest.php b/tests/app/Models/SearchTest.php index 3fb5a144f..a2832c710 100644 --- a/tests/app/Models/SearchTest.php +++ b/tests/app/Models/SearchTest.php @@ -318,17 +318,17 @@ class SearchTest extends PHPUnit\Framework\TestCase { ], [ '#tag Hello OR (author:Alice inurl:example) OR (f:3 intitle:World) OR L:12', - ' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) OR ((e.author LIKE ? AND e.link||e.guid LIKE ? )) OR' . + ' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) OR ((e.author LIKE ? AND e.link LIKE ? )) OR' . ' ((e.id_feed IN (?) AND e.title LIKE ? )) OR ((e.id IN (SELECT et.id_entry FROM `_entrytag` et WHERE et.id_tag IN (?)) )) ', - ['%tag%','%Hello%','%Alice%','%example%','3','%World%', '12'] + ['%tag%','%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', '12'] ], [ '#tag Hello (author:Alice inurl:example) (f:3 intitle:World) label:Bleu', - ' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) AND' . - ' ((e.author LIKE ? AND e.link||e.guid LIKE ? )) AND' . + ' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) AND' . + ' ((e.author LIKE ? AND e.link LIKE ? )) AND' . ' ((e.id_feed IN (?) AND e.title LIKE ? )) AND' . ' ((e.id IN (SELECT et.id_entry FROM `_entrytag` et, `_tag` t WHERE et.id_tag = t.id AND t.name IN (?)) )) ', - ['%tag%','%Hello%','%Alice%','%example%','3','%World%', 'Bleu'] + ['%tag%', '%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', 'Bleu'] ], [ '!((author:Alice intitle:hello) OR (author:Bob intitle:world))', |
