aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2022-08-18 12:06:31 +0200
committerGravatar GitHub <noreply@github.com> 2022-08-18 12:06:31 +0200
commit4f111c5b305078a641d13ac41ce7d798e3cc19ce (patch)
treeeff91f43111d31fb8c6837fa084a2debd9407e57
parent2acf3a4dd87af5c09fcf513517cb852ff6e909ce (diff)
Easier full-text search possibility (#4505)
* Easier full-text search possibility Contributes to https://github.com/FreshRSS/FreshRSS/issues/1331 Avoid concats in searches to make text indexes easier to build * Fix tests * Documentation
-rw-r--r--app/Models/EntryDAO.php24
-rw-r--r--app/Models/EntryDAOSQLite.php4
-rw-r--r--docs/en/admins/01_Index.md1
-rw-r--r--docs/en/admins/DatabaseConfig.md33
-rw-r--r--docs/en/developers/03_Backend/01_Database_schema.md4
-rw-r--r--tests/app/Models/SearchTest.php10
6 files changed, 54 insertions, 22 deletions
diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php
index 02552affe..7cefc8c0e 100644
--- a/app/Models/EntryDAO.php
+++ b/app/Models/EntryDAO.php
@@ -10,10 +10,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
return true;
}
- protected static function sqlConcat($s1, $s2) {
- return 'CONCAT(' . $s1 . ',' . $s2 . ')'; //MySQL
- }
-
public static function sqlHexDecode(string $x): string {
return 'unhex(' . $x . ')';
}
@@ -950,47 +946,49 @@ SQL;
}
if ($filter->getInurl()) {
foreach ($filter->getInurl() as $url) {
- $sub_search .= 'AND ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ? ';
+ $sub_search .= 'AND ' . $alias . 'link LIKE ? ';
$values[] = "%{$url}%";
}
}
if ($filter->getNotAuthor()) {
foreach ($filter->getNotAuthor() as $author) {
- $sub_search .= 'AND (NOT ' . $alias . 'author LIKE ?) ';
+ $sub_search .= 'AND ' . $alias . 'author NOT LIKE ? ';
$values[] = "%{$author}%";
}
}
if ($filter->getNotIntitle()) {
foreach ($filter->getNotIntitle() as $title) {
- $sub_search .= 'AND (NOT ' . $alias . 'title LIKE ?) ';
+ $sub_search .= 'AND ' . $alias . 'title NOT LIKE ? ';
$values[] = "%{$title}%";
}
}
if ($filter->getNotTags()) {
foreach ($filter->getNotTags() as $tag) {
- $sub_search .= 'AND (NOT ' . $alias . 'tags LIKE ?) ';
+ $sub_search .= 'AND ' . $alias . 'tags NOT LIKE ? ';
$values[] = "%{$tag}%";
}
}
if ($filter->getNotInurl()) {
foreach ($filter->getNotInurl() as $url) {
- $sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ?) ';
+ $sub_search .= 'AND ' . $alias . 'link NOT LIKE ? ';
$values[] = "%{$url}%";
}
}
if ($filter->getSearch()) {
foreach ($filter->getSearch() as $search_value) {
- $sub_search .= 'AND ' . static::sqlConcat($alias . 'title',
- static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? ';
+ $sub_search .= 'AND (' . $alias . 'title LIKE ? OR ' .
+ (static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
+ $values[] = "%{$search_value}%";
$values[] = "%{$search_value}%";
}
}
if ($filter->getNotSearch()) {
foreach ($filter->getNotSearch() as $search_value) {
- $sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'title',
- static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
+ $sub_search .= 'AND ' . $alias . 'title NOT LIKE ? AND ' .
+ (static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' NOT LIKE ? ';
+ $values[] = "%{$search_value}%";
$values[] = "%{$search_value}%";
}
}
diff --git a/app/Models/EntryDAOSQLite.php b/app/Models/EntryDAOSQLite.php
index 27a7c3560..b1ab6601e 100644
--- a/app/Models/EntryDAOSQLite.php
+++ b/app/Models/EntryDAOSQLite.php
@@ -10,10 +10,6 @@ class FreshRSS_EntryDAOSQLite extends FreshRSS_EntryDAO {
return false;
}
- protected static function sqlConcat($s1, $s2) {
- return $s1 . '||' . $s2;
- }
-
public static function sqlHexDecode(string $x): string {
return $x;
}
diff --git a/docs/en/admins/01_Index.md b/docs/en/admins/01_Index.md
index 749d44af3..792434389 100644
--- a/docs/en/admins/01_Index.md
+++ b/docs/en/admins/01_Index.md
@@ -18,6 +18,7 @@ Learn how to install, update, and backup FreshRSS, as well as how to use the com
* [Setting Up Automatic Feed Updating](08_FeedUpdates.md)
* [Access Control](09_AccessControl.md)
* [Apache/Nginx configuration files](10_ServerConfig.md)
+* [Database configuration](DatabaseConfig.md)
* [Using the command line interface (CLI)](https://github.com/FreshRSS/FreshRSS/tree/edge/cli)
* [Configuring the email address validation](05_Configuring_email_validation.md)
* [Frequently asked questions](04_Frequently_Asked_Questions.md)
diff --git a/docs/en/admins/DatabaseConfig.md b/docs/en/admins/DatabaseConfig.md
new file mode 100644
index 000000000..aa80a3975
--- /dev/null
+++ b/docs/en/admins/DatabaseConfig.md
@@ -0,0 +1,33 @@
+# Database configuration
+
+FreshRSS supports the databases SQLite (built-in), PostgreSQL, MySQL / MariaDB.
+
+While the default installation should be fine for most cases, additional tuning can be made.
+
+## Full-text search optimisation in PostgreSQL
+
+Without changing anything in FreshRSS’ code (which is using [`ILIKE`](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE)), it is possible to make text searches much faster by adding some indexes in PostgreSQL 9.1+ (at the cost of more disc space and slower insertions):
+
+```sql
+CREATE EXTENSION pg_trgm;
+CREATE INDEX gin_trgm_index_title ON freshrss_entry USING gin(title gin_trgm_ops);
+CREATE INDEX gin_trgm_index_content ON freshrss_entry USING gin(content gin_trgm_ops);
+```
+
+Where `freshrss_entry` needs to be adapted to the name of the *entry* of a given use, e.g., `freshrss_alice_entry`.
+
+Such an index on the `entry.title` column makes searches such as `intitle:Hello` much faster.
+General searches such as `Something` search both on `entry.title` and `entry.content` and therefore require the two indexes shown above.
+
+Likewise, if you wanted to speed up searches on the authors (`author:Alice`), you would add another index:
+
+```sql
+CREATE INDEX gin_trgm_index_author ON freshrss_entry USING gin(author gin_trgm_ops);
+```
+
+Etc. for other text fields. The list of fields can be seen in [`CREATE TABLE _entry` section](https://github.com/FreshRSS/FreshRSS/blob/edge/app/SQL/install.sql.pgsql.php).
+
+### References
+
+* [GIN: Generalized Inverted Index](https://www.postgresql.org/docs/current/gin-intro.html)
+* [`pg_trgm` module for fast text search](https://www.postgresql.org/docs/current/pgtrgm.html#id-1.11.7.42.8)
diff --git a/docs/en/developers/03_Backend/01_Database_schema.md b/docs/en/developers/03_Backend/01_Database_schema.md
index b2206e46a..280f3079b 100644
--- a/docs/en/developers/03_Backend/01_Database_schema.md
+++ b/docs/en/developers/03_Backend/01_Database_schema.md
@@ -1,3 +1,7 @@
# Database Schema
> **TODO**
+
+## See also
+
+* [Database configuration](../../admins/DatabaseConfig.md)
diff --git a/tests/app/Models/SearchTest.php b/tests/app/Models/SearchTest.php
index 3fb5a144f..a2832c710 100644
--- a/tests/app/Models/SearchTest.php
+++ b/tests/app/Models/SearchTest.php
@@ -318,17 +318,17 @@ class SearchTest extends PHPUnit\Framework\TestCase {
],
[
'#tag Hello OR (author:Alice inurl:example) OR (f:3 intitle:World) OR L:12',
- ' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) OR ((e.author LIKE ? AND e.link||e.guid LIKE ? )) OR' .
+ ' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) OR ((e.author LIKE ? AND e.link LIKE ? )) OR' .
' ((e.id_feed IN (?) AND e.title LIKE ? )) OR ((e.id IN (SELECT et.id_entry FROM `_entrytag` et WHERE et.id_tag IN (?)) )) ',
- ['%tag%','%Hello%','%Alice%','%example%','3','%World%', '12']
+ ['%tag%','%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', '12']
],
[
'#tag Hello (author:Alice inurl:example) (f:3 intitle:World) label:Bleu',
- ' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) AND' .
- ' ((e.author LIKE ? AND e.link||e.guid LIKE ? )) AND' .
+ ' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) AND' .
+ ' ((e.author LIKE ? AND e.link LIKE ? )) AND' .
' ((e.id_feed IN (?) AND e.title LIKE ? )) AND' .
' ((e.id IN (SELECT et.id_entry FROM `_entrytag` et, `_tag` t WHERE et.id_tag = t.id AND t.name IN (?)) )) ',
- ['%tag%','%Hello%','%Alice%','%example%','3','%World%', 'Bleu']
+ ['%tag%', '%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', 'Bleu']
],
[
'!((author:Alice intitle:hello) OR (author:Bob intitle:world))',