aboutsummaryrefslogtreecommitdiff
path: root/app/Models/BooleanSearch.php
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2022-06-02 08:41:08 +0200
committerGravatar GitHub <noreply@github.com> 2022-06-02 08:41:08 +0200
commitf85c510ed49be031145f6b35e815ce890cd4f9aa (patch)
treec7ac947ba5ddacf85dc5d97330f38f7d91b0964c /app/Models/BooleanSearch.php
parentf988b996ab69104bc45b222fa88d34b5c78f98b3 (diff)
New search engine (#4378)
* New possibility to invoke user queries from a search expression. From the search field: `S:"My query"`. Can be combined with other filters such as `S:"My query" date:P3d` as long as the user queries do not contain `OR`. A use-case is to have an RSS filter with a stable address or an external API call with the ability to update the user query.
* Draft of parenthesis logic
* More draft
* Working parenthesis (a OR b) (c OR d)
* Working (A) OR (B)
* Support nested parentheses + unit tests + documentation
* search:MySearch and S:3
Diffstat (limited to 'app/Models/BooleanSearch.php')
-rw-r--r--app/Models/BooleanSearch.php216
1 files changed, 208 insertions, 8 deletions
diff --git a/app/Models/BooleanSearch.php b/app/Models/BooleanSearch.php
index 774e42073..4cb74865a 100644
--- a/app/Models/BooleanSearch.php
+++ b/app/Models/BooleanSearch.php
@@ -7,17 +7,210 @@ class FreshRSS_BooleanSearch {
/** @var string */
private $raw_input = '';
+ /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */
private $searches = array();
- public function __construct($input) {
+ /** @var string 'AND' or 'OR' */
+ private $operator;
+
+ public function __construct(string $input, int $level = 0, $operator = 'AND') { // $level: nesting depth, 0 for the top-level expression; $operator: 'AND' or 'OR'
+ $this->operator = $operator;
$input = trim($input);
if ($input == '') {
return; // Empty expression: no search component is created
}
$this->raw_input = $input;
- $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
- $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
+ if ($level === 0) { // Only the top-level expression is normalised; nested levels receive text that was already processed
+ $input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input); // Turn :&quot;...&quot; into :"..."
+ $input = preg_replace('/(?<=[\s!-]|^)&quot;(.*?)&quot;/', '"\1"', $input); // Same for a quoted term at the start or after whitespace, '!' or '-'
+
+ $input = $this->parseUserQueryNames($input); // Expand search:name references to saved queries
+ $input = $this->parseUserQueryIds($input); // Expand S:<n> references to saved queries
+ }
+
+ // Either parse everything as a series of nested BooleanSearch objects (each carrying its own AND/OR operator),
+ // or, when the input contains no parenthesis, as a series of Search objects combined by explicit OR
+ $this->parseParentheses($input, $level) || $this->parseOrSegments($input); // parseOrSegments() only runs when parseParentheses() returned false
+ }
+
+ /**
+ * Parse the user queries (saved searches) by name and expand them in the input string.
+ *
+ * Recognises `search:"name"`, `search:'name'` and unquoted `search:name`. Every reference whose
+ * trimmed name matches a saved query from FreshRSS_Context::$user_conf->queries is replaced by
+ * that query's search expression wrapped in parentheses; unknown names are left unchanged.
+ *
+ * NOTE(review): the replacement relies on str_replace() over the matched texts, so a match that
+ * is a prefix of another one (e.g. `search:a` and `search:ab`) looks like it could corrupt the
+ * longer reference - to be confirmed with a test.
+ *
+ * @param string $input search expression possibly containing `search:` references
+ * @return string the expression with the known references expanded
+ */
+ private function parseUserQueryNames(string $input): string {
+ $all_matches = [];
+ if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) { // Quoted names; the U modifier makes .* ungreedy
+ $all_matches[] = $matches;
+
+ }
+ if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) { // Unquoted names: a run of characters other than whitespace and double quote
+ $all_matches[] = $matches;
+ }
+
+ if (!empty($all_matches)) {
+ /** @var array<string,FreshRSS_UserQuery> saved queries indexed by their name */
+ $queries = [];
+ foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
+ $query = new FreshRSS_UserQuery($raw_query);
+ $queries[$query->getName()] = $query; // A later query with the same name replaces an earlier one
+ }
+
+ $fromS = []; // Matched texts to replace
+ $toS = []; // Their replacements, at the same positions as in $fromS
+ foreach ($all_matches as $matches) {
+ for ($i = count($matches['search']) - 1; $i >= 0; $i--) { // Walk the matches from last to first
+ $name = trim($matches['search'][$i]);
+ if (!empty($queries[$name])) { // Only names of existing saved queries are expanded
+ $fromS[] = $matches[0][$i]; // Full matched text, including the search: prefix
+ $toS[] = '(' . trim($queries[$name]->getSearch()) . ')';
+ }
+ }
+ }
+
+ $input = str_replace($fromS, $toS, $input);
+ }
+ return $input;
+ }
+
+ /**
+ * Parse the user queries (saved searches) by ID and expand them in the input string.
+ *
+ * Each `S:<n>` reference, where `<n>` is the 1-based position of a saved query in
+ * FreshRSS_Context::$user_conf->queries, is replaced by that query's search expression
+ * wrapped in parentheses. References to positions that do not exist are left unchanged.
+ *
+ * The expansion is done match by match in a single pass, so that `S:1` can never be
+ * substituted inside a longer reference such as `S:12`, and text coming from an expanded
+ * query is not expanded a second time.
+ *
+ * @param string $input search expression possibly containing `S:<n>` references
+ * @return string the expression with the known references expanded
+ */
+ private function parseUserQueryIds(string $input): string {
+ $pattern = '/\bS:(?P<search>\d+)/';
+ if (!preg_match($pattern, $input)) {
+ // No reference at all: do not bother instantiating the saved queries
+ return $input;
+ }
+
+ /** @var array<int,FreshRSS_UserQuery> */
+ $queries = [];
+ foreach (FreshRSS_Context::$user_conf->queries as $raw_query) {
+ $queries[] = new FreshRSS_UserQuery($raw_query);
+ }
+
+ $expanded = preg_replace_callback($pattern, function (array $match) use ($queries): string {
+ // Index starting from 1
+ $id = intval($match['search']) - 1;
+ if (empty($queries[$id])) {
+ // Unknown ID: keep the original text
+ return $match[0];
+ }
+ return '(' . trim($queries[$id]->getSearch()) . ')';
+ }, $input);
+
+ // preg_replace_callback() returns null on a PCRE error: fall back to the untouched input
+ return $expanded === null ? $input : $expanded;
+ }
+
+ /**
+ * Split the input on top-level parentheses and store each piece as a nested FreshRSS_BooleanSearch.
+ *
+ * The text outside parentheses and the content of each balanced group of parentheses are each
+ * parsed recursively with `$level + 1`. A parenthesised group is given the operator 'OR' when the
+ * text just before its opening parenthesis ends with the keyword OR; the text after the last
+ * closing parenthesis is given 'OR' when it starts with OR; everything else is given 'AND'.
+ * Pieces whose parsing yields no search are not stored.
+ *
+ * Quotes are not tracked here, so a parenthesis inside a quoted string is treated as grouping.
+ * NOTE(review): when an opening parenthesis has no matching closing one, the text collected
+ * after it is dropped without any warning - see the TODO notes in the body.
+ *
+ * @param string $input search expression to scan
+ * @param int $level nesting depth of the FreshRSS_BooleanSearch being built
+ * @return bool True if some parenthesis logic took over, false otherwise
+ */
+ private function parseParentheses(string $input, int $level): bool {
+ $input = trim($input);
+ $length = strlen($input);
+ $i = 0;
+ $before = ''; // Text accumulated outside parentheses since the last stored piece
+ $hasParenthesis = false;
+ $nextOperator = 'AND'; // Operator handed to the next nested BooleanSearch
+ while ($i < $length) {
+ $c = $input[$i];
+
+ if ($c === '(') {
+ $hasParenthesis = true;
+
+ $before = trim($before);
+ if (preg_match('/\bOR$/i', $before)) {
+ // Trim the trailing OR keyword (2 characters)
+ $before = substr($before, 0, -2);
+
+ // The text prior to the OR is a BooleanSearch
+ $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
+ if (count($searchBefore->searches()) > 0) {
+ $this->searches[] = $searchBefore;
+ }
+ $before = '';
+
+ // The parenthesised BooleanSearch that follows will have to be combined with OR instead of default AND
+ $nextOperator = 'OR';
+ } elseif ($before !== '') {
+ // The text prior to the opening parenthesis is a BooleanSearch
+ $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
+ if (count($searchBefore->searches()) > 0) {
+ $this->searches[] = $searchBefore;
+ }
+ $before = '';
+ }
+
+ // Search the matching closing parenthesis, collecting the enclosed text in $sub
+ $parentheses = 1; // Number of parentheses currently open
+ $sub = '';
+ $i++;
+ while ($i < $length) {
+ $c = $input[$i];
+ if ($c === '(') {
+ // One nested level deeper: kept in $sub and handled by the recursive parsing
+ $parentheses++;
+ $sub .= $c;
+ } elseif ($c === ')') {
+ $parentheses--;
+ if ($parentheses === 0) {
+ // Found the matching closing parenthesis: the enclosed text is a BooleanSearch
+ $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator);
+ $nextOperator = 'AND'; // Back to the default for what follows
+ if (count($searchSub->searches()) > 0) {
+ $this->searches[] = $searchSub;
+ }
+ $sub = '';
+ break; // $i stays on the closing parenthesis; the increment at the end of the outer loop skips it
+ } else {
+ $sub .= $c;
+ }
+ } else {
+ $sub .= $c;
+ }
+ $i++;
+ }
+ // $sub = trim($sub);
+ // if ($sub != '') {
+ // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
+ // }
+ // } elseif ($c === ')') {
+ // // TODO: Consider throwing an error or warning in case of non-matching parenthesis
+ } else {
+ $before .= $c; // Any other character, including a stray closing parenthesis, is plain text
+ }
+ $i++;
+ }
+ if ($hasParenthesis) {
+ $before = trim($before);
+ if (preg_match('/^OR\b/i', $before)) {
+ // The trailing BooleanSearch will have to be combined with OR instead of default AND
+ $nextOperator = 'OR';
+ // Trim the leading OR keyword (2 characters)
+ $before = substr($before, 2);
+ }
+
+ // The remaining text after the last parenthesis is a BooleanSearch
+ $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator);
+ $nextOperator = 'AND';
+ if (count($searchBefore->searches()) > 0) {
+ $this->searches[] = $searchBefore;
+ }
+ return true;
+ }
+ // There was no parenthesis logic to apply: nothing has been stored
+ return false;
+ }
+
+ private function parseOrSegments(string $input) {
+ $input = trim($input);
+ if ($input == '') {
+ return;
+ }
$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);
$segment = '';
@@ -43,16 +236,23 @@ class FreshRSS_BooleanSearch {
}
}
+ /**
+ * Return the components parsed from the search expression.
+ *
+ * Either a list of nested FreshRSS_BooleanSearch, each of which reports through operator()
+ * whether it is to be combined with AND (the default) or OR,
+ * or a series of FreshRSS_Search combined by explicit OR.
+ * The list is empty when the input expression was empty.
+ * @return array<FreshRSS_BooleanSearch|FreshRSS_Search>
+ */
public function searches() {
return $this->searches;
}
+ /**
+ * Operator received by the constructor for this BooleanSearch ('AND' unless specified otherwise).
+ * @return string 'AND' or 'OR' depending on how this BooleanSearch should be combined
+ */
+ public function operator(): string {
+ return $this->operator;
+ }
+
+ /**
+ * Append a component at the end of the list returned by searches().
+ * The argument is stored as is: its type is not checked and nothing is returned.
+ * @param FreshRSS_BooleanSearch|FreshRSS_Search $search
+ */
public function add($search) {
- if ($search instanceof FreshRSS_Search) {
- $this->searches[] = $search;
- return $search;
- }
- return null;
+ $this->searches[] = $search;
}
public function __toString(): string {