diff options
| author | 2022-06-02 08:41:08 +0200 | |
|---|---|---|
| committer | 2022-06-02 08:41:08 +0200 | |
| commit | f85c510ed49be031145f6b35e815ce890cd4f9aa (patch) | |
| tree | c7ac947ba5ddacf85dc5d97330f38f7d91b0964c /app/Models/BooleanSearch.php | |
| parent | f988b996ab69104bc45b222fa88d34b5c78f98b3 (diff) | |
New search engine (#4378)
* New possibility to invoke user queries from a search expression
From the search field: `S:"My query"`.
Can be combined with other filters such as `S:"My query" date:P3d` as long as the user queries do not contain `OR`.
A use-case is to have an RSS filter with a stable address or an external API call with the ability to update the user query.
* Draft of parenthesis logic
* More draft
* Working parenthesis (a OR b) (c OR d)
* Working (A) OR (B)
* Support nested parentheses + unit tests + documentation
* search:MySearch and S:3
Diffstat (limited to 'app/Models/BooleanSearch.php')
| -rw-r--r-- | app/Models/BooleanSearch.php | 216 |
1 file changed, 208 insertions, 8 deletions
diff --git a/app/Models/BooleanSearch.php b/app/Models/BooleanSearch.php index 774e42073..4cb74865a 100644 --- a/app/Models/BooleanSearch.php +++ b/app/Models/BooleanSearch.php @@ -7,17 +7,210 @@ class FreshRSS_BooleanSearch { /** @var string */ private $raw_input = ''; + /** @var array<FreshRSS_BooleanSearch|FreshRSS_Search> */ private $searches = array(); - public function __construct($input) { + /** @var string 'AND' or 'OR' */ + private $operator; + + public function __construct(string $input, int $level = 0, $operator = 'AND') { + $this->operator = $operator; $input = trim($input); if ($input == '') { return; } $this->raw_input = $input; - $input = preg_replace('/:"(.*?)"/', ':"\1"', $input); - $input = preg_replace('/(?<=[\s!-]|^)"(.*?)"/', '"\1"', $input); + if ($level === 0) { + $input = preg_replace('/:"(.*?)"/', ':"\1"', $input); + $input = preg_replace('/(?<=[\s!-]|^)"(.*?)"/', '"\1"', $input); + + $input = $this->parseUserQueryNames($input); + $input = $this->parseUserQueryIds($input); + } + + // Either parse everything as a series of BooleanSearch's combined by implicit AND + // or parse everything as a series of Search's combined by explicit OR + $this->parseParentheses($input, $level) || $this->parseOrSegments($input); + } + + /** + * Parse the user queries (saved searches) by name and expand them in the input string. 
+ */ + private function parseUserQueryNames(string $input): string { + $all_matches = []; + if (preg_match_all('/\bsearch:(?P<delim>[\'"])(?P<search>.*)(?P=delim)/U', $input, $matches)) { + $all_matches[] = $matches; + + } + if (preg_match_all('/\bsearch:(?P<search>[^\s"]*)/', $input, $matches)) { + $all_matches[] = $matches; + } + + if (!empty($all_matches)) { + /** @var array<string,FreshRSS_UserQuery> */ + $queries = []; + foreach (FreshRSS_Context::$user_conf->queries as $raw_query) { + $query = new FreshRSS_UserQuery($raw_query); + $queries[$query->getName()] = $query; + } + + $fromS = []; + $toS = []; + foreach ($all_matches as $matches) { + for ($i = count($matches['search']) - 1; $i >= 0; $i--) { + $name = trim($matches['search'][$i]); + if (!empty($queries[$name])) { + $fromS[] = $matches[0][$i]; + $toS[] = '(' . trim($queries[$name]->getSearch()) . ')'; + } + } + } + + $input = str_replace($fromS, $toS, $input); + } + return $input; + } + + /** + * Parse the user queries (saved searches) by ID and expand them in the input string. + */ + private function parseUserQueryIds(string $input): string { + $all_matches = []; + + if (preg_match_all('/\bS:(?P<search>\d+)/', $input, $matches)) { + $all_matches[] = $matches; + } + + if (!empty($all_matches)) { + /** @var array<string,FreshRSS_UserQuery> */ + $queries = []; + foreach (FreshRSS_Context::$user_conf->queries as $raw_query) { + $query = new FreshRSS_UserQuery($raw_query); + $queries[] = $query; + } + + $fromS = []; + $toS = []; + foreach ($all_matches as $matches) { + for ($i = count($matches['search']) - 1; $i >= 0; $i--) { + // Index starting from 1 + $id = intval(trim($matches['search'][$i])) - 1; + if (!empty($queries[$id])) { + $fromS[] = $matches[0][$i]; + $toS[] = '(' . trim($queries[$id]->getSearch()) . 
')'; + } + } + } + + $input = str_replace($fromS, $toS, $input); + } + return $input; + } + + /** @return bool True if some parenthesis logic took over, false otherwise */ + private function parseParentheses(string $input, int $level): bool { + $input = trim($input); + $length = strlen($input); + $i = 0; + $before = ''; + $hasParenthesis = false; + $nextOperator = 'AND'; + while ($i < $length) { + $c = $input[$i]; + + if ($c === '(') { + $hasParenthesis = true; + + $before = trim($before); + if (preg_match('/\bOR$/i', $before)) { + // Trim trailing OR + $before = substr($before, 0, -2); + + // The text prior to the OR is a BooleanSearch + $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator); + if (count($searchBefore->searches()) > 0) { + $this->searches[] = $searchBefore; + } + $before = ''; + + // The next BooleanSearch will have to be combined with OR instead of default AND + $nextOperator = 'OR'; + } elseif ($before !== '') { + // The text prior to the opening parenthesis is a BooleanSearch + $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator); + if (count($searchBefore->searches()) > 0) { + $this->searches[] = $searchBefore; + } + $before = ''; + } + + // Search the matching closing parenthesis + $parentheses = 1; + $sub = ''; + $i++; + while ($i < $length) { + $c = $input[$i]; + if ($c === '(') { + // One nested level deeper + $parentheses++; + $sub .= $c; + } elseif ($c === ')') { + $parentheses--; + if ($parentheses === 0) { + // Found the matching closing parenthesis + $searchSub = new FreshRSS_BooleanSearch($sub, $level + 1, $nextOperator); + $nextOperator = 'AND'; + if (count($searchSub->searches()) > 0) { + $this->searches[] = $searchSub; + } + $sub = ''; + break; + } else { + $sub .= $c; + } + } else { + $sub .= $c; + } + $i++; + } + // $sub = trim($sub); + // if ($sub != '') { + // // TODO: Consider throwing an error or warning in case of non-matching parenthesis + // } + // } elseif ($c === ')') { 
+ // // TODO: Consider throwing an error or warning in case of non-matching parenthesis + } else { + $before .= $c; + } + $i++; + } + if ($hasParenthesis) { + $before = trim($before); + if (preg_match('/^OR\b/i', $before)) { + // The next BooleanSearch will have to be combined with OR instead of default AND + $nextOperator = 'OR'; + // Trim leading OR + $before = substr($before, 2); + } + + // The remaining text after the last parenthesis is a BooleanSearch + $searchBefore = new FreshRSS_BooleanSearch($before, $level + 1, $nextOperator); + $nextOperator = 'AND'; + if (count($searchBefore->searches()) > 0) { + $this->searches[] = $searchBefore; + } + return true; + } + // There was no parenthesis logic to apply + return false; + } + + private function parseOrSegments(string $input) { + $input = trim($input); + if ($input == '') { + return; + } $splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE); $segment = ''; @@ -43,16 +236,23 @@ class FreshRSS_BooleanSearch { } } + /** + * Either a list of FreshRSS_BooleanSearch combined by implicit AND + * or a series of FreshRSS_Search combined by explicit OR + * @return array<FreshRSS_BooleanSearch|FreshRSS_Search> + */ public function searches() { return $this->searches; } + /** @return string 'AND' or 'OR' depending on how this BooleanSearch should be combined */ + public function operator(): string { + return $this->operator; + } + + /** @param FreshRSS_BooleanSearch|FreshRSS_Search $search */ public function add($search) { - if ($search instanceof FreshRSS_Search) { - $this->searches[] = $search; - return $search; - } - return null; + $this->searches[] = $search; } public function __toString(): string { |
