From d9c0d25b85ef3df7ea2cdc261e274efcdd5cfce0 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 1 Apr 2017 22:31:12 +0200 Subject: Improve search: intitle, author, inurl Allow multiple values of intitle: , author:, inurl: Note: Tests for UserQueryTest are broken due to https://github.com/sebastianbergmann/phpunit/wiki/Release-Announcement-for-PHPUnit-4.0.0#backwards-compatibility-issues --- app/Models/Search.php | 60 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 23 deletions(-) (limited to 'app/Models/Search.php') diff --git a/app/Models/Search.php b/app/Models/Search.php index 575a9a2cb..7b801f40b 100644 --- a/app/Models/Search.php +++ b/app/Models/Search.php @@ -81,6 +81,10 @@ class FreshRSS_Search { return $this->search; } + private static function removeEmptyValues($anArray) { + return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array(); + } + /** * Parse the search string to find intitle keyword and the search related * to it. @@ -90,14 +94,15 @@ class FreshRSS_Search { * @return string */ private function parseIntitleSearch($input) { - if (preg_match('/intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->intitle = $matches['search']; - return str_replace($matches[0], '', $input); + $input = str_replace($matches[0], '', $input); } - if (preg_match('/intitle:(?P\w*)/', $input, $matches)) { - $this->intitle = $matches['search']; - return str_replace($matches[0], '', $input); + if (preg_match_all('/intitle:(?P\w*)/', $input, $matches)) { + $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); } + $this->intitle = self::removeEmptyValues($this->intitle); return $input; } @@ -112,30 +117,32 @@ class FreshRSS_Search { * @return string */ private function parseAuthorSearch($input) { - if (preg_match('/author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->author = $matches['search']; - return str_replace($matches[0], '', $input); + $input = str_replace($matches[0], '', $input); } - if (preg_match('/author:(?P\w*)/', $input, $matches)) { - $this->author = $matches['search']; - return str_replace($matches[0], '', $input); + if (preg_match_all('/author:(?P\w*)/', $input, $matches)) { + $this->author = array_merge($this->author ? $this->author : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); } + $this->author = self::removeEmptyValues($this->author); return $input; } /** * Parse the search string to find inurl keyword and the search related * to it. - * The search is the first word following the keyword except. + * The search is the first word following the keyword. * * @param string $input * @return string */ private function parseInurlSearch($input) { - if (preg_match('/inurl:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/inurl:(?P[^\s]*)/', $input, $matches)) { $this->inurl = $matches['search']; - return str_replace($matches[0], '', $input); + $input = str_replace($matches[0], '', $input); } + $this->inurl = self::removeEmptyValues($this->inurl); return $input; } @@ -148,9 +155,12 @@ class FreshRSS_Search { * @return string */ private function parseDateSearch($input) { - if (preg_match('/date:(?P[^\s]*)/', $input, $matches)) { - list($this->min_date, $this->max_date) = parseDateInterval($matches['search']); - return str_replace($matches[0], '', $input); + if (preg_match_all('/date:(?P[^\s]*)/', $input, $matches)) { + $input = str_replace($matches[0], '', $input); + $dates = self::removeEmptyValues($matches['search']); + if (!empty($dates[0])) { + list($this->min_date, $this->max_date) = parseDateInterval($dates[0]); + } } return $input; } @@ -164,9 +174,12 @@ class FreshRSS_Search { * @return string */ private function parsePubdateSearch($input) { - if (preg_match('/pubdate:(?P[^\s]*)/', $input, $matches)) { - list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($matches['search']); - return str_replace($matches[0], '', $input); + if (preg_match_all('/pubdate:(?P[^\s]*)/', $input, $matches)) { + $input = str_replace($matches[0], '', $input); + $dates = self::removeEmptyValues($matches['search']); + if (!empty($dates[0])) { + list($this->min_pubdate, $this->max_pubdate) = parseDateInterval($dates[0]); + } } return $input; } @@ -182,8 +195,9 @@ class FreshRSS_Search { private function parseTagsSeach($input) { if (preg_match_all('/#(?P[^\s]+)/', $input, $matches)) { $this->tags = $matches['search']; - return str_replace($matches[0], '', $input); + $input = str_replace($matches[0], '', $input); } + $this->tags = self::removeEmptyValues($this->tags); return $input; } @@ -196,7 +210,7 @@ class FreshRSS_Search { * @return string */ private function parseSearch($input) { - $input = $this->cleanSearch($input); + $input = self::cleanSearch($input); if (strcmp($input, '') == 0) { return; } @@ -204,7 +218,7 @@ class FreshRSS_Search { $this->search = $matches['search']; $input = str_replace($matches[0], '', $input); } - $input = $this->cleanSearch($input); + $input = self::cleanSearch($input); if (strcmp($input, '') == 0) { return; } @@ -221,7 +235,7 @@ class FreshRSS_Search { * @param string $input * @return string */ - private function cleanSearch($input) { + private static function cleanSearch($input) { $input = preg_replace('/\s+/', ' ', $input); return trim($input); } -- cgit v1.2.3 From aadba9fb707d2d2bc6c05a84bb3a823895531137 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 1 Apr 2017 23:41:19 +0200 Subject: Negative searches https://github.com/FreshRSS/FreshRSS/issues/1381 Possibility to exclude authors, titles, tags, urls, words by prefixing them by ! or - (like Google Search): * !intitle:unwanted * -intitle:unwanted * -author:unwanted * -#unwanted * -unwanted And one can use many of each and combine them with positive searches --- app/Models/EntryDAO.php | 72 +++++++++++++++++++++-------- app/Models/Search.php | 120 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 159 insertions(+), 33 deletions(-) (limited to 'app/Models/Search.php') diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php index 510755a2f..7e836097a 100644 --- a/app/Models/EntryDAO.php +++ b/app/Models/EntryDAO.php @@ -630,24 +630,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $search .= 'AND ' . $alias . 'id >= ' . $date_min . '000000 '; } if ($filter) { - if ($filter->getIntitle()) { - foreach ($filter->getIntitle() as $title) { - $search .= 'AND ' . $alias . 'title LIKE ? '; - $values[] = "%{$title}%"; - } - } - if ($filter->getInurl()) { - foreach ($filter->getInurl() as $url) { - $search .= 'AND CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ? '; - $values[] = "%{$url}%"; - } - } - if ($filter->getAuthor()) { - foreach ($filter->getAuthor() as $author) { - $search .= 'AND ' . $alias . 'author LIKE ? '; - $values[] = "%{$author}%"; - } - } if ($filter->getMinDate()) { $search .= 'AND ' . $alias . 'id >= ? '; $values[] = "{$filter->getMinDate()}000000"; @@ -664,19 +646,69 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $search .= 'AND ' . $alias . 'date <= ? '; $values[] = $filter->getMaxPubdate(); } + + if ($filter->getAuthor()) { + foreach ($filter->getAuthor() as $author) { + $search .= 'AND ' . $alias . 'author LIKE ? '; + $values[] = "%{$author}%"; + } + } + if ($filter->getIntitle()) { + foreach ($filter->getIntitle() as $title) { + $search .= 'AND ' . $alias . 'title LIKE ? '; + $values[] = "%{$title}%"; + } + } if ($filter->getTags()) { foreach ($filter->getTags() as $tag) { $search .= 'AND ' . $alias . 'tags LIKE ? '; $values[] = "%{$tag}%"; } } + if ($filter->getInurl()) { + foreach ($filter->getInurl() as $url) { + $search .= 'AND CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ? '; + $values[] = "%{$url}%"; + } + } + + if ($filter->getNotAuthor()) { + foreach ($filter->getNotAuthor() as $author) { + $search .= 'AND (NOT ' . $alias . 'author LIKE ?) '; + $values[] = "%{$author}%"; + } + } + if ($filter->getNotIntitle()) { + foreach ($filter->getNotIntitle() as $title) { + $search .= 'AND (NOT ' . $alias . 'title LIKE ?) '; + $values[] = "%{$title}%"; + } + } + if ($filter->getNotTags()) { + foreach ($filter->getNotTags() as $tag) { + $search .= 'AND (NOT ' . $alias . 'tags LIKE ?) '; + $values[] = "%{$tag}%"; + } + } + if ($filter->getNotInurl()) { + foreach ($filter->getNotInurl() as $url) { + $search .= 'AND (NOT CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ?) '; + $values[] = "%{$url}%"; + } + } + if ($filter->getSearch()) { - $search_values = $filter->getSearch(); - foreach ($search_values as $search_value) { + foreach ($filter->getSearch() as $search_value) { $search .= 'AND ' . $this->sqlconcat($alias . 'title', $this->isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? '; $values[] = "%{$search_value}%"; } } + if ($filter->getNotSearch()) { + foreach ($filter->getNotSearch() as $search_value) { + $search .= 'AND (NOT ' . $this->sqlconcat($alias . 'title', $this->isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) '; + $values[] = "%{$search_value}%"; + } + } } return array($values, $search); } diff --git a/app/Models/Search.php b/app/Models/Search.php index 7b801f40b..2e89ea05b 100644 --- a/app/Models/Search.php +++ b/app/Models/Search.php @@ -23,18 +23,33 @@ class FreshRSS_Search { private $tags; private $search; + private $not_intitle; + private $not_inurl; + private $not_author; + private $not_tags; + private $not_search; + public function __construct($input) { - if (strcmp($input, '') == 0) { + if ($input == '') { return; } $this->raw_input = $input; + + $input = $this->parseNotIntitleSearch($input); + $input = $this->parseNotAuthorSearch($input); + $input = $this->parseNotInurlSearch($input); + $input = $this->parseNotTagsSeach($input); + + $input = $this->parsePubdateSearch($input); + $input = $this->parseDateSearch($input); + $input = $this->parseIntitleSearch($input); $input = $this->parseAuthorSearch($input); $input = $this->parseInurlSearch($input); - $input = $this->parsePubdateSearch($input); - $input = $this->parseDateSearch($input); $input = $this->parseTagsSeach($input); - $this->parseSearch($input); + + $input = $this->parseNotSearch($input); + $input = $this->parseSearch($input); } public function __toString() { @@ -48,6 +63,9 @@ class FreshRSS_Search { public function getIntitle() { return $this->intitle; } + public function getNotIntitle() { + return $this->not_intitle; + } public function getMinDate() { return $this->min_date; @@ -68,18 +86,30 @@ class FreshRSS_Search { public function getInurl() { return $this->inurl; } + public function getNotInurl() { + return $this->not_inurl; + } public function getAuthor() { return $this->author; } + public function getNotAuthor() { + return $this->not_author; + } public function getTags() { return $this->tags; } + public function getNotTags() { + return $this->not_tags; + } public function getSearch() { return $this->search; } + public function getNotSearch() { + return $this->not_search; + } private static function removeEmptyValues($anArray) { return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array(); @@ -94,11 +124,11 @@ class FreshRSS_Search { * @return string */ private function parseIntitleSearch($input) { - if (preg_match_all('/intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/\bintitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->intitle = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/intitle:(?P\w*)/', $input, $matches)) { + if (preg_match_all('/\bintitle:(?P\w*)/', $input, $matches)) { $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } @@ -106,6 +136,19 @@ class FreshRSS_Search { return $input; } + private function parseNotIntitleSearch($input) { + if (preg_match_all('/[!-]intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_intitle = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if (preg_match_all('/[!-]intitle:(?P\w*)/', $input, $matches)) { + $this->not_intitle = array_merge($this->not_intitle ? $this->not_intitle : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_intitle = self::removeEmptyValues($this->not_intitle); + return $input; + } + /** * Parse the search string to find author keyword and the search related * to it. @@ -117,11 +160,11 @@ class FreshRSS_Search { * @return string */ private function parseAuthorSearch($input) { - if (preg_match_all('/author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/\bauthor:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->author = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/author:(?P\w*)/', $input, $matches)) { + if (preg_match_all('/\bauthor:(?P\w*)/', $input, $matches)) { $this->author = array_merge($this->author ? $this->author : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } @@ -129,6 +172,19 @@ class FreshRSS_Search { return $input; } + private function parseNotAuthorSearch($input) { + if (preg_match_all('/[!-]author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_author = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if (preg_match_all('/[!-]author:(?P\w*)/', $input, $matches)) { + $this->not_author = array_merge($this->not_author ? $this->not_author : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_author = self::removeEmptyValues($this->not_author); + return $input; + } + /** * Parse the search string to find inurl keyword and the search related * to it. @@ -138,7 +194,7 @@ class FreshRSS_Search { * @return string */ private function parseInurlSearch($input) { - if (preg_match_all('/inurl:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\binurl:(?P[^\s]*)/', $input, $matches)) { $this->inurl = $matches['search']; $input = str_replace($matches[0], '', $input); } @@ -146,6 +202,15 @@ class FreshRSS_Search { return $input; } + private function parseNotInurlSearch($input) { + if (preg_match_all('/[!-]inurl:(?P[^\s]*)/', $input, $matches)) { + $this->not_inurl = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + $this->not_inurl = self::removeEmptyValues($this->not_inurl); + return $input; + } + /** * Parse the search string to find date keyword and the search related * to it. @@ -155,7 +220,7 @@ class FreshRSS_Search { * @return string */ private function parseDateSearch($input) { - if (preg_match_all('/date:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\bdate:(?P[^\s]*)/', $input, $matches)) { $input = str_replace($matches[0], '', $input); $dates = self::removeEmptyValues($matches['search']); if (!empty($dates[0])) { @@ -174,7 +239,7 @@ class FreshRSS_Search { * @return string */ private function parsePubdateSearch($input) { - if (preg_match_all('/pubdate:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\bpubdate:(?P[^\s]*)/', $input, $matches)) { $input = str_replace($matches[0], '', $input); $dates = self::removeEmptyValues($matches['search']); if (!empty($dates[0])) { @@ -201,6 +266,15 @@ class FreshRSS_Search { return $input; } + private function parseNotTagsSeach($input) { + if (preg_match_all('/[!-]#(?P[^\s]+)/', $input, $matches)) { + $this->not_tags = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + $this->not_tags = self::removeEmptyValues($this->not_tags); + return $input; + } + /** * Parse the search string to find search values. * Every word is a distinct search value, except when using a delimiter. @@ -211,7 +285,7 @@ class FreshRSS_Search { */ private function parseSearch($input) { $input = self::cleanSearch($input); - if (strcmp($input, '') == 0) { + if ($input == '') { return; } if (preg_match_all('/(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { @@ -219,7 +293,7 @@ class FreshRSS_Search { $input = str_replace($matches[0], '', $input); } $input = self::cleanSearch($input); - if (strcmp($input, '') == 0) { + if ($input == '') { return; } if (is_array($this->search)) { @@ -229,6 +303,26 @@ class FreshRSS_Search { } } + private function parseNotSearch($input) { + $input = self::cleanSearch($input); + if ($input == '') { + return; + } + if (preg_match_all('/[!-](?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_search = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if ($input == '') { + return; + } + if (preg_match_all('/[!-](?P[^\s]+)/', $input, $matches)) { + $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_search = self::removeEmptyValues($this->not_search); + return $input; + } + /** * Remove all unnecessary spaces in the search * -- cgit v1.2.3 From 3011bbc5e1360a6b01f3ea652eae509a0bb5f164 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 2 Apr 2017 11:55:20 +0200 Subject: Search allow double quotes `author:"some name"` --- app/Models/Search.php | 2 ++ 1 file changed, 2 insertions(+) (limited to 'app/Models/Search.php') diff --git a/app/Models/Search.php b/app/Models/Search.php index 2e89ea05b..5cc7f8e8d 100644 --- a/app/Models/Search.php +++ b/app/Models/Search.php @@ -35,6 +35,8 @@ class FreshRSS_Search { } $this->raw_input = $input; + $input = preg_replace('/:"(.*?)"/', ':"\1"', $input); + $input = $this->parseNotIntitleSearch($input); $input = $this->parseNotAuthorSearch($input); $input = $this->parseNotInurlSearch($input); -- cgit v1.2.3