From aadba9fb707d2d2bc6c05a84bb3a823895531137 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 1 Apr 2017 23:41:19 +0200 Subject: Negative searches https://github.com/FreshRSS/FreshRSS/issues/1381 Possibility to exclude authors, titles, tags, urls, words by prefixing them by ! or - (like Google Search): * !intitle:unwanted * -intitle:unwanted * -author:unwanted * -#unwanted * -unwanted And one can use many of each and combine them with positive searches --- app/Models/EntryDAO.php | 72 +++++++++++++++++++++-------- app/Models/Search.php | 120 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 159 insertions(+), 33 deletions(-) (limited to 'app') diff --git a/app/Models/EntryDAO.php b/app/Models/EntryDAO.php index 510755a2f..7e836097a 100644 --- a/app/Models/EntryDAO.php +++ b/app/Models/EntryDAO.php @@ -630,24 +630,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $search .= 'AND ' . $alias . 'id >= ' . $date_min . '000000 '; } if ($filter) { - if ($filter->getIntitle()) { - foreach ($filter->getIntitle() as $title) { - $search .= 'AND ' . $alias . 'title LIKE ? '; - $values[] = "%{$title}%"; - } - } - if ($filter->getInurl()) { - foreach ($filter->getInurl() as $url) { - $search .= 'AND CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ? '; - $values[] = "%{$url}%"; - } - } - if ($filter->getAuthor()) { - foreach ($filter->getAuthor() as $author) { - $search .= 'AND ' . $alias . 'author LIKE ? '; - $values[] = "%{$author}%"; - } - } if ($filter->getMinDate()) { $search .= 'AND ' . $alias . 'id >= ? '; $values[] = "{$filter->getMinDate()}000000"; @@ -664,19 +646,69 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable { $search .= 'AND ' . $alias . 'date <= ? '; $values[] = $filter->getMaxPubdate(); } + + if ($filter->getAuthor()) { + foreach ($filter->getAuthor() as $author) { + $search .= 'AND ' . $alias . 'author LIKE ? '; + $values[] = "%{$author}%"; + } + } + if ($filter->getIntitle()) { + foreach ($filter->getIntitle() as $title) { + $search .= 'AND ' . $alias . 'title LIKE ? '; + $values[] = "%{$title}%"; + } + } if ($filter->getTags()) { foreach ($filter->getTags() as $tag) { $search .= 'AND ' . $alias . 'tags LIKE ? '; $values[] = "%{$tag}%"; } } + if ($filter->getInurl()) { + foreach ($filter->getInurl() as $url) { + $search .= 'AND CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ? '; + $values[] = "%{$url}%"; + } + } + + if ($filter->getNotAuthor()) { + foreach ($filter->getNotAuthor() as $author) { + $search .= 'AND (NOT ' . $alias . 'author LIKE ?) '; + $values[] = "%{$author}%"; + } + } + if ($filter->getNotIntitle()) { + foreach ($filter->getNotIntitle() as $title) { + $search .= 'AND (NOT ' . $alias . 'title LIKE ?) '; + $values[] = "%{$title}%"; + } + } + if ($filter->getNotTags()) { + foreach ($filter->getNotTags() as $tag) { + $search .= 'AND (NOT ' . $alias . 'tags LIKE ?) '; + $values[] = "%{$tag}%"; + } + } + if ($filter->getNotInurl()) { + foreach ($filter->getNotInurl() as $url) { + $search .= 'AND (NOT CONCAT(' . $alias . 'link, ' . $alias . 'guid) LIKE ?) '; + $values[] = "%{$url}%"; + } + } + if ($filter->getSearch()) { - $search_values = $filter->getSearch(); - foreach ($search_values as $search_value) { + foreach ($filter->getSearch() as $search_value) { $search .= 'AND ' . $this->sqlconcat($alias . 'title', $this->isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? '; $values[] = "%{$search_value}%"; } } + if ($filter->getNotSearch()) { + foreach ($filter->getNotSearch() as $search_value) { + $search .= 'AND (NOT ' . $this->sqlconcat($alias . 'title', $this->isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) '; + $values[] = "%{$search_value}%"; + } + } } return array($values, $search); } diff --git a/app/Models/Search.php b/app/Models/Search.php index 7b801f40b..2e89ea05b 100644 --- a/app/Models/Search.php +++ b/app/Models/Search.php @@ -23,18 +23,33 @@ class FreshRSS_Search { private $tags; private $search; + private $not_intitle; + private $not_inurl; + private $not_author; + private $not_tags; + private $not_search; + public function __construct($input) { - if (strcmp($input, '') == 0) { + if ($input == '') { return; } $this->raw_input = $input; + + $input = $this->parseNotIntitleSearch($input); + $input = $this->parseNotAuthorSearch($input); + $input = $this->parseNotInurlSearch($input); + $input = $this->parseNotTagsSeach($input); + + $input = $this->parsePubdateSearch($input); + $input = $this->parseDateSearch($input); + $input = $this->parseIntitleSearch($input); $input = $this->parseAuthorSearch($input); $input = $this->parseInurlSearch($input); - $input = $this->parsePubdateSearch($input); - $input = $this->parseDateSearch($input); $input = $this->parseTagsSeach($input); - $this->parseSearch($input); + + $input = $this->parseNotSearch($input); + $input = $this->parseSearch($input); } public function __toString() { @@ -48,6 +63,9 @@ class FreshRSS_Search { public function getIntitle() { return $this->intitle; } + public function getNotIntitle() { + return $this->not_intitle; + } public function getMinDate() { return $this->min_date; @@ -68,18 +86,30 @@ class FreshRSS_Search { public function getInurl() { return $this->inurl; } + public function getNotInurl() { + return $this->not_inurl; + } public function getAuthor() { return $this->author; } + public function getNotAuthor() { + return $this->not_author; + } public function getTags() { return $this->tags; } + public function getNotTags() { + return $this->not_tags; + } public function getSearch() { return $this->search; } + public function getNotSearch() { + return $this->not_search; + } private static function removeEmptyValues($anArray) { return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array(); @@ -94,11 +124,11 @@ class FreshRSS_Search { * @return string */ private function parseIntitleSearch($input) { - if (preg_match_all('/intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/\bintitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->intitle = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/intitle:(?P\w*)/', $input, $matches)) { + if (preg_match_all('/\bintitle:(?P\w*)/', $input, $matches)) { $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } @@ -106,6 +136,19 @@ class FreshRSS_Search { return $input; } + private function parseNotIntitleSearch($input) { + if (preg_match_all('/[!-]intitle:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_intitle = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if (preg_match_all('/[!-]intitle:(?P\w*)/', $input, $matches)) { + $this->not_intitle = array_merge($this->not_intitle ? $this->not_intitle : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_intitle = self::removeEmptyValues($this->not_intitle); + return $input; + } + /** * Parse the search string to find author keyword and the search related * to it. @@ -117,11 +160,11 @@ class FreshRSS_Search { * @return string */ private function parseAuthorSearch($input) { - if (preg_match_all('/author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + if (preg_match_all('/\bauthor:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { $this->author = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/author:(?P\w*)/', $input, $matches)) { + if (preg_match_all('/\bauthor:(?P\w*)/', $input, $matches)) { $this->author = array_merge($this->author ? $this->author : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } @@ -129,6 +172,19 @@ class FreshRSS_Search { return $input; } + private function parseNotAuthorSearch($input) { + if (preg_match_all('/[!-]author:(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_author = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if (preg_match_all('/[!-]author:(?P\w*)/', $input, $matches)) { + $this->not_author = array_merge($this->not_author ? $this->not_author : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_author = self::removeEmptyValues($this->not_author); + return $input; + } + /** * Parse the search string to find inurl keyword and the search related * to it. @@ -138,7 +194,7 @@ class FreshRSS_Search { * @return string */ private function parseInurlSearch($input) { - if (preg_match_all('/inurl:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\binurl:(?P[^\s]*)/', $input, $matches)) { $this->inurl = $matches['search']; $input = str_replace($matches[0], '', $input); } @@ -146,6 +202,15 @@ class FreshRSS_Search { return $input; } + private function parseNotInurlSearch($input) { + if (preg_match_all('/[!-]inurl:(?P[^\s]*)/', $input, $matches)) { + $this->not_inurl = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + $this->not_inurl = self::removeEmptyValues($this->not_inurl); + return $input; + } + /** * Parse the search string to find date keyword and the search related * to it. @@ -155,7 +220,7 @@ class FreshRSS_Search { * @return string */ private function parseDateSearch($input) { - if (preg_match_all('/date:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\bdate:(?P[^\s]*)/', $input, $matches)) { $input = str_replace($matches[0], '', $input); $dates = self::removeEmptyValues($matches['search']); if (!empty($dates[0])) { @@ -174,7 +239,7 @@ class FreshRSS_Search { * @return string */ private function parsePubdateSearch($input) { - if (preg_match_all('/pubdate:(?P[^\s]*)/', $input, $matches)) { + if (preg_match_all('/\bpubdate:(?P[^\s]*)/', $input, $matches)) { $input = str_replace($matches[0], '', $input); $dates = self::removeEmptyValues($matches['search']); if (!empty($dates[0])) { @@ -201,6 +266,15 @@ class FreshRSS_Search { return $input; } + private function parseNotTagsSeach($input) { + if (preg_match_all('/[!-]#(?P[^\s]+)/', $input, $matches)) { + $this->not_tags = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + $this->not_tags = self::removeEmptyValues($this->not_tags); + return $input; + } + /** * Parse the search string to find search values. * Every word is a distinct search value, except when using a delimiter. @@ -211,7 +285,7 @@ class FreshRSS_Search { */ private function parseSearch($input) { $input = self::cleanSearch($input); - if (strcmp($input, '') == 0) { + if ($input == '') { return; } if (preg_match_all('/(?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { @@ -219,7 +293,7 @@ class FreshRSS_Search { $input = str_replace($matches[0], '', $input); } $input = self::cleanSearch($input); - if (strcmp($input, '') == 0) { + if ($input == '') { return; } if (is_array($this->search)) { @@ -229,6 +303,26 @@ class FreshRSS_Search { } } + private function parseNotSearch($input) { + $input = self::cleanSearch($input); + if ($input == '') { + return; + } + if (preg_match_all('/[!-](?P[\'"])(?P.*)(?P=delim)/U', $input, $matches)) { + $this->not_search = $matches['search']; + $input = str_replace($matches[0], '', $input); + } + if ($input == '') { + return; + } + if (preg_match_all('/[!-](?P[^\s]+)/', $input, $matches)) { + $this->not_search = array_merge(is_array($this->not_search) ? $this->not_search : array(), $matches['search']); + $input = str_replace($matches[0], '', $input); + } + $this->not_search = self::removeEmptyValues($this->not_search); + return $input; + } + /** * Remove all unnecessary spaces in the search * -- cgit v1.2.3