From af27b6d300723883f1110eee103eb892ddf1056d Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 15 Sep 2018 13:06:29 +0200 Subject: Tags split improvement (#2023) * Tags split improvement Some feeds use a single category with comma-separated tags. Better handling of tags containing a space * Handle spaces in searches with + Can now search in tags containing spaces * Fix searches with spaces for title and author --- app/Models/Entry.php | 17 +++++------------ app/Models/Feed.php | 18 +++++++++++++----- app/Models/Search.php | 37 +++++++++++++++++++++++++++++-------- 3 files changed, 47 insertions(+), 25 deletions(-) (limited to 'app/Models') diff --git a/app/Models/Entry.php b/app/Models/Entry.php index ccbad5724..48a0b1bed 100644 --- a/app/Models/Entry.php +++ b/app/Models/Entry.php @@ -32,7 +32,7 @@ class FreshRSS_Entry extends Minz_Model { $this->_isFavorite($is_favorite); $this->_feedId($feedId); $tags = mb_strcut($tags, 0, 1023, 'UTF-8'); - $this->_tags(preg_split('/[\s#]/', $tags)); + $this->_tags($tags); $this->_guid($guid); } @@ -86,9 +86,9 @@ class FreshRSS_Entry extends Minz_Model { return $this->feedId; } } - public function tags($inString = false) { - if ($inString) { - return empty($this->tags) ? '' : '#' . implode(' #', $this->tags); + public function tags($asString = false) { + if ($asString) { + return $this->tags == '' ? '' : '#' . 
implode(' #', $this->tags); } else { return $this->tags; } @@ -162,15 +162,8 @@ class FreshRSS_Entry extends Minz_Model { public function _tags($value) { $this->hash = null; if (!is_array($value)) { - $value = array($value); + $value = preg_split('/\s*[#,]\s*/', $value, -1, PREG_SPLIT_NO_EMPTY); } - - foreach ($value as $key => $t) { - if (!$t) { - unset($value[$key]); - } - } - $this->tags = $value; } diff --git a/app/Models/Feed.php b/app/Models/Feed.php index ed381a867..cc96cde44 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -345,13 +345,21 @@ class FreshRSS_Feed extends Minz_Model { $link = $item->get_permalink(); $date = @strtotime($item->get_date()); - // gestion des tags (catégorie == tag) - $tags_tmp = $item->get_categories(); + //Tag processing (tag == category) + $categories = $item->get_categories(); $tags = array(); - if ($tags_tmp !== null) { - foreach ($tags_tmp as $tag) { - $tags[] = html_only_entity_decode($tag->get_label()); + if (is_array($categories)) { + foreach ($categories as $category) { + $text = html_only_entity_decode($category->get_label()); + //Some feeds use a single category with comma-separated tags + $labels = explode(',', $text); + if (is_array($labels)) { + foreach ($labels as $label) { + $tags[] = trim($label); + } + } } + $tags = array_unique($tags); } $content = html_only_entity_decode($item->get_content()); diff --git a/app/Models/Search.php b/app/Models/Search.php index 5cc7f8e8d..c338d63a1 100644 --- a/app/Models/Search.php +++ b/app/Models/Search.php @@ -40,7 +40,7 @@ class FreshRSS_Search { $input = $this->parseNotIntitleSearch($input); $input = $this->parseNotAuthorSearch($input); $input = $this->parseNotInurlSearch($input); - $input = $this->parseNotTagsSeach($input); + $input = $this->parseNotTagsSearch($input); $input = $this->parsePubdateSearch($input); $input = $this->parseDateSearch($input); @@ -48,7 +48,7 @@ class FreshRSS_Search { $input = $this->parseIntitleSearch($input); $input = 
$this->parseAuthorSearch($input); $input = $this->parseInurlSearch($input); - $input = $this->parseTagsSeach($input); + $input = $this->parseTagsSearch($input); $input = $this->parseNotSearch($input); $input = $this->parseSearch($input); @@ -117,6 +117,17 @@ class FreshRSS_Search { return is_array($anArray) ? array_filter($anArray, function($value) { return $value !== ''; }) : array(); } + private static function decodeSpaces($value) { + if (is_array($value)) { + for ($i = count($value) - 1; $i >= 0; $i--) { + $value[$i] = self::decodeSpaces($value[$i]); + } + } else { + $value = trim(str_replace('+', ' ', $value)); + } + return $value; + } + /** * Parse the search string to find intitle keyword and the search related * to it. @@ -130,11 +141,12 @@ class FreshRSS_Search { $this->intitle = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/\bintitle:(?P<search>\w*)/', $input, $matches)) { + if (preg_match_all('/\bintitle:(?P<search>[\w+]*)/', $input, $matches)) { $this->intitle = array_merge($this->intitle ? $this->intitle : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } $this->intitle = self::removeEmptyValues($this->intitle); + $this->intitle = self::decodeSpaces($this->intitle); return $input; } @@ -143,11 +155,12 @@ class FreshRSS_Search { $this->not_intitle = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/[!-]intitle:(?P<search>\w*)/', $input, $matches)) { + if (preg_match_all('/[!-]intitle:(?P<search>[\w+]*)/', $input, $matches)) { $this->not_intitle = array_merge($this->not_intitle ? 
$this->not_intitle : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } $this->not_intitle = self::removeEmptyValues($this->not_intitle); + $this->not_intitle = self::decodeSpaces($this->not_intitle); return $input; } @@ -166,11 +179,12 @@ class FreshRSS_Search { $this->author = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/\bauthor:(?P<search>\w*)/', $input, $matches)) { + if (preg_match_all('/\bauthor:(?P<search>[\w+]*)/', $input, $matches)) { $this->author = array_merge($this->author ? $this->author : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } $this->author = self::removeEmptyValues($this->author); + $this->author = self::decodeSpaces($this->author); return $input; } @@ -179,11 +193,12 @@ class FreshRSS_Search { $this->not_author = $matches['search']; $input = str_replace($matches[0], '', $input); } - if (preg_match_all('/[!-]author:(?P<search>\w*)/', $input, $matches)) { + if (preg_match_all('/[!-]author:(?P<search>[\w+]*)/', $input, $matches)) { $this->not_author = array_merge($this->not_author ? 
$this->not_author : array(), $matches['search']); $input = str_replace($matches[0], '', $input); } $this->not_author = self::removeEmptyValues($this->not_author); + $this->not_author = self::decodeSpaces($this->not_author); return $input; } @@ -201,6 +216,7 @@ class FreshRSS_Search { $input = str_replace($matches[0], '', $input); } $this->inurl = self::removeEmptyValues($this->inurl); + $this->inurl = self::decodeSpaces($this->inurl); return $input; } @@ -210,6 +226,7 @@ class FreshRSS_Search { $input = str_replace($matches[0], '', $input); } $this->not_inurl = self::removeEmptyValues($this->not_inurl); + $this->not_inurl = self::decodeSpaces($this->not_inurl); return $input; } @@ -259,21 +276,23 @@ class FreshRSS_Search { * @param string $input * @return string */ - private function parseTagsSeach($input) { + private function parseTagsSearch($input) { if (preg_match_all('/#(?P<search>[^\s]+)/', $input, $matches)) { $this->tags = $matches['search']; $input = str_replace($matches[0], '', $input); } $this->tags = self::removeEmptyValues($this->tags); + $this->tags = self::decodeSpaces($this->tags); return $input; } - private function parseNotTagsSeach($input) { + private function parseNotTagsSearch($input) { if (preg_match_all('/[!-]#(?P<search>[^\s]+)/', $input, $matches)) { $this->not_tags = $matches['search']; $input = str_replace($matches[0], '', $input); } $this->not_tags = self::removeEmptyValues($this->not_tags); + $this->not_tags = self::decodeSpaces($this->not_tags); return $input; } @@ -303,6 +322,7 @@ class FreshRSS_Search { } else { $this->search = explode(' ', $input); } + $this->search = self::decodeSpaces($this->search); } private function parseNotSearch($input) { @@ -322,6 +342,7 @@ class FreshRSS_Search { $input = str_replace($matches[0], '', $input); } $this->not_search = self::removeEmptyValues($this->not_search); + $this->not_search = self::decodeSpaces($this->not_search); return $input; } -- cgit v1.2.3