From 1a552bd60eab4a4b940d3896376b599e155d7da0 Mon Sep 17 00:00:00 2001
From: Alexandre Alapetite
Date: Fri, 6 Sep 2024 09:35:58 +0200
Subject: Regex search (#6706)

* Regex search
fix https://github.com/FreshRSS/FreshRSS/issues/3549

* Fix PHPStan

* Fix escape

* Fix ungreedy

* Initial support for regex search in PostgreSQL and MySQL

* Improvements, support MySQL

* Fix multiline

* Add support for SQLite

* A few tests

* Added author: and inurl: support, documentation

* author example

* Remove \b for now

* Disable regex sanitization for now

* Fix getInurlRegex

* getNotInurlRegex

* Quotes for inurl:

* Fix test

* Fix quoted tags + regex for tags
https://github.com/FreshRSS/FreshRSS/issues/6761

* Fix wrong regex detection

* Add MariaDB

* Fix logic

* Increase requirements for MySQL and MariaDB
Check support for multiline mode in MySQL

* Remove sanitizeRegexes()

* Allow searching HTML code
Allow searching for instance `/
/`
Fix https://github.com/FreshRSS/FreshRSS/issues/6775#issuecomment-2331769883

* Doc regex search HTML

* Fix Doctype
---
 app/Models/Entry.php | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'app/Models/Entry.php')

diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index 4b331419b..415bc0235 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -631,27 +631,60 @@ HTML;
 						$ok &= stripos(implode(';', $this->authors), $author) !== false;
 					}
 				}
+				if ($ok && $filter->getAuthorRegex()) {
+					foreach ($filter->getAuthorRegex() as $author) {
+						$ok &= preg_match($author, implode("\n", $this->authors)) === 1;
+					}
+				}
 				if ($ok && $filter->getNotAuthor()) {
 					foreach ($filter->getNotAuthor() as $author) {
 						$ok &= stripos(implode(';', $this->authors), $author) === false;
 					}
 				}
+				if ($ok && $filter->getNotAuthorRegex()) {
+					foreach ($filter->getNotAuthorRegex() as $author) {
+						$ok &= preg_match($author, implode("\n", $this->authors)) === 0;
+					}
+				}
 				if ($ok && $filter->getIntitle()) {
 					foreach ($filter->getIntitle() as $title) {
 						$ok &= stripos($this->title, $title) !== false;
 					}
 				}
+				if ($ok && $filter->getIntitleRegex()) {
+					foreach ($filter->getIntitleRegex() as $title) {
+						$ok &= preg_match($title, $this->title) === 1;
+					}
+				}
 				if ($ok && $filter->getNotIntitle()) {
 					foreach ($filter->getNotIntitle() as $title) {
 						$ok &= stripos($this->title, $title) === false;
 					}
 				}
+				if ($ok && $filter->getNotIntitleRegex()) {
+					foreach ($filter->getNotIntitleRegex() as $title) {
+						$ok &= preg_match($title, $this->title) === 0;
+					}
+				}
 				if ($ok && $filter->getTags()) {
 					foreach ($filter->getTags() as $tag2) {
 						$found = false;
 						foreach ($this->tags as $tag1) {
 							if (strcasecmp($tag1, $tag2) === 0) {
 								$found = true;
+								break;
+							}
+						}
+						$ok &= $found;
+					}
+				}
+				if ($ok && $filter->getTagsRegex()) {
+					foreach ($filter->getTagsRegex() as $tag2) {
+						$found = false;
+						foreach ($this->tags as $tag1) {
+							if (preg_match($tag2, $tag1) === 1) {
+								$found = true;
+								break;
 							}
 						}
 						$ok &= $found;
@@ -663,6 +696,19 @@ HTML;
 						foreach ($this->tags as $tag1) {
 							if (strcasecmp($tag1, $tag2) === 0) {
 								$found = true;
+								break;
+							}
+						}
+						$ok &= !$found;
+					}
+				}
+				if ($ok && $filter->getNotTagsRegex()) {
+					foreach ($filter->getNotTagsRegex() as $tag2) {
+						$found = false;
+						foreach ($this->tags as $tag1) {
+							if (preg_match($tag2, $tag1) === 1) {
+								$found = true;
+								break;
 							}
 						}
 						$ok &= !$found;
@@ -673,11 +719,21 @@ HTML;
 						$ok &= stripos($this->link, $url) !== false;
 					}
 				}
+				if ($ok && $filter->getInurlRegex()) {
+					foreach ($filter->getInurlRegex() as $url) {
+						$ok &= preg_match($url, $this->link) === 1;
+					}
+				}
 				if ($ok && $filter->getNotInurl()) {
 					foreach ($filter->getNotInurl() as $url) {
 						$ok &= stripos($this->link, $url) === false;
 					}
 				}
+				if ($ok && $filter->getNotInurlRegex()) {
+					foreach ($filter->getNotInurlRegex() as $url) {
+						$ok &= preg_match($url, $this->link) === 0;
+					}
+				}
 				if ($ok && $filter->getSearch()) {
 					foreach ($filter->getSearch() as $needle) {
 						$ok &= (stripos($this->title, $needle) !== false || stripos($this->content, $needle) !== false);
@@ -688,6 +744,16 @@ HTML;
 						$ok &= (stripos($this->title, $needle) === false && stripos($this->content, $needle) === false);
 					}
 				}
+				if ($ok && $filter->getSearchRegex()) {
+					foreach ($filter->getSearchRegex() as $needle) {
+						$ok &= (preg_match($needle, $this->title) === 1 || preg_match($needle, $this->content) === 1);
+					}
+				}
+				if ($ok && $filter->getNotSearchRegex()) {
+					foreach ($filter->getNotSearchRegex() as $needle) {
+						$ok &= (preg_match($needle, $this->title) === 0 && preg_match($needle, $this->content) === 0);
+					}
+				}
 				if ($ok) {
 					return true;
 				}
-- 
cgit v1.2.3