Refactor some cURL options and use CURLOPT_USERPWD (#6177)

* Refactor some cURL options and use CURLOPT_USERPWD
  fix https://github.com/FreshRSS/FreshRSS/issues/6176
* Fixes
author: Alexandre Alapetite <alexandre@alapetite.fr> 2024-03-10 23:04:17 +0100
committer: GitHub <noreply@github.com> 2024-03-10 23:04:17 +0100
commit: d0072b9fb73a6582c98c7b5a44daf2d6ca39636e (patch)
tree: bf1992ca2ca7c6ae65c5a63bdb66bf2f4b7850a6 /app/Models/Entry.php
parent: 01eaaed9bb2268bc1d0509ca4f33d4b075634c9a (diff)
1 files changed, 16 insertions, 13 deletions
diff --git a/app/Models/Entry.php b/app/Models/Entry.php
index c782f4c94..e5e859a1d 100644
--- a/app/Models/Entry.php
+++ b/app/Models/Entry.php
@@ -690,12 +690,18 @@ HTML;
 	}
 
 	/**
-	 * @param array<string,mixed> $attributes
+	 * @param string $url Overridden URL. Will default to the entry URL.
 	 * @throws Minz_Exception
 	 */
-	public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string {
-		$cachePath = FreshRSS_Feed::cacheFilename($url, $attributes, FreshRSS_Feed::KIND_HTML_XPATH);
-		$html = httpGet($url, $cachePath, 'html', $attributes);
+	public function getContentByParsing(string $url = '', int $maxRedirs = 3): string {
+		$url = $url ?: htmlspecialchars_decode($this->link(), ENT_QUOTES);
+		$feed = $this->feed();
+		if ($url === '' || $feed === null || $feed->pathEntries() === '') {
+			return '';
+		}
+
+		$cachePath = $feed->cacheFilename($url . '#' . $feed->pathEntries());
+		$html = httpGet($url, $cachePath, 'html');
 		if (strlen($html) > 0) {
 			$doc = new DOMDocument();
 			$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
@@ -709,7 +715,7 @@ HTML;
 						$refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
 						$refresh = SimplePie_Misc::absolutize_url($refresh, $url);
 						if ($refresh != false && $refresh !== $url) {
-							return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
+							return $this->getContentByParsing($refresh, $maxRedirs - 1);
 						}
 					}
 				}
@@ -724,11 +730,12 @@ HTML;
 			}
 
 			$content = '';
-			$nodes = $xpath->query((new Gt\CssXPath\Translator($path))->asXPath());
+			$nodes = $xpath->query((new Gt\CssXPath\Translator($feed->pathEntries()))->asXPath());
 			if ($nodes != false) {
+				$path_entries_filter = $feed->attributeString('path_entries_filter');
 				foreach ($nodes as $node) {
-					if (!empty($attributes['path_entries_filter'])) {
-						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($attributes['path_entries_filter']))->asXPath(), $node) ?: [];
+					if ($path_entries_filter != null) {
+						$filterednodes = $xpath->query((new Gt\CssXPath\Translator($path_entries_filter))->asXPath(), $node) ?: [];
 						foreach ($filterednodes as $filterednode) {
 							if ($filterednode->parentNode === null) {
 								continue;
@@ -760,11 +767,7 @@ HTML;
 			} else {
 				try {
 					// The article is not yet in the database, so let’s fetch it
-					$fullContent = self::getContentByParsing(
-						htmlspecialchars_decode($this->link(), ENT_QUOTES),
-						htmlspecialchars_decode($feed->pathEntries(), ENT_QUOTES),
-						$feed->attributes()
-					);
+					$fullContent = $this->getContentByParsing();
 					if ('' !== $fullContent) {
 						$fullContent = "<!-- FULLCONTENT start //-->{$fullContent}<!-- FULLCONTENT end //-->";
 						$originalContent = $this->originalContent();
author	Alexandre Alapetite <alexandre@alapetite.fr>	2024-03-10 23:04:17 +0100
committer	GitHub <noreply@github.com>	2024-03-10 23:04:17 +0100
commit	d0072b9fb73a6582c98c7b5a44daf2d6ca39636e (patch)
tree	bf1992ca2ca7c6ae65c5a63bdb66bf2f4b7850a6 /app/Models/Entry.php
parent	01eaaed9bb2268bc1d0509ca4f33d4b075634c9a (diff)