From 1fe66ad020ca8f0560bb9c6e311852ed77228f78 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 28 Feb 2022 20:22:43 +0100 Subject: Implement Web scraping "HTML + XPath" (#4220) * More PHP type hints for Fever Follow-up of https://github.com/FreshRSS/FreshRSS/pull/4201 Related to https://github.com/FreshRSS/FreshRSS/issues/4200 * Detail * Draft * Progress * More draft * Fix thumbnail PHP type hint https://github.com/FreshRSS/FreshRSS/issues/4215 * More types * A bit more * Refactor FreshRSS_Entry::fromArray * Progress * Starts to work * Categories * Fonctional * Layout update * Fix relative URLs * Cache system * Forgotten files * Remove a debug line * Automatic form validation of XPath expressions * data-leave-validation * Fix reload action * Simpler examples * Fix column type for PostgreSQL * Enforce HTTP encoding * Readme * Fix get full content * target="_blank" * gitignore * htmlspecialchars_utf8 * Implement HTML And fix/revert `xml:base` support in SimplePie https://github.com/simplepie/simplepie/commit/e49c578817aa504d8d05cd7f33857aeda9d41908 * SimplePie upstream PR merged https://github.com/simplepie/simplepie/pull/723 --- app/Controllers/subscriptionController.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'app/Controllers/subscriptionController.php') diff --git a/app/Controllers/subscriptionController.php b/app/Controllers/subscriptionController.php index 7d2c58714..8fa468b8e 100644 --- a/app/Controllers/subscriptionController.php +++ b/app/Controllers/subscriptionController.php @@ -192,8 +192,26 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController { $feed->_filtersAction('read', preg_split('/[\n\r]+/', Minz_Request::param('filteractions_read', ''))); + $feed_kind = Minz_Request::param('feed_kind', FreshRSS_Feed::KIND_RSS); + if ($feed_kind == FreshRSS_Feed::KIND_HTML_XPATH) { + $xPathSettings = []; + if (Minz_Request::param('xPathFeedTitle', '') != '') $xPathSettings['feedTitle'] = Minz_Request::param('xPathFeedTitle', '', true); + if (Minz_Request::param('xPathItem', '') != '') $xPathSettings['item'] = Minz_Request::param('xPathItem', '', true); + if (Minz_Request::param('xPathItemTitle', '') != '') $xPathSettings['itemTitle'] = Minz_Request::param('xPathItemTitle', '', true); + if (Minz_Request::param('xPathItemContent', '') != '') $xPathSettings['itemContent'] = Minz_Request::param('xPathItemContent', '', true); + if (Minz_Request::param('xPathItemUri', '') != '') $xPathSettings['itemUri'] = Minz_Request::param('xPathItemUri', '', true); + if (Minz_Request::param('xPathItemAuthor', '') != '') $xPathSettings['itemAuthor'] = Minz_Request::param('xPathItemAuthor', '', true); + if (Minz_Request::param('xPathItemTimestamp', '') != '') $xPathSettings['itemTimestamp'] = Minz_Request::param('xPathItemTimestamp', '', true); + if (Minz_Request::param('xPathItemThumbnail', '') != '') $xPathSettings['itemThumbnail'] = Minz_Request::param('xPathItemThumbnail', '', true); + if (Minz_Request::param('xPathItemCategories', '') != '') $xPathSettings['itemCategories'] = Minz_Request::param('xPathItemCategories', '', true); + if (!empty($xPathSettings)) { + $feed->_attributes('xpath', $xPathSettings); + } + } + $values = array( 'name' => Minz_Request::param('name', ''), + 'kind' => $feed_kind, 'description' => sanitizeHTML(Minz_Request::param('description', '', true)), 'website' => checkUrl(Minz_Request::param('website', '')), 'url' => checkUrl(Minz_Request::param('url', '')), -- cgit v1.2.3