summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Alexandre Alapetite <alexandre@alapetite.fr> 2022-05-12 22:15:10 +0200
committerGravatar GitHub <noreply@github.com> 2022-05-12 22:15:10 +0200
commit4a87206f2898665e99953590536cedc6c5505f05 (patch)
tree398f53769048460071194d398c61c7e847f22d7e
parent9d1930d9adb4f56ae12209d3d01f4a1ed1af8503 (diff)
OPML export/import of some proprietary FreshRSS attributes (#4342)
* OPML export/import of some proprietary FreshRSS attributes #fix https://github.com/FreshRSS/FreshRSS/issues/4077 And one of the TODOs of https://github.com/FreshRSS/FreshRSS/pull/4220 XPath options, CSS Selector, and action filters * Bump library patch version * OPML namespace + documentation * Add example
-rw-r--r--app/Models/BooleanSearch.php5
-rw-r--r--app/Models/Feed.php12
-rw-r--r--app/Models/FeedDAO.php4
-rw-r--r--app/Models/FilterAction.php3
-rw-r--r--app/Services/ExportService.php4
-rw-r--r--app/Services/ImportService.php31
-rw-r--r--app/views/helpers/export/opml.phtml32
-rw-r--r--docs/en/developers/OPML.md74
-rw-r--r--lib/lib_opml.php31
9 files changed, 179 insertions, 17 deletions
diff --git a/app/Models/BooleanSearch.php b/app/Models/BooleanSearch.php
index e2f99d524..774e42073 100644
--- a/app/Models/BooleanSearch.php
+++ b/app/Models/BooleanSearch.php
@@ -5,6 +5,7 @@
*/
class FreshRSS_BooleanSearch {
+ /** @var string */
private $raw_input = '';
private $searches = array();
@@ -54,11 +55,11 @@ class FreshRSS_BooleanSearch {
return null;
}
- public function __toString() {
+ public function __toString(): string {
return $this->getRawInput();
}
- public function getRawInput() {
+ public function getRawInput(): string {
return $this->raw_input;
}
}
diff --git a/app/Models/Feed.php b/app/Models/Feed.php
index e0d26046b..b5b6fdfd8 100644
--- a/app/Models/Feed.php
+++ b/app/Models/Feed.php
@@ -245,7 +245,7 @@ class FreshRSS_Feed extends Minz_Model {
}
$this->url = $value;
}
- public function _kind($value) {
+ public function _kind(int $value) {
$this->kind = $value;
}
public function _category($value) {
@@ -567,8 +567,8 @@ class FreshRSS_Feed extends Minz_Model {
$feedSourceUrl = preg_replace('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $feedSourceUrl);
}
- // Same naming conventions than https://github.com/RSS-Bridge/rss-bridge/wiki/XPathAbstract
- // https://github.com/RSS-Bridge/rss-bridge/wiki/The-collectData-function
+ // Same naming conventions as https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html
+ // https://rss-bridge.github.io/rss-bridge/Bridge_API/BridgeAbstract.html#collectdata
/** @var array<string,string> */
$xPathSettings = $this->attributes('xpath');
$xPathFeedTitle = $xPathSettings['feedTitle'] ?? '';
@@ -758,7 +758,8 @@ class FreshRSS_Feed extends Minz_Model {
}
}
- public function filtersAction(string $action) {
+ /** @return array<FreshRSS_BooleanSearch> */
+ public function filtersAction(string $action): array {
$action = trim($action);
if ($action == '') {
return array();
@@ -775,6 +776,9 @@ class FreshRSS_Feed extends Minz_Model {
return $filters;
}
+ /**
+ * @param array<string> $filters
+ */
public function _filtersAction(string $action, $filters) {
$action = trim($action);
if ($action == '' || !is_array($filters)) {
diff --git a/app/Models/FeedDAO.php b/app/Models/FeedDAO.php
index c4a0b1429..89e667813 100644
--- a/app/Models/FeedDAO.php
+++ b/app/Models/FeedDAO.php
@@ -104,6 +104,7 @@ class FreshRSS_FeedDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
'website' => $feed->website(),
'description' => $feed->description(),
'lastUpdate' => 0,
+ 'pathEntries' => $feed->pathEntries(),
'httpAuth' => $feed->httpAuth(),
'attributes' => $feed->attributes(),
);
@@ -384,6 +385,9 @@ SQL;
return false;
}
+ /**
+ * @return array<FreshRSS_Feed>
+ */
public function listByCategory(int $cat): array {
$sql = 'SELECT * FROM `_feed` WHERE category=?';
$stm = $this->pdo->prepare($sql);
diff --git a/app/Models/FilterAction.php b/app/Models/FilterAction.php
index 23a45d14e..394b573a4 100644
--- a/app/Models/FilterAction.php
+++ b/app/Models/FilterAction.php
@@ -2,6 +2,7 @@
class FreshRSS_FilterAction {
+ /** @var FreshRSS_BooleanSearch */
private $booleanSearch = null;
private $actions = null;
@@ -33,7 +34,7 @@ class FreshRSS_FilterAction {
'actions' => $this->actions,
);
}
- return '';
+ return [];
}
public static function fromJSON($json) {
diff --git a/app/Services/ExportService.php b/app/Services/ExportService.php
index 7069ccec1..a80f20ce4 100644
--- a/app/Services/ExportService.php
+++ b/app/Services/ExportService.php
@@ -19,6 +19,10 @@ class FreshRSS_Export_Service {
/** @var FreshRSS_TagDAO */
private $tag_dao;
+ const FRSS_NAMESPACE = 'https://freshrss.org/opml';
+ const TYPE_HTML_XPATH = 'HTML+XPath';
+ const TYPE_RSS_ATOM = 'rss';
+
/**
* Initialize the service for the given user.
*
diff --git a/app/Services/ImportService.php b/app/Services/ImportService.php
index 7d1bb1c7c..7e7cccfdb 100644
--- a/app/Services/ImportService.php
+++ b/app/Services/ImportService.php
@@ -148,6 +148,37 @@ class FreshRSS_Import_Service {
$feed->_website($website);
$feed->_description($description);
+ switch ($feed_elt['type'] ?? '') {
+ case FreshRSS_Export_Service::TYPE_HTML_XPATH:
+ $feed->_kind(FreshRSS_Feed::KIND_HTML_XPATH);
+ break;
+ case FreshRSS_Export_Service::TYPE_RSS_ATOM:
+ default:
+ $feed->_kind(FreshRSS_Feed::KIND_RSS);
+ break;
+ }
+
+ $xPathSettings = [];
+ foreach ($feed_elt as $key => $value) {
+ if (is_array($value) && !empty($value['value']) && ($value['namespace'] ?? '') === FreshRSS_Export_Service::FRSS_NAMESPACE) {
+ switch ($key) {
+ case 'cssFullContent': $feed->_pathEntries($value['value']); break;
+ case 'filtersActionRead': $feed->_filtersAction('read', preg_split('/[\n\r]+/', $value['value'])); break;
+ case 'xPathItem': $xPathSettings['item'] = $value['value']; break;
+ case 'xPathItemTitle': $xPathSettings['itemTitle'] = $value['value']; break;
+ case 'xPathItemContent': $xPathSettings['itemContent'] = $value['value']; break;
+ case 'xPathItemUri': $xPathSettings['itemUri'] = $value['value']; break;
+ case 'xPathItemAuthor': $xPathSettings['itemAuthor'] = $value['value']; break;
+ case 'xPathItemTimestamp': $xPathSettings['itemTimestamp'] = $value['value']; break;
+ case 'xPathItemThumbnail': $xPathSettings['itemThumbnail'] = $value['value']; break;
+ case 'xPathItemCategories': $xPathSettings['itemCategories'] = $value['value']; break;
+ }
+ }
+ }
+ if (!empty($xPathSettings)) {
+ $feed->_attributes('xpath', $xPathSettings);
+ }
+
// Call the extension hook
$feed = Minz_ExtensionManager::callHook('feed_before_insert', $feed);
if ($feed != null) {
diff --git a/app/views/helpers/export/opml.phtml b/app/views/helpers/export/opml.phtml
index 971dc1445..458ea0e58 100644
--- a/app/views/helpers/export/opml.phtml
+++ b/app/views/helpers/export/opml.phtml
@@ -15,14 +15,40 @@ foreach ($this->categories as $key => $cat) {
'@outlines' => array()
);
+ /** @var FreshRSS_Feed $feed */
foreach ($cat['feeds'] as $feed) {
- $opml_array['body'][$key]['@outlines'][] = array(
+ $outline = [
'text' => htmlspecialchars_decode($feed->name(), ENT_QUOTES),
- 'type' => 'rss',
+ 'type' => FreshRSS_Export_Service::TYPE_RSS_ATOM,
'xmlUrl' => htmlspecialchars_decode($feed->url(), ENT_QUOTES),
'htmlUrl' => htmlspecialchars_decode($feed->website(), ENT_QUOTES),
'description' => htmlspecialchars_decode($feed->description(), ENT_QUOTES),
- );
+ ];
+ if ($feed->kind() === FreshRSS_Feed::KIND_HTML_XPATH) {
+ $outline['type'] = FreshRSS_Export_Service::TYPE_HTML_XPATH;
+ /** @var array<string,string> */
+ $xPathSettings = $feed->attributes('xpath');
+ $outline['frss:xPathItem'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['item'] ?? null];
+ $outline['frss:xPathItemTitle'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTitle'] ?? null];
+ $outline['frss:xPathItemContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemContent'] ?? null];
+ $outline['frss:xPathItemUri'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemUri'] ?? null];
+ $outline['frss:xPathItemAuthor'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemAuthor'] ?? null];
+ $outline['frss:xPathItemTimestamp'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemTimestamp'] ?? null];
+ $outline['frss:xPathItemThumbnail'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemThumbnail'] ?? null];
+ $outline['frss:xPathItemCategories'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $xPathSettings['itemCategories'] ?? null];
+ }
+ if (!empty($feed->filtersAction('read'))) {
+ $filters = '';
+ foreach ($feed->filtersAction('read') as $filterRead) {
+ $filters .= $filterRead->getRawInput() . "\n";
+ }
+ $filters = trim($filters);
+ $outline['frss:filtersActionRead'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $filters];
+ }
+ if ($feed->pathEntries() != '') {
+ $outline['frss:cssFullContent'] = ['namespace' => FreshRSS_Export_Service::FRSS_NAMESPACE, 'value' => $feed->pathEntries()];
+ }
+ $opml_array['body'][$key]['@outlines'][] = $outline;
}
}
diff --git a/docs/en/developers/OPML.md b/docs/en/developers/OPML.md
new file mode 100644
index 000000000..59a59a748
--- /dev/null
+++ b/docs/en/developers/OPML.md
@@ -0,0 +1,74 @@
+# OPML in FreshRSS
+
+FreshRSS supports the [OPML](https://en.wikipedia.org/wiki/OPML) format to export and import lists of RSS/Atom feeds in a standard way, compatible with several other RSS aggregators.
+
+However, FreshRSS also supports several additional features not covered by the basic OPML specification.
+Luckily, the [OPML specification](http://opml.org/spec2.opml) allows extensions:
+
+> *An OPML file may contain elements and attributes not described on this page, only if those elements are defined in a namespace.*
+
+and:
+
+> *OPML can also be extended by the addition of new values for the type attribute.*
+
+## FreshRSS OPML extension
+
+FreshRSS uses the XML namespace <https://freshrss.org/opml> to export/import extended information not covered by the basic OPML specification.
+
+The list of the custom FreshRSS attributes can be seen in [the source code](https://github.com/FreshRSS/FreshRSS/blob/edge/app/views/helpers/export/opml.phtml), and here is an overview:
+
+### HTML+XPath
+
+* `<outline type="HTML+XPath" ...`: Additional type of source, which is not RSS/Atom, but HTML Web Scraping using [XPath](https://www.w3.org/TR/xpath-10/) 1.0.
+
+> ℹ️ [XPath 1.0](https://en.wikipedia.org/wiki/XPath) is a standard query language, which FreshRSS supports to enable [Web scraping](https://en.wikipedia.org/wiki/Web_scraping).
+
+The following attributes use naming conventions similar to those of [RSS-Bridge](https://rss-bridge.github.io/rss-bridge/Bridge_API/XPathAbstract.html).
+
+* `frss:xPathItem`: XPath expression for extracting the feed items from the source page.
+ * Example: `//div[@class="news-item"]`
+* `frss:xPathItemTitle`: XPath expression for extracting an item’s title from the item context.
+ * Example: `descendant::h2`
+* `frss:xPathItemContent`: XPath expression for extracting an item’s content from the item context.
+ * Example: `.`
+* `frss:xPathItemUri`: XPath expression for extracting an item link from the item context.
+ * Example: `descendant::a/@href`
+* `frss:xPathItemAuthor`: XPath expression for extracting an item author from the item context.
+ * Example: `"Anonymous"`
+* `frss:xPathItemTimestamp`: XPath expression for extracting an item timestamp from the item context. The result will be parsed by [`strtotime()`](https://php.net/strtotime).
+* `frss:xPathItemThumbnail`: XPath expression for extracting an item’s thumbnail (image) URL from the item context.
+ * Example: `descendant::img/@src`
+* `frss:xPathItemCategories`: XPath expression for extracting a list of categories (tags) from the item context.
+
+### Miscellaneous
+
+* `frss:cssFullContent`: [CSS Selector](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) to enable the download and extraction of the matching HTML section of each article’s Web address.
+ * Example: `div.main`
+* `frss:filtersActionRead`: List (separated by a new line) of search queries to automatically mark a new article as read.
+
+### Example
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<opml version="2.0">
+ <head>
+ <title>FreshRSS OPML extension example</title>
+ </head>
+ <body>
+ <outline xmlns:frss="https://freshrss.org/opml"
+ text="Example"
+ type="HTML+XPath"
+ xmlUrl="https://www.example.net/page.html"
+ htmlUrl="https://www.example.net/page.html"
+ description="Example of Web scraping"
+ frss:xPathItem="//a[contains(@href, '/interesting/')]/ancestor::article"
+ frss:xPathItemTitle="descendant::h2"
+ frss:xPathItemContent="."
+ frss:xPathItemUri="descendant::a[string-length(@href)&gt;0]/@href"
+ frss:xPathItemThumbnail="descendant::img/@src"
+ frss:cssFullContent="article"
+ frss:filtersActionRead="intitle:⚡️ OR intitle:🔥&#10;something"
+ />
+ </body>
+</opml>
+```
diff --git a/lib/lib_opml.php b/lib/lib_opml.php
index b62f988c4..04b747a05 100644
--- a/lib/lib_opml.php
+++ b/lib/lib_opml.php
@@ -12,7 +12,7 @@
*
* @author Marien Fressinaud <dev@marienfressinaud.fr>
* @link https://github.com/marienfressinaud/lib_opml
- * @version 0.2-FreshRSS~1.5.1
+ * @version 0.2-FreshRSS~1.20.0
* @license public domain
*
* Usages:
@@ -91,8 +91,20 @@ function libopml_parse_outline($outline_xml, $strict = true) {
// An outline may contain any kind of attributes but "text" attribute is
// required !
$text_is_present = false;
- foreach ($outline_xml->attributes() as $key => $value) {
- $outline[$key] = (string)$value;
+
+ $elem = dom_import_simplexml($outline_xml);
+ /** @var DOMAttr $attr */
+ foreach ($elem->attributes as $attr) {
+ $key = $attr->localName;
+
+ if ($attr->namespaceURI == '') {
+ $outline[$key] = $attr->value;
+ } else {
+ $outline[$key] = [
+ 'namespace' => $attr->namespaceURI,
+ 'value' => $attr->value,
+ ];
+ }
if ($key === 'text') {
$text_is_present = true;
@@ -257,17 +269,22 @@ function libopml_render_outline($parent_elt, $outline, $strict) {
foreach ($value as $outline_child) {
libopml_render_outline($outline_elt, $outline_child, $strict);
}
- } elseif (is_array($value)) {
+ } elseif (is_array($value) && !isset($value['namespace'])) {
throw new LibOPML_Exception(
- 'Type of outline elements cannot be array: ' . $key
+ 'Type of outline elements cannot be array (except for providing a namespace): ' . $key
);
} else {
// Detect text attribute is present, that's good :)
if ($key === 'text') {
$text_is_present = true;
}
-
- $outline_elt->addAttribute($key, $value);
+ if (is_array($value)) {
+ if (!empty($value['namespace']) && !empty($value['value'])) {
+ $outline_elt->addAttribute($key, $value['value'], $value['namespace']);
+ }
+ } else {
+ $outline_elt->addAttribute($key, $value);
+ }
}
}