From d6fd78b96837969dc36ff852a3288fd45f8f9e61 Mon Sep 17 00:00:00 2001 From: Artur Weigandt Date: Mon, 16 May 2022 12:04:43 +0200 Subject: Allow CssXPath updates with composer (#4368) * Install CssXPath with composer * Fix code style in tests * fix pathnames for tests and linting * add irrelevant files to .gitignore * Alphabetic order * let composer sort the packages alphabetically Co-authored-by: Alexandre Alapetite --- lib/.gitignore | 6 + lib/CssXPath/CssXPathException.php | 6 - lib/CssXPath/LICENSE | 21 -- lib/CssXPath/NotYetImplementedException.php | 4 - lib/CssXPath/README.md | 53 ---- lib/CssXPath/Translator.php | 323 --------------------- lib/composer.json | 2 + lib/lib_rss.php | 7 +- lib/phpgt/cssxpath/LICENSE | 21 ++ lib/phpgt/cssxpath/README.md | 53 ++++ lib/phpgt/cssxpath/src/CssXPathException.php | 6 + .../cssxpath/src/NotYetImplementedException.php | 4 + lib/phpgt/cssxpath/src/Translator.php | 323 +++++++++++++++++++++ 13 files changed, 420 insertions(+), 409 deletions(-) delete mode 100644 lib/CssXPath/CssXPathException.php delete mode 100644 lib/CssXPath/LICENSE delete mode 100644 lib/CssXPath/NotYetImplementedException.php delete mode 100644 lib/CssXPath/README.md delete mode 100644 lib/CssXPath/Translator.php create mode 100644 lib/phpgt/cssxpath/LICENSE create mode 100644 lib/phpgt/cssxpath/README.md create mode 100644 lib/phpgt/cssxpath/src/CssXPathException.php create mode 100644 lib/phpgt/cssxpath/src/NotYetImplementedException.php create mode 100644 lib/phpgt/cssxpath/src/Translator.php (limited to 'lib') diff --git a/lib/.gitignore b/lib/.gitignore index f1e6fdd7e..b599767a7 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -1,6 +1,12 @@ autoload.php composer.lock composer/ +phpgt/cssxpath/.github/ +phpgt/cssxpath/.gitignore +phpgt/cssxpath/.scrutinizer.yml +phpgt/cssxpath/composer.json +phpgt/cssxpath/CONTRIBUTING.md +phpgt/cssxpath/test/ phpmailer/phpmailer/*oauth* phpmailer/phpmailer/COMMITMENT* phpmailer/phpmailer/composer.* diff --git a/lib/CssXPath/CssXPathException.php b/lib/CssXPath/CssXPathException.php deleted file mode 100644 index bd7798243..000000000 --- a/lib/CssXPath/CssXPathException.php +++ /dev/null @@ -1,6 +0,0 @@ - - Build status - - - Code quality - - - Code coverage - - - Current version - - - PHP.Gt/CssXPath documentation - - -Example usage -------------- - - -```php -use Gt\CssXPath\Translator; - -$html = << - - - - -HTML; - -$document = new DOMDocument(); -$document->loadHTML($html); - -$xpath = new DOMXPath($document); -$inputElementList = $xpath->query(new Translator("form>label>input"); -``` - -[qsa]: https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelectorAll -[gt-dom]: https://www.php.gt/dom diff --git a/lib/CssXPath/Translator.php b/lib/CssXPath/Translator.php deleted file mode 100644 index 140909099..000000000 --- a/lib/CssXPath/Translator.php +++ /dev/null @@ -1,323 +0,0 @@ -\*)' - . '|(:(?P[\w-]*))' - . '|\(*(?P["\']*[\w\h-]*["\']*)\)' - . '|(?P[\w-]*)' - . '|(?P\s*>\s*)' - . '|(#(?P[\w-]*))' - . '|(\.(?P[\w-]*))' - . '|(?P\s*\+\s*)' - . "|(\[(?P[\w-]*)((?P[=~$]+)(?P(.+\[\]'?)|[^\]]+))*\])+" - . '|(?P\s+)' - . '/'; - - const EQUALS_EXACT = "="; - const EQUALS_CONTAINS_WORD = "~="; - const EQUALS_ENDS_WITH = "$="; - const EQUALS_CONTAINS = "*="; - const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|="; - const EQUALS_STARTS_WITH = "^="; - - /** @var string */ - protected $cssSelector; - /** @var string */ - protected $prefix; - - public function __construct(string $cssSelector, string $prefix = ".//") { - $this->cssSelector = $cssSelector; - $this->prefix = $prefix; - } - - public function __toString():string { - return $this->asXPath(); - } - - public function asXPath():string { - return $this->convert($this->cssSelector); - } - - protected function convert(string $css):string { - $cssArray = preg_split( - '/(["\']).*?\1(*SKIP)(*F)|,/', - $css - ); - $xPathArray = []; - - foreach($cssArray as $input) { - $output = $this->convertSingleSelector(trim($input)); - $xPathArray []= $output; - } - - return implode(" | ", $xPathArray); - } - - protected function convertSingleSelector(string $css):string { - $thread = $this->preg_match_collated(self::cssRegex, $css); - $thread = array_values($thread); - - $xpath = [$this->prefix]; - $prevType = ""; - foreach($thread as $threadKey => $currentThreadItem) { - $next = isset($thread[$threadKey + 1]) - ? $thread[$threadKey + 1] - : false; - - switch ($currentThreadItem["type"]) { - case "star": - case "element": - $xpath []= $currentThreadItem['content']; - break; - - case "pseudo": - $specifier = ""; - if ($next && $next["type"] == "pseudospecifier") { - $specifier = "{$next['content']}"; - } - - switch ($currentThreadItem["content"]) { - case "disabled": - case "checked": - case "selected": - array_push( - $xpath, - "[@{$currentThreadItem['content']}]" - ); - break; - - case "text": - array_push( - $xpath, - '[@type="text"]' - ); - break; - - case "contains": - if(empty($specifier)) { - continue 3; - } - - array_push( - $xpath, - "[contains(text(),$specifier)]" - ); - break; - - case "first-child": - $prev = count($xpath) - 1; - $xpath[$prev] = '*[1]/self::' . $xpath[$prev]; - break; - - case "nth-child": - if (empty($specifier)) { - continue 3; - } - - $prev = count($xpath) - 1; - $previous = $xpath[$prev]; - - if (substr($previous, -1, 1) === "]") { - $xpath[$prev] = str_replace( - "]", - " and position() = $specifier]", - $xpath[$prev] - ); - } - else { - array_push( - $xpath, - "[$specifier]" - ); - } - break; - case "nth-of-type": - if (empty($specifier)) { - continue 3; - } - - $prev = count($xpath) - 1; - $previous = $xpath[$prev]; - - if(substr($previous, -1, 1) === "]") { - array_push( - $xpath, - "[$specifier]" - ); - } - else { - array_push( - $xpath, - "[$specifier]" - ); - } - break; - } - break; - - case "child": - array_push($xpath, "/"); - break; - - case "id": - array_push( - $xpath, - ($prevType != "element" ? '*' : '') - . "[@id='{$currentThreadItem['content']}']" - ); - break; - - case "class": - // https://devhints.io/xpath#class-check - array_push( - $xpath, - (($prevType != "element" && $prevType != "class") ? '*' : '') - . "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]" - ); - break; - - case "sibling": - array_push( - $xpath, - "/following-sibling::*[1]/self::" - ); - break; - - case "attribute": - if(!$prevType) { - array_push($xpath, "*"); - } - - /** @var null|array> $detail */ - $detail = $currentThreadItem["detail"] ?? null; - $detailType = $detail[0] ?? null; - $detailValue = $detail[1] ?? null; - - if(!$detailType - || $detailType["type"] !== "attribute_equals") { - array_push( - $xpath, - "[@{$currentThreadItem['content']}]" - ); - continue 2; - } - - $valueString = trim( - $detailValue["content"], - " '\"" - ); - - $equalsType = $detailType["content"]; - switch ($equalsType) { - case self::EQUALS_EXACT: - array_push( - $xpath, - "[@{$currentThreadItem['content']}=\"{$valueString}\"]" - ); - break; - - case self::EQUALS_CONTAINS: - throw new NotYetImplementedException(); - - case self::EQUALS_CONTAINS_WORD: - array_push( - $xpath, - "[" - . "contains(" - . "concat(\" \",@{$currentThreadItem['content']},\" \")," - . "concat(\" \",\"{$valueString}\",\" \")" - . ")" - . "]" - ); - break; - - case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED: - throw new NotYetImplementedException(); - - case self::EQUALS_STARTS_WITH: - throw new NotYetImplementedException(); - - case self::EQUALS_ENDS_WITH: - array_push( - $xpath, - "[" - . "substring(" - . "@{$currentThreadItem['content']}," - . "string-length(@{$currentThreadItem['content']}) - " - . "string-length(\"{$valueString}\") + 1)" - . "=\"{$valueString}\"" - . "]" - ); - break; - } - break; - - case "descendant": - array_push($xpath, "//"); - break; - } - - $prevType = $currentThreadItem["type"]; - } - - return implode("", $xpath); - } - - /** @return array> */ - protected function preg_match_collated( - string $regex, - string $string, - callable $transform = null - ):array { - preg_match_all( - $regex, - $string, - $matches, - PREG_PATTERN_ORDER - ); - - $set = []; - foreach($matches[0] as $k => $v) { - if(!empty($v)) { - $set[$k] = null; - } - } - - foreach($matches as $k => $m) { - if(is_numeric($k)) { - continue; - } - - foreach($m as $i => $match) { - if($match === "") { - continue; - } - - $toSet = null; - - if($transform) { - $toSet = $transform($k, $match); - } - else { - $toSet = ["type" => $k, "content" => $match]; - } - - if(!isset($set[$i])) { - $set[$i] = $toSet; - } - else { - if(!isset($set[$i]["detail"])) { - $set[$i]["detail"] = []; - } - - array_push($set[$i]["detail"], $toSet); - } - } - } - - return $set; - } -} diff --git a/lib/composer.json b/lib/composer.json index 3d307f6e8..6fdc90ef4 100644 --- a/lib/composer.json +++ b/lib/composer.json @@ -6,9 +6,11 @@ "license": "AGPL-3.0", "require": { "php": ">=7.0.0", + "phpgt/cssxpath": "v1.1.4", "phpmailer/phpmailer": "6.6.0" }, "config": { + "sort-packages": true, "vendor-dir": "./" }, "scripts": { diff --git a/lib/lib_rss.php b/lib/lib_rss.php index a4a9df36a..96a9cb2a8 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -52,8 +52,11 @@ function classAutoloader($class) { include(LIB_PATH . '/' . str_replace('_', '/', $class) . '.php'); } elseif (strpos($class, 'SimplePie') === 0) { include(LIB_PATH . '/SimplePie/' . str_replace('_', '/', $class) . '.php'); - } elseif (strpos($class, 'CssXPath') !== false) { - include(LIB_PATH . '/CssXPath/' . basename(str_replace('\\', '/', $class)) . '.php'); + } elseif (str_starts_with($class, 'Gt\\CssXPath\\')) { + $prefix = 'Gt\\CssXPath\\'; + $base_dir = LIB_PATH . '/phpgt/cssxpath/src/'; + $relative_class_name = substr($class, strlen($prefix)); + require $base_dir . str_replace('\\', '/', $relative_class_name) . '.php'; } elseif (str_starts_with($class, 'PHPMailer\\PHPMailer\\')) { $prefix = 'PHPMailer\\PHPMailer\\'; $base_dir = LIB_PATH . '/phpmailer/phpmailer/src/'; diff --git a/lib/phpgt/cssxpath/LICENSE b/lib/phpgt/cssxpath/LICENSE new file mode 100644 index 000000000..12b514c97 --- /dev/null +++ b/lib/phpgt/cssxpath/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright © PHP.Gt contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/phpgt/cssxpath/README.md b/lib/phpgt/cssxpath/README.md new file mode 100644 index 000000000..9082c8bbf --- /dev/null +++ b/lib/phpgt/cssxpath/README.md @@ -0,0 +1,53 @@ +Translate CSS selectors to XPath queries. +========================================= + +A lightweight and dependency free CSS to XPath translator. This repository is used to bring modern DOM functionality like [`querySelectorAll()`][qsa] to PHP in the [PHP.Gt/Dom][gt-dom] project. + +*** + + + Build status + + + Code quality + + + Code coverage + + + Current version + + + PHP.Gt/CssXPath documentation + + +Example usage +------------- + + +```php +use Gt\CssXPath\Translator; + +$html = << + + + + +HTML; + +$document = new DOMDocument(); +$document->loadHTML($html); + +$xpath = new DOMXPath($document); +$inputElementList = $xpath->query(new Translator("form>label>input"); +``` + +[qsa]: https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelectorAll +[gt-dom]: https://www.php.gt/dom diff --git a/lib/phpgt/cssxpath/src/CssXPathException.php b/lib/phpgt/cssxpath/src/CssXPathException.php new file mode 100644 index 000000000..81ad3ac9d --- /dev/null +++ b/lib/phpgt/cssxpath/src/CssXPathException.php @@ -0,0 +1,6 @@ +\*)' + . '|(:(?P[\w-]*))' + . '|\(*(?P["\']*[\w\h-]*["\']*)\)' + . '|(?P[\w-]*)' + . '|(?P\s*>\s*)' + . '|(#(?P[\w-]*))' + . '|(\.(?P[\w-]*))' + . '|(?P\s*\+\s*)' + . "|(\[(?P[\w-]*)((?P[=~$]+)(?P(.+\[\]'?)|[^\]]+))*\])+" + . '|(?P\s+)' + . '/'; + + const EQUALS_EXACT = "="; + const EQUALS_CONTAINS_WORD = "~="; + const EQUALS_ENDS_WITH = "$="; + const EQUALS_CONTAINS = "*="; + const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|="; + const EQUALS_STARTS_WITH = "^="; + + /** @var string */ + protected $cssSelector; + /** @var string */ + protected $prefix; + + public function __construct(string $cssSelector, string $prefix = ".//") { + $this->cssSelector = $cssSelector; + $this->prefix = $prefix; + } + + public function __toString():string { + return $this->asXPath(); + } + + public function asXPath():string { + return $this->convert($this->cssSelector); + } + + protected function convert(string $css):string { + $cssArray = preg_split( + '/(["\']).*?\1(*SKIP)(*F)|,/', + $css + ); + $xPathArray = []; + + foreach($cssArray as $input) { + $output = $this->convertSingleSelector(trim($input)); + $xPathArray []= $output; + } + + return implode(" | ", $xPathArray); + } + + protected function convertSingleSelector(string $css):string { + $thread = $this->preg_match_collated(self::cssRegex, $css); + $thread = array_values($thread); + + $xpath = [$this->prefix]; + $prevType = ""; + foreach($thread as $threadKey => $currentThreadItem) { + $next = isset($thread[$threadKey + 1]) + ? $thread[$threadKey + 1] + : false; + + switch ($currentThreadItem["type"]) { + case "star": + case "element": + $xpath []= $currentThreadItem['content']; + break; + + case "pseudo": + $specifier = ""; + if ($next && $next["type"] == "pseudospecifier") { + $specifier = "{$next['content']}"; + } + + switch ($currentThreadItem["content"]) { + case "disabled": + case "checked": + case "selected": + array_push( + $xpath, + "[@{$currentThreadItem['content']}]" + ); + break; + + case "text": + array_push( + $xpath, + '[@type="text"]' + ); + break; + + case "contains": + if(empty($specifier)) { + continue 3; + } + + array_push( + $xpath, + "[contains(text(),$specifier)]" + ); + break; + + case "first-child": + $prev = count($xpath) - 1; + $xpath[$prev] = '*[1]/self::' . $xpath[$prev]; + break; + + case "nth-child": + if (empty($specifier)) { + continue 3; + } + + $prev = count($xpath) - 1; + $previous = $xpath[$prev]; + + if (substr($previous, -1, 1) === "]") { + $xpath[$prev] = str_replace( + "]", + " and position() = $specifier]", + $xpath[$prev] + ); + } + else { + array_push( + $xpath, + "[$specifier]" + ); + } + break; + case "nth-of-type": + if (empty($specifier)) { + continue 3; + } + + $prev = count($xpath) - 1; + $previous = $xpath[$prev]; + + if(substr($previous, -1, 1) === "]") { + array_push( + $xpath, + "[$specifier]" + ); + } + else { + array_push( + $xpath, + "[$specifier]" + ); + } + break; + } + break; + + case "child": + array_push($xpath, "/"); + break; + + case "id": + array_push( + $xpath, + ($prevType != "element" ? '*' : '') + . "[@id='{$currentThreadItem['content']}']" + ); + break; + + case "class": + // https://devhints.io/xpath#class-check + array_push( + $xpath, + (($prevType != "element" && $prevType != "class") ? '*' : '') + . "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]" + ); + break; + + case "sibling": + array_push( + $xpath, + "/following-sibling::*[1]/self::" + ); + break; + + case "attribute": + if(!$prevType) { + array_push($xpath, "*"); + } + + /** @var null|array> $detail */ + $detail = $currentThreadItem["detail"] ?? null; + $detailType = $detail[0] ?? null; + $detailValue = $detail[1] ?? null; + + if(!$detailType + || $detailType["type"] !== "attribute_equals") { + array_push( + $xpath, + "[@{$currentThreadItem['content']}]" + ); + continue 2; + } + + $valueString = trim( + $detailValue["content"], + " '\"" + ); + + $equalsType = $detailType["content"]; + switch ($equalsType) { + case self::EQUALS_EXACT: + array_push( + $xpath, + "[@{$currentThreadItem['content']}=\"{$valueString}\"]" + ); + break; + + case self::EQUALS_CONTAINS: + throw new NotYetImplementedException(); + + case self::EQUALS_CONTAINS_WORD: + array_push( + $xpath, + "[" + . "contains(" + . "concat(\" \",@{$currentThreadItem['content']},\" \")," + . "concat(\" \",\"{$valueString}\",\" \")" + . ")" + . "]" + ); + break; + + case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED: + throw new NotYetImplementedException(); + + case self::EQUALS_STARTS_WITH: + throw new NotYetImplementedException(); + + case self::EQUALS_ENDS_WITH: + array_push( + $xpath, + "[" + . "substring(" + . "@{$currentThreadItem['content']}," + . "string-length(@{$currentThreadItem['content']}) - " + . "string-length(\"{$valueString}\") + 1)" + . "=\"{$valueString}\"" + . "]" + ); + break; + } + break; + + case "descendant": + array_push($xpath, "//"); + break; + } + + $prevType = $currentThreadItem["type"]; + } + + return implode("", $xpath); + } + + /** @return array> */ + protected function preg_match_collated( + string $regex, + string $string, + callable $transform = null + ):array { + preg_match_all( + $regex, + $string, + $matches, + PREG_PATTERN_ORDER + ); + + $set = []; + foreach($matches[0] as $k => $v) { + if(!empty($v)) { + $set[$k] = null; + } + } + + foreach($matches as $k => $m) { + if(is_numeric($k)) { + continue; + } + + foreach($m as $i => $match) { + if($match === "") { + continue; + } + + $toSet = null; + + if($transform) { + $toSet = $transform($k, $match); + } + else { + $toSet = ["type" => $k, "content" => $match]; + } + + if(!isset($set[$i])) { + $set[$i] = $toSet; + } + else { + if(!isset($set[$i]["detail"])) { + $set[$i]["detail"] = []; + } + + array_push($set[$i]["detail"], $toSet); + } + } + } + + return $set; + } +} -- cgit v1.2.3