From d6fd78b96837969dc36ff852a3288fd45f8f9e61 Mon Sep 17 00:00:00 2001 From: Artur Weigandt Date: Mon, 16 May 2022 12:04:43 +0200 Subject: Allow CssXPath updates with composer (#4368) * Install CssXPath with composer * Fix code style in tests * fix pathnames for tests and linting * add irrelevant files to .gitignore * Alphabetic order * let composer sort the packages alphabetically Co-authored-by: Alexandre Alapetite --- lib/phpgt/cssxpath/LICENSE | 21 ++ lib/phpgt/cssxpath/README.md | 53 ++++ lib/phpgt/cssxpath/src/CssXPathException.php | 6 + .../cssxpath/src/NotYetImplementedException.php | 4 + lib/phpgt/cssxpath/src/Translator.php | 323 +++++++++++++++++++++ 5 files changed, 407 insertions(+) create mode 100644 lib/phpgt/cssxpath/LICENSE create mode 100644 lib/phpgt/cssxpath/README.md create mode 100644 lib/phpgt/cssxpath/src/CssXPathException.php create mode 100644 lib/phpgt/cssxpath/src/NotYetImplementedException.php create mode 100644 lib/phpgt/cssxpath/src/Translator.php (limited to 'lib/phpgt') diff --git a/lib/phpgt/cssxpath/LICENSE b/lib/phpgt/cssxpath/LICENSE new file mode 100644 index 000000000..12b514c97 --- /dev/null +++ b/lib/phpgt/cssxpath/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright © PHP.Gt contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/phpgt/cssxpath/README.md b/lib/phpgt/cssxpath/README.md new file mode 100644 index 000000000..9082c8bbf --- /dev/null +++ b/lib/phpgt/cssxpath/README.md @@ -0,0 +1,53 @@ +Translate CSS selectors to XPath queries. +========================================= + +A lightweight and dependency free CSS to XPath translator. This repository is used to bring modern DOM functionality like [`querySelectorAll()`][qsa] to PHP in the [PHP.Gt/Dom][gt-dom] project. + +*** + + + Build status + + + Code quality + + + Code coverage + + + Current version + + + PHP.Gt/CssXPath documentation + + +Example usage +------------- + + +```php +use Gt\CssXPath\Translator; + +$html = << + + + + +HTML; + +$document = new DOMDocument(); +$document->loadHTML($html); + +$xpath = new DOMXPath($document); +$inputElementList = $xpath->query(new Translator("form>label>input")); +``` + +[qsa]: https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelectorAll +[gt-dom]: https://www.php.gt/dom diff --git a/lib/phpgt/cssxpath/src/CssXPathException.php b/lib/phpgt/cssxpath/src/CssXPathException.php new file mode 100644 index 000000000..81ad3ac9d --- /dev/null +++ b/lib/phpgt/cssxpath/src/CssXPathException.php @@ -0,0 +1,6 @@ +\*)' + . '|(:(?P[\w-]*))' + . '|\(*(?P["\']*[\w\h-]*["\']*)\)' + . '|(?P[\w-]*)' + . '|(?P\s*>\s*)' + . '|(#(?P[\w-]*))' + . '|(\.(?P[\w-]*))' + . '|(?P\s*\+\s*)' + . "|(\[(?P[\w-]*)((?P[=~$]+)(?P(.+\[\]'?)|[^\]]+))*\])+" + . '|(?P\s+)' + . 
'/'; + + const EQUALS_EXACT = "="; + const EQUALS_CONTAINS_WORD = "~="; + const EQUALS_ENDS_WITH = "$="; + const EQUALS_CONTAINS = "*="; + const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|="; + const EQUALS_STARTS_WITH = "^="; + + /** @var string */ + protected $cssSelector; + /** @var string */ + protected $prefix; + + public function __construct(string $cssSelector, string $prefix = ".//") { + $this->cssSelector = $cssSelector; + $this->prefix = $prefix; + } + + public function __toString():string { + return $this->asXPath(); + } + + public function asXPath():string { + return $this->convert($this->cssSelector); + } + + protected function convert(string $css):string { + $cssArray = preg_split( + '/(["\']).*?\1(*SKIP)(*F)|,/', + $css + ); + $xPathArray = []; + + foreach($cssArray as $input) { + $output = $this->convertSingleSelector(trim($input)); + $xPathArray []= $output; + } + + return implode(" | ", $xPathArray); + } + + protected function convertSingleSelector(string $css):string { + $thread = $this->preg_match_collated(self::cssRegex, $css); + $thread = array_values($thread); + + $xpath = [$this->prefix]; + $prevType = ""; + foreach($thread as $threadKey => $currentThreadItem) { + $next = isset($thread[$threadKey + 1]) + ? 
$thread[$threadKey + 1] + : false; + + switch ($currentThreadItem["type"]) { + case "star": + case "element": + $xpath []= $currentThreadItem['content']; + break; + + case "pseudo": + $specifier = ""; + if ($next && $next["type"] == "pseudospecifier") { + $specifier = "{$next['content']}"; + } + + switch ($currentThreadItem["content"]) { + case "disabled": + case "checked": + case "selected": + array_push( + $xpath, + "[@{$currentThreadItem['content']}]" + ); + break; + + case "text": + array_push( + $xpath, + '[@type="text"]' + ); + break; + + case "contains": + if(empty($specifier)) { + continue 3; + } + + array_push( + $xpath, + "[contains(text(),$specifier)]" + ); + break; + + case "first-child": + $prev = count($xpath) - 1; + $xpath[$prev] = '*[1]/self::' . $xpath[$prev]; + break; + + case "nth-child": + if (empty($specifier)) { + continue 3; + } + + $prev = count($xpath) - 1; + $previous = $xpath[$prev]; + + if (substr($previous, -1, 1) === "]") { + $xpath[$prev] = str_replace( + "]", + " and position() = $specifier]", + $xpath[$prev] + ); + } + else { + array_push( + $xpath, + "[$specifier]" + ); + } + break; + case "nth-of-type": + if (empty($specifier)) { + continue 3; + } + + $prev = count($xpath) - 1; + $previous = $xpath[$prev]; + + if(substr($previous, -1, 1) === "]") { + array_push( + $xpath, + "[$specifier]" + ); + } + else { + array_push( + $xpath, + "[$specifier]" + ); + } + break; + } + break; + + case "child": + array_push($xpath, "/"); + break; + + case "id": + array_push( + $xpath, + ($prevType != "element" ? '*' : '') + . "[@id='{$currentThreadItem['content']}']" + ); + break; + + case "class": + // https://devhints.io/xpath#class-check + array_push( + $xpath, + (($prevType != "element" && $prevType != "class") ? '*' : '') + . 
"[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]" + ); + break; + + case "sibling": + array_push( + $xpath, + "/following-sibling::*[1]/self::" + ); + break; + + case "attribute": + if(!$prevType) { + array_push($xpath, "*"); + } + + /** @var null|array> $detail */ + $detail = $currentThreadItem["detail"] ?? null; + $detailType = $detail[0] ?? null; + $detailValue = $detail[1] ?? null; + + if(!$detailType + || $detailType["type"] !== "attribute_equals") { + array_push( + $xpath, + "[@{$currentThreadItem['content']}]" + ); + continue 2; + } + + $valueString = trim( + $detailValue["content"], + " '\"" + ); + + $equalsType = $detailType["content"]; + switch ($equalsType) { + case self::EQUALS_EXACT: + array_push( + $xpath, + "[@{$currentThreadItem['content']}=\"{$valueString}\"]" + ); + break; + + case self::EQUALS_CONTAINS: + throw new NotYetImplementedException(); + + case self::EQUALS_CONTAINS_WORD: + array_push( + $xpath, + "[" + . "contains(" + . "concat(\" \",@{$currentThreadItem['content']},\" \")," + . "concat(\" \",\"{$valueString}\",\" \")" + . ")" + . "]" + ); + break; + + case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED: + throw new NotYetImplementedException(); + + case self::EQUALS_STARTS_WITH: + throw new NotYetImplementedException(); + + case self::EQUALS_ENDS_WITH: + array_push( + $xpath, + "[" + . "substring(" + . "@{$currentThreadItem['content']}," + . "string-length(@{$currentThreadItem['content']}) - " + . "string-length(\"{$valueString}\") + 1)" + . "=\"{$valueString}\"" + . 
"]" + ); + break; + } + break; + + case "descendant": + array_push($xpath, "//"); + break; + } + + $prevType = $currentThreadItem["type"]; + } + + return implode("", $xpath); + } + + /** @return array> */ + protected function preg_match_collated( + string $regex, + string $string, + callable $transform = null + ):array { + preg_match_all( + $regex, + $string, + $matches, + PREG_PATTERN_ORDER + ); + + $set = []; + foreach($matches[0] as $k => $v) { + if(!empty($v)) { + $set[$k] = null; + } + } + + foreach($matches as $k => $m) { + if(is_numeric($k)) { + continue; + } + + foreach($m as $i => $match) { + if($match === "") { + continue; + } + + $toSet = null; + + if($transform) { + $toSet = $transform($k, $match); + } + else { + $toSet = ["type" => $k, "content" => $match]; + } + + if(!isset($set[$i])) { + $set[$i] = $toSet; + } + else { + if(!isset($set[$i]["detail"])) { + $set[$i]["detail"] = []; + } + + array_push($set[$i]["detail"], $toSet); + } + } + } + + return $set; + } +} -- cgit v1.2.3