aboutsummaryrefslogtreecommitdiff
path: root/lib/phpgt
diff options
context:
space:
mode:
Diffstat (limited to 'lib/phpgt')
-rw-r--r--lib/phpgt/cssxpath/LICENSE21
-rw-r--r--lib/phpgt/cssxpath/README.md53
-rw-r--r--lib/phpgt/cssxpath/src/CssXPathException.php6
-rw-r--r--lib/phpgt/cssxpath/src/NotYetImplementedException.php4
-rw-r--r--lib/phpgt/cssxpath/src/Translator.php323
5 files changed, 407 insertions, 0 deletions
diff --git a/lib/phpgt/cssxpath/LICENSE b/lib/phpgt/cssxpath/LICENSE
new file mode 100644
index 000000000..12b514c97
--- /dev/null
+++ b/lib/phpgt/cssxpath/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright © PHP.Gt contributors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/lib/phpgt/cssxpath/README.md b/lib/phpgt/cssxpath/README.md
new file mode 100644
index 000000000..9082c8bbf
--- /dev/null
+++ b/lib/phpgt/cssxpath/README.md
@@ -0,0 +1,53 @@
+Translate CSS selectors to XPath queries.
+=========================================
+
+A lightweight and dependency-free CSS to XPath translator. This repository is used to bring modern DOM functionality like [`querySelectorAll()`][qsa] to PHP in the [PHP.Gt/Dom][gt-dom] project.
+
+***
+
+<a href="https://github.com/PhpGt/CssXPath/actions" target="_blank">
+ <img src="https://badge.status.php.gt/cssxpath-build.svg" alt="Build status" />
+</a>
+<a href="https://scrutinizer-ci.com/g/PhpGt/CssXPath" target="_blank">
+ <img src="https://badge.status.php.gt/cssxpath-quality.svg" alt="Code quality" />
+</a>
+<a href="https://scrutinizer-ci.com/g/PhpGt/CssXPath" target="_blank">
+ <img src="https://badge.status.php.gt/cssxpath-coverage.svg" alt="Code coverage" />
+</a>
+<a href="https://packagist.org/packages/PhpGt/CssXPath" target="_blank">
+ <img src="https://badge.status.php.gt/cssxpath-version.svg" alt="Current version" />
+</a>
+<a href="http://www.php.gt/cssxpath" target="_blank">
+ <img src="https://badge.status.php.gt/cssxpath-docs.svg" alt="PHP.Gt/CssXPath documentation" />
+</a>
+
+Example usage
+-------------
+
+
+```php
+use Gt\CssXPath\Translator;
+
+$html = <<<HTML
+<form>
+ <label>
+ Name
+ <input name="name" />
+ </label>
+ <label>
+ Code:
+ <input name="code" />
+ </label>
+ <button name="do" value="submit">Submit code</button>
+</form>
+HTML;
+
+$document = new DOMDocument();
+$document->loadHTML($html);
+
+$xpath = new DOMXPath($document);
+$inputElementList = $xpath->query(new Translator("form>label>input"));
+```
+
+[qsa]: https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelectorAll
+[gt-dom]: https://www.php.gt/dom
diff --git a/lib/phpgt/cssxpath/src/CssXPathException.php b/lib/phpgt/cssxpath/src/CssXPathException.php
new file mode 100644
index 000000000..81ad3ac9d
--- /dev/null
+++ b/lib/phpgt/cssxpath/src/CssXPathException.php
@@ -0,0 +1,6 @@
+<?php
+namespace Gt\CssXPath;
+
+use RuntimeException;
+
+/**
+ * Base exception type of the Gt\CssXPath namespace.
+ *
+ * NotYetImplementedException extends this class, so catching
+ * CssXPathException also catches it.
+ */
+class CssXPathException extends RuntimeException {}
\ No newline at end of file
diff --git a/lib/phpgt/cssxpath/src/NotYetImplementedException.php b/lib/phpgt/cssxpath/src/NotYetImplementedException.php
new file mode 100644
index 000000000..39d12b3bc
--- /dev/null
+++ b/lib/phpgt/cssxpath/src/NotYetImplementedException.php
@@ -0,0 +1,4 @@
+<?php
+namespace Gt\CssXPath;
+
+/**
+ * Thrown when a selector uses a CSS feature that the translator recognises
+ * but cannot yet convert to XPath.
+ */
+class NotYetImplementedException extends CssXPathException {}
\ No newline at end of file
diff --git a/lib/phpgt/cssxpath/src/Translator.php b/lib/phpgt/cssxpath/src/Translator.php
new file mode 100644
index 000000000..140909099
--- /dev/null
+++ b/lib/phpgt/cssxpath/src/Translator.php
@@ -0,0 +1,323 @@
+<?php /** @noinspection HtmlDeprecatedTag */
+namespace Gt\CssXPath;
+
+/**
+ * Translates a CSS selector (or a comma-separated list of selectors) into an
+ * XPath 1.0 expression, e.g. for use with DOMXPath::query().
+ *
+ * The object is stringable: casting it to string returns the same value as
+ * asXPath().
+ */
+class Translator {
+	/**
+	 * Tokeniser for a single selector. Every named group identifies one kind
+	 * of token; the group names are the "type" values handled by
+	 * convertSingleSelector().
+	 *
+	 * The attribute alternative matches exactly one [...] block, so chained
+	 * attribute selectors such as [a][b] yield one token each, and the
+	 * operator class accepts every operator declared in the EQUALS_*
+	 * constants below.
+	 */
+	const cssRegex =
+		'/'
+		. '(?P<star>\*)'
+		. '|(:(?P<pseudo>[\w-]*))'
+		. '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
+		. '|(?P<element>[\w-]*)'
+		. '|(?P<child>\s*>\s*)'
+		. '|(#(?P<id>[\w-]*))'
+		. '|(\.(?P<class>[\w-]*))'
+		. '|(?P<sibling>\s*\+\s*)'
+		. '|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~$|^*]+)(?P<attribute_value>(.+\[\]\'?)|[^\]]+))*\])'
+		. '|(?P<descendant>\s+)'
+		. '/';
+
+	// Attribute selector operators, as written between the name and the value.
+	const EQUALS_EXACT = "=";
+	const EQUALS_CONTAINS_WORD = "~=";
+	const EQUALS_ENDS_WITH = "$=";
+	const EQUALS_CONTAINS = "*=";
+	const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|=";
+	const EQUALS_STARTS_WITH = "^=";
+
+	/** @var string The CSS selector supplied to the constructor. */
+	protected $cssSelector;
+	/** @var string Text placed at the start of every translated selector. */
+	protected $prefix;
+
+	/**
+	 * @param string $cssSelector CSS selector, or comma-separated selector list
+	 * @param string $prefix XPath text prepended to each translated selector
+	 */
+	public function __construct(string $cssSelector, string $prefix = ".//") {
+		$this->cssSelector = $cssSelector;
+		$this->prefix = $prefix;
+	}
+
+	/** Same as asXPath(), so the object can be passed where a string is expected. */
+	public function __toString():string {
+		return $this->asXPath();
+	}
+
+	/**
+	 * @return string The XPath expression for the selector given to the constructor
+	 * @throws CssXPathException If the selector cannot be tokenised or uses
+	 * an unsupported attribute operator
+	 */
+	public function asXPath():string {
+		return $this->convert($this->cssSelector);
+	}
+
+	/**
+	 * Translates a selector list: each comma-separated selector is converted
+	 * on its own and the results are joined with the XPath union operator.
+	 *
+	 * @throws CssXPathException If the list cannot be split
+	 */
+	protected function convert(string $css):string {
+		// Split on commas, skipping over any comma that sits inside a
+		// quoted string.
+		$cssArray = preg_split(
+			'/(["\']).*?\1(*SKIP)(*F)|,/',
+			$css
+		);
+		if($cssArray === false) {
+			throw new CssXPathException(
+				"Unable to split selector list: {$css}"
+			);
+		}
+
+		$xPathArray = [];
+		foreach($cssArray as $input) {
+			$xPathArray []= $this->convertSingleSelector(trim($input));
+		}
+
+		return implode(" | ", $xPathArray);
+	}
+
+	/**
+	 * Translates one selector (no commas) by tokenising it with cssRegex and
+	 * appending an XPath fragment for every token.
+	 *
+	 * @throws CssXPathException If tokenising fails or an attribute selector
+	 * uses an operator that is not one of the EQUALS_* constants
+	 */
+	protected function convertSingleSelector(string $css):string {
+		$thread = array_values(
+			$this->preg_match_collated(self::cssRegex, $css)
+		);
+
+		// Pseudo-classes that are translated, and the subset that is only
+		// meaningful with a parenthesised argument.
+		$supportedPseudo = [
+			"disabled", "checked", "selected", "text",
+			"contains", "first-child", "nth-child", "nth-of-type",
+		];
+		$argumentPseudo = ["contains", "nth-child", "nth-of-type"];
+
+		$xpath = [$this->prefix];
+		// Whether the location step being built already has a node test
+		// (an element name or "*"). A predicate may only follow a node test,
+		// so "*" is inserted whenever a predicate would otherwise come first.
+		$hasNodeTest = false;
+
+		foreach($thread as $threadKey => $currentThreadItem) {
+			$next = $thread[$threadKey + 1] ?? false;
+			$type = $currentThreadItem["type"];
+			$content = $currentThreadItem["content"];
+
+			switch($type) {
+			case "star":
+			case "element":
+				$xpath []= $content;
+				$hasNodeTest = true;
+				break;
+
+			case "child":
+				$xpath []= "/";
+				$hasNodeTest = false;
+				break;
+
+			case "descendant":
+				$xpath []= "//";
+				$hasNodeTest = false;
+				break;
+
+			case "sibling":
+				$xpath []= "/following-sibling::*[1]/self::";
+				$hasNodeTest = false;
+				break;
+
+			case "id":
+				$xpath []= ($hasNodeTest ? "" : "*")
+					. "[@id='{$content}']";
+				$hasNodeTest = true;
+				break;
+
+			case "class":
+				// https://devhints.io/xpath#class-check
+				$xpath []= ($hasNodeTest ? "" : "*")
+					. "[contains(concat(' ',normalize-space(@class),' '),' {$content} ')]";
+				$hasNodeTest = true;
+				break;
+
+			case "attribute":
+				/** @var null|array<int, array<string, string>> $detail */
+				$detail = $currentThreadItem["detail"] ?? null;
+				$detailType = $detail[0] ?? null;
+				$detailValue = $detail[1] ?? null;
+
+				$wildcard = $hasNodeTest ? "" : "*";
+				$hasNodeTest = true;
+
+				if(!$detailType
+				|| $detailType["type"] !== "attribute_equals") {
+					// Presence test only: [name]
+					$xpath []= "{$wildcard}[@{$content}]";
+					break;
+				}
+
+				$valueString = trim(
+					$detailValue["content"] ?? "",
+					" '\""
+				);
+				$xpath []= $wildcard . $this->attributePredicate(
+					$content,
+					$detailType["content"],
+					$valueString
+				);
+				break;
+
+			case "pseudo":
+				// The argument, if any, is the token immediately following.
+				$specifier = "";
+				if($next && $next["type"] === "pseudospecifier") {
+					$specifier = $next["content"];
+				}
+
+				if(!in_array($content, $supportedPseudo, true)) {
+					// Unsupported pseudo-classes contribute nothing.
+					break;
+				}
+				// Compared against "" rather than empty() so that an
+				// argument of "0" is not mistaken for a missing one.
+				if($specifier === ""
+				&& in_array($content, $argumentPseudo, true)) {
+					break;
+				}
+
+				if(!$hasNodeTest) {
+					$xpath []= "*";
+					$hasNodeTest = true;
+				}
+				$last = count($xpath) - 1;
+
+				switch($content) {
+				case "disabled":
+				case "checked":
+				case "selected":
+					$xpath []= "[@{$content}]";
+					break;
+
+				case "text":
+					$xpath []= '[@type="text"]';
+					break;
+
+				case "contains":
+					$xpath []= "[contains(text(),{$specifier})]";
+					break;
+
+				case "first-child":
+					if(substr($xpath[$last], 0, 1) === "[") {
+						// The previous fragment is a bare predicate, so it
+						// cannot be used as the node test after "self::".
+						$xpath []= "[not(preceding-sibling::*)]";
+					}
+					else {
+						$xpath[$last] = '*[1]/self::' . $xpath[$last];
+					}
+					break;
+
+				case "nth-child":
+					if(substr($xpath[$last], -1) === "]") {
+						// Merge into the preceding predicate. Only its
+						// closing bracket is replaced, so brackets inside
+						// the predicate (e.g. a value of "a[]") survive.
+						$xpath[$last] = substr($xpath[$last], 0, -1)
+							. " and position() = {$specifier}]";
+					}
+					else {
+						$xpath []= "[{$specifier}]";
+					}
+					break;
+
+				case "nth-of-type":
+					$xpath []= "[{$specifier}]";
+					break;
+				}
+				break;
+			}
+		}
+
+		return implode("", $xpath);
+	}
+
+	/**
+	 * Builds the XPath predicate for an attribute selector with an operator.
+	 *
+	 * @param string $name Attribute name
+	 * @param string $operator One of the EQUALS_* constants
+	 * @param string $value Attribute value with surrounding quotes removed
+	 * @throws CssXPathException If $operator is not one of the EQUALS_* constants
+	 */
+	private function attributePredicate(
+		string $name,
+		string $operator,
+		string $value
+	):string {
+		switch($operator) {
+		case self::EQUALS_EXACT:
+			return "[@{$name}=\"{$value}\"]";
+
+		case self::EQUALS_CONTAINS:
+			return "[contains(@{$name},\"{$value}\")]";
+
+		case self::EQUALS_CONTAINS_WORD:
+			return "[contains("
+				. "concat(\" \",@{$name},\" \"),"
+				. "concat(\" \",\"{$value}\",\" \")"
+				. ")]";
+
+		case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED:
+			// Parenthesised so that a later " and position() = n" merged
+			// into this predicate applies to the whole condition.
+			return "[(@{$name}=\"{$value}\""
+				. " or starts-with(@{$name},\"{$value}-\"))]";
+
+		case self::EQUALS_STARTS_WITH:
+			return "[starts-with(@{$name},\"{$value}\")]";
+
+		case self::EQUALS_ENDS_WITH:
+			// XPath 1.0 has no ends-with(), so compare the trailing
+			// substring of the attribute instead.
+			return "[substring("
+				. "@{$name},"
+				. "string-length(@{$name}) - "
+				. "string-length(\"{$value}\") + 1)"
+				. "=\"{$value}\"]";
+		}
+
+		throw new CssXPathException(
+			"Unsupported attribute selector operator: {$operator}"
+		);
+	}
+
+	/**
+	 * Runs preg_match_all() and collates the named groups of each match into
+	 * one entry per match, in match order.
+	 *
+	 * The first non-empty named group of a match becomes the entry itself
+	 * (["type" => group name, "content" => matched text], or the return
+	 * value of $transform when given); any further non-empty named groups of
+	 * the same match are appended to that entry's "detail" list. Matches in
+	 * which no named group captured text are omitted. Keys are the match
+	 * indexes and therefore may have gaps.
+	 *
+	 * @param callable|null $transform Optional function(string $groupName,
+	 * string $match) producing the entry for a captured group
+	 * @return array<int, array<string, string>>
+	 * @throws CssXPathException If the regular expression fails to execute
+	 */
+	protected function preg_match_collated(
+		string $regex,
+		string $string,
+		?callable $transform = null
+	):array {
+		$matched = preg_match_all(
+			$regex,
+			$string,
+			$matches,
+			PREG_PATTERN_ORDER
+		);
+		if($matched === false) {
+			throw new CssXPathException(
+				"Unable to tokenise selector: {$string}"
+			);
+		}
+
+		// Reserve a slot for every non-empty match first, so the entries
+		// end up in match order whatever the order of the named groups.
+		$set = [];
+		foreach($matches[0] as $index => $fullMatch) {
+			if($fullMatch !== "") {
+				$set[$index] = null;
+			}
+		}
+
+		foreach($matches as $groupName => $groupMatches) {
+			if(is_numeric($groupName)) {
+				// Numbered duplicates of the groups are not needed.
+				continue;
+			}
+
+			foreach($groupMatches as $index => $match) {
+				if($match === "") {
+					continue;
+				}
+
+				$toSet = $transform !== null
+					? $transform($groupName, $match)
+					: ["type" => $groupName, "content" => $match];
+
+				if(!isset($set[$index])) {
+					$set[$index] = $toSet;
+				}
+				else {
+					$set[$index]["detail"] []= $toSet;
+				}
+			}
+		}
+
+		// Drop slots that never received a token (e.g. "()" or a lone "#").
+		return array_filter(
+			$set,
+			static function($item):bool {
+				return $item !== null;
+			}
+		);
+	}
+}