Parser.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser;
  11. use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
  12. use Symfony\Component\CssSelector\Node;
  13. use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
  14. /**
  15. * CSS selector parser.
  16. *
  17. * This component is a port of the Python cssselect library,
  18. * which is copyright Ian Bicking, @see https://github.com/scrapy/cssselect.
  19. *
  20. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  21. *
  22. * @internal
  23. */
  24. class Parser implements ParserInterface
  25. {
  26. private Tokenizer $tokenizer;
  /**
   * @param Tokenizer|null $tokenizer Tokenizer used by parse(); a new Tokenizer is created when null is given
   */
  public function __construct(?Tokenizer $tokenizer = null)
  {
      if (null === $tokenizer) {
          $tokenizer = new Tokenizer();
      }

      $this->tokenizer = $tokenizer;
  }
  /**
   * Tokenizes the given source string and parses the resulting token stream
   * as a comma-separated selector list.
   *
   * @return Node\SelectorNode[]
   */
  public function parse(string $source): array
  {
      return $this->parseSelectorList(
          $this->tokenizer->tokenize(new Reader($source))
      );
  }
  /**
   * Parses the arguments for ":nth-child()" and friends.
   *
   * The token values are concatenated and interpreted as an "an+b" series.
   * Recognised forms are the keywords "odd" and "even", a bare "n", a plain
   * number (no "n" present), and "<a>n<b>" where either part may be omitted
   * and <a> may be a lone sign ("-n", "+n").
   *
   * @param Token[] $tokens
   *
   * @return int[] a two-element list [a, b]
   *
   * @throws SyntaxErrorException when a string token is given or when a part is not numeric
   */
  public static function parseSeries(array $tokens): array
  {
      foreach ($tokens as $token) {
          if ($token->isString()) {
              throw SyntaxErrorException::stringAsFunctionArgument();
          }
      }

      $joined = trim(implode('', array_map(static fn (Token $token) => $token->getValue(), $tokens)));

      $int = static function ($string): int {
          if (!is_numeric($string)) {
              throw SyntaxErrorException::stringAsFunctionArgument();
          }

          return (int) $string;
      };

      if ('odd' === $joined) {
          return [2, 1];
      }
      if ('even' === $joined) {
          return [2, 0];
      }
      if ('n' === $joined) {
          return [1, 0];
      }
      if (!str_contains($joined, 'n')) {
          return [0, $int($joined)];
      }

      // Split on the first "n" only: whatever follows it has to be a valid
      // offset, so input with a second "n" is rejected by $int() instead of
      // having its tail silently ignored.
      [$step, $offset] = explode('n', $joined, 2);

      // Compare against '' explicitly: the string "0" is falsy in PHP, and a
      // truthiness test would turn "0n+5" into a step of 1 instead of 0.
      if ('' === $step) {
          $a = 1;
      } elseif ('-' === $step || '+' === $step) {
          $a = $int($step.'1');
      } else {
          $a = $int($step);
      }

      return [$a, '' === $offset ? 0 : $int($offset)];
  }
  /**
   * Parses a comma-separated list of selectors from the stream.
   *
   * When $isArgument is true, a ")" delimiter at the position where a
   * selector would start ends the list without consuming the delimiter.
   *
   * @return Node\SelectorNode[]
   */
  private function parseSelectorList(TokenStream $stream, bool $isArgument = false): array
  {
      $stream->skipWhitespace();
      $list = [];

      do {
          if ($isArgument && $stream->getPeek()->isDelimiter([')'])) {
              break;
          }

          $list[] = $this->parserSelectorNode($stream, $isArgument);

          $hasMore = $stream->getPeek()->isDelimiter([',']);
          if ($hasMore) {
              // Consume the comma and any whitespace before the next selector.
              $stream->getNext();
              $stream->skipWhitespace();
          }
      } while ($hasMore);

      return $list;
  }
  /**
   * Parses one complete selector: a simple selector followed by any number
   * of combinator + simple selector pairs, folded left to right into
   * CombinedSelectorNode instances.
   *
   * Parsing stops at the end of the stream, at a "," delimiter, or (when
   * $isArgument is true) at a ")" delimiter.
   *
   * @throws SyntaxErrorException when a pseudo-element is followed by further selector parts
   */
  private function parserSelectorNode(TokenStream $stream, bool $isArgument = false): Node\SelectorNode
  {
      [$tree, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);

      while (true) {
          $stream->skipWhitespace();
          $upcoming = $stream->getPeek();

          $selectorEnds = $upcoming->isFileEnd()
              || $upcoming->isDelimiter([','])
              || ($isArgument && $upcoming->isDelimiter([')']));
          if ($selectorEnds) {
              break;
          }

          if (null !== $pseudoElement) {
              throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
          }

          // No explicit combinator token means the descendant combinator.
          $combinator = ' ';
          if ($upcoming->isDelimiter(['+', '>', '~'])) {
              $combinator = $stream->getNext()->getValue();
              $stream->skipWhitespace();
          }

          [$rightHand, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);
          $tree = new Node\CombinedSelectorNode($tree, $combinator, $rightHand);
      }

      return new Node\SelectorNode($tree, $pseudoElement);
  }
  /**
   * Parses next simple node (hash, class, pseudo, negation).
   *
   * Starts with an optional element node and then keeps wrapping the result
   * for every following "#hash", ".class", "[attribute]", ":pseudo" or
   * ":function(...)" part until whitespace, the end of the stream, one of the
   * delimiters "," "+" ">" "~", or (when $isArgument is true) ")" is peeked.
   *
   * @param bool $insideNegation true while parsing the argument of ":not()"; a nested ":not(" then throws
   * @param bool $isArgument     true while parsing inside a functional pseudo-class, so ")" ends the selector
   *
   * @return array a two-element list [node, pseudo-element name or null]
   *
   * @throws SyntaxErrorException
   */
  private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false, bool $isArgument = false): array
  {
      $stream->skipWhitespace();

      $selectorStart = \count($stream->getUsed());
      $result = $this->parseElementNode($stream);
      $pseudoElement = null;

      while (true) {
          $peek = $stream->getPeek();
          if ($peek->isWhitespace()
              || $peek->isFileEnd()
              || $peek->isDelimiter([',', '+', '>', '~'])
              || ($isArgument && $peek->isDelimiter([')']))
          ) {
              break;
          }

          // A pseudo-element was already read, so nothing else may follow it.
          if (null !== $pseudoElement) {
              throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
          }

          if ($peek->isHash()) {
              $result = new Node\HashNode($result, $stream->getNext()->getValue());
              continue;
          }

          if ($peek->isDelimiter(['.'])) {
              $stream->getNext();
              $result = new Node\ClassNode($result, $stream->getNextIdentifier());
              continue;
          }

          if ($peek->isDelimiter(['['])) {
              $stream->getNext();
              $result = $this->parseAttributeNode($result, $stream);
              continue;
          }

          if (!$peek->isDelimiter([':'])) {
              throw SyntaxErrorException::unexpectedToken('selector', $peek);
          }

          $stream->getNext();
          if ($stream->getPeek()->isDelimiter([':'])) {
              // "::name" is always a pseudo-element.
              $stream->getNext();
              $pseudoElement = $stream->getNextIdentifier();
              continue;
          }

          $identifier = $stream->getNextIdentifier();
          if (\in_array(strtolower($identifier), ['first-line', 'first-letter', 'before', 'after'], true)) {
              // Special case: CSS 2.1 pseudo-elements can have a single ':'.
              // Any new pseudo-element must have two.
              $pseudoElement = $identifier;
              continue;
          }

          if (!$stream->getPeek()->isDelimiter(['('])) {
              $result = new Node\PseudoNode($result, $identifier);
              if ('Pseudo[Element[*]:scope]' === $result->__toString()) {
                  $this->assertScopeAtSelectorStart($stream);
              }
              continue;
          }

          // Functional pseudo-class: consume "(" and leading whitespace.
          $stream->getNext();
          $stream->skipWhitespace();
          $result = $this->parseFunctionalPseudoClass($stream, $result, $identifier, $insideNegation);
      }

      // Nothing was consumed at all: there is no selector here.
      if (\count($stream->getUsed()) === $selectorStart) {
          throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
      }

      return [$result, $pseudoElement];
  }

  /**
   * Throws unless a bare ":scope" that was just read sits at the start of a selector.
   *
   * The check looks only at the tokens reported by getUsed(): it passes when
   * exactly two tokens were used, when three were used and the first is
   * whitespace, or when the tokens before the last two are a "," optionally
   * followed by whitespace.
   *
   * @throws SyntaxErrorException
   */
  private function assertScopeAtSelectorStart(TokenStream $stream): void
  {
      $usedTokens = $stream->getUsed();
      $used = \count($usedTokens);

      $atStart = 2 === $used
          || (3 === $used && $usedTokens[0]->isWhitespace())
          || ($used >= 3 && $usedTokens[$used - 3]->isDelimiter([',']))
          || ($used >= 4
              && $usedTokens[$used - 3]->isWhitespace()
              && $usedTokens[$used - 4]->isDelimiter([',']));

      if (!$atStart) {
          throw SyntaxErrorException::notAtTheStartOfASelector('scope');
      }
  }

  /**
   * Parses the argument list of ":identifier(" and wraps $selector accordingly.
   *
   * Expects the opening "(" and any whitespace after it to be consumed
   * already; consumes everything up to and including the closing ")".
   *
   * @throws SyntaxErrorException
   */
  private function parseFunctionalPseudoClass(TokenStream $stream, Node\NodeInterface $selector, string $identifier, bool $insideNegation): Node\NodeInterface
  {
      $name = strtolower($identifier);

      if ('not' === $name) {
          if ($insideNegation) {
              throw SyntaxErrorException::nestedNot();
          }

          [$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true, true);
          $next = $stream->getNext();

          if (null !== $argumentPseudoElement) {
              // Message text is kept unchanged on purpose; callers may match on it.
              throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
          }
          if (!$next->isDelimiter([')'])) {
              throw SyntaxErrorException::unexpectedToken('")"', $next);
          }

          return new Node\NegationNode($selector, $argument);
      }

      if ('is' === $name) {
          return new Node\MatchingNode($selector, $this->parseSelectorListArgument($stream));
      }

      if ('where' === $name) {
          return new Node\SpecificityAdjustmentNode($selector, $this->parseSelectorListArgument($stream));
      }

      return new Node\FunctionNode($selector, $identifier, $this->parseFunctionArguments($stream));
  }

  /**
   * Parses a selector list used as a function argument and consumes the closing ")".
   *
   * @return Node\SelectorNode[]
   *
   * @throws SyntaxErrorException when the list is not followed by ")"
   */
  private function parseSelectorListArgument(TokenStream $stream): array
  {
      $selectors = $this->parseSelectorList($stream, true);

      $next = $stream->getNext();
      if (!$next->isDelimiter([')'])) {
          throw SyntaxErrorException::unexpectedToken('")"', $next);
      }

      return $selectors;
  }

  /**
   * Collects the raw argument tokens of a generic function up to the closing ")".
   *
   * Identifier, string and number tokens and the delimiters "+" and "-" are
   * accepted; whitespace between them is skipped.
   *
   * @return Token[] at least one token
   *
   * @throws SyntaxErrorException on any other token or when there are no arguments
   */
  private function parseFunctionArguments(TokenStream $stream): array
  {
      $arguments = [];

      while (true) {
          $stream->skipWhitespace();
          $next = $stream->getNext();

          if ($next->isIdentifier()
              || $next->isString()
              || $next->isNumber()
              || $next->isDelimiter(['+', '-'])
          ) {
              $arguments[] = $next;
              continue;
          }

          if ($next->isDelimiter([')'])) {
              break;
          }

          throw SyntaxErrorException::unexpectedToken('an argument', $next);
      }

      if (!$arguments) {
          // $next is the closing ")" here.
          throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
      }

      return $arguments;
  }
  /**
   * Parses an optional element selector: "name", "*", "prefix|name" or "*|name".
   *
   * When the next token is neither an identifier nor "*", nothing is consumed
   * and an ElementNode with null namespace and null element is returned.
   */
  private function parseElementNode(TokenStream $stream): Node\ElementNode
  {
      $peek = $stream->getPeek();
      $startsWithName = $peek->isIdentifier();

      if (!$startsWithName && !$peek->isDelimiter(['*'])) {
          return new Node\ElementNode(null, null);
      }

      // A leading "*" is consumed but represented as null.
      $first = $stream->getNext();
      $leading = $startsWithName ? $first->getValue() : null;

      if (!$stream->getPeek()->isDelimiter(['|'])) {
          // No "|" follows, so what was read is the element itself.
          return new Node\ElementNode(null, $leading);
      }

      $stream->getNext();

      return new Node\ElementNode($leading, $stream->getNextIdentifierOrStar());
  }
  /**
   * Parses the content of an attribute selector; the opening "[" must have
   * been consumed already. Consumes everything up to and including "]".
   *
   * Handles "[name]", "[prefix|name]" and the operator forms
   * "=", "^=", "$=", "*=", "~=", "|=" and "!=" followed by an identifier,
   * string or number value.
   *
   * @throws SyntaxErrorException on a missing name, operator, value or closing "]"
   */
  private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream): Node\AttributeNode
  {
      $stream->skipWhitespace();

      $name = $stream->getNextIdentifierOrStar();
      $prefix = null;
      $operator = null;

      if ($stream->getPeek()->isDelimiter(['|'])) {
          $stream->getNext();
          if ($stream->getPeek()->isDelimiter(['='])) {
              // "|" directly followed by "=" is the "|=" operator, not a namespace separator.
              $stream->getNext();
              $operator = '|=';
          } else {
              $prefix = $name;
              $name = $stream->getNextIdentifier();
          }
      } elseif (null === $name) {
          throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
      }

      if (null === $operator) {
          $stream->skipWhitespace();
          $token = $stream->getNext();

          if ($token->isDelimiter([']'])) {
              return new Node\AttributeNode($selector, $prefix, $name, 'exists', null);
          }

          if ($token->isDelimiter(['='])) {
              $operator = '=';
          } else {
              $isCompound = $token->isDelimiter(['^', '$', '*', '~', '|', '!'])
                  && $stream->getPeek()->isDelimiter(['=']);
              if (!$isCompound) {
                  throw SyntaxErrorException::unexpectedToken('operator', $token);
              }

              $operator = $token->getValue().'=';
              $stream->getNext();
          }
      }

      $stream->skipWhitespace();
      $valueToken = $stream->getNext();

      if ($valueToken->isNumber()) {
          // A numeric value is re-wrapped as a string token holding the same text.
          $valueToken = new Token(Token::TYPE_STRING, (string) $valueToken->getValue(), $valueToken->getPosition());
      }

      if (!$valueToken->isIdentifier() && !$valueToken->isString()) {
          throw SyntaxErrorException::unexpectedToken('string or identifier', $valueToken);
      }

      $stream->skipWhitespace();
      $closing = $stream->getNext();

      if (!$closing->isDelimiter([']'])) {
          throw SyntaxErrorException::unexpectedToken('"]"', $closing);
      }

      return new Node\AttributeNode($selector, $prefix, $name, $operator, $valueToken->getValue());
  }
  317. }