TokenizerEscaping.php 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser\Tokenizer;
  11. /**
  12. * CSS selector tokenizer escaping applier.
  13. *
  14. * This component is a port of the Python cssselect library,
  15. * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
  16. *
  17. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  18. *
  19. * @internal
  20. */
  class TokenizerEscaping
  {
      public function __construct(
          private TokenizerPatterns $patterns,
      ) {
      }

      /**
       * Resolves the escape sequences found in a token value.
       *
       * Hexadecimal escapes are replaced first by the UTF-8 encoding of their
       * code point; afterwards every match of the simple-escape pattern is
       * replaced by its first capture group.
       */
      public function escapeUnicode(string $value): string
      {
          $value = $this->replaceUnicodeSequences($value);

          return preg_replace($this->patterns->getSimpleEscapePattern(), '$1', $value);
      }

      /**
       * Removes every match of the new-line escape pattern, then applies
       * {@see escapeUnicode()} to the result.
       */
      public function escapeUnicodeAndNewLine(string $value): string
      {
          $value = preg_replace($this->patterns->getNewLineEscapePattern(), '', $value);

          return $this->escapeUnicode($value);
      }

      /**
       * Replaces each match of the unicode-escape pattern by the UTF-8 bytes of
       * the code point captured (as hexadecimal digits) in group 1.
       */
      private function replaceUnicodeSequences(string $value): string
      {
          return preg_replace_callback(
              $this->patterns->getUnicodeEscapePattern(),
              // The modulo keeps the value inside the 21-bit range before encoding.
              static fn (array $match): string => self::encodeCodePoint(hexdec($match[1]) % 0x200000),
              $value,
          );
      }

      /**
       * Encodes a code point as a UTF-8 byte sequence.
       *
       * Code points up to U+10FFFF are encoded on one to four bytes; anything
       * above that limit is not a valid Unicode code point and yields an empty
       * string.
       */
      private static function encodeCodePoint(int $codePoint): string
      {
          if ($codePoint < 0x80) {
              return \chr($codePoint);
          }

          if ($codePoint < 0x800) {
              return \chr(0xC0 | ($codePoint >> 6))
                  .\chr(0x80 | ($codePoint & 0x3F));
          }

          if ($codePoint < 0x10000) {
              return \chr(0xE0 | ($codePoint >> 12))
                  .\chr(0x80 | (($codePoint >> 6) & 0x3F))
                  .\chr(0x80 | ($codePoint & 0x3F));
          }

          // Supplementary planes (e.g. emoji) need a four-byte sequence.
          if ($codePoint < 0x110000) {
              return \chr(0xF0 | ($codePoint >> 18))
                  .\chr(0x80 | (($codePoint >> 12) & 0x3F))
                  .\chr(0x80 | (($codePoint >> 6) & 0x3F))
                  .\chr(0x80 | ($codePoint & 0x3F));
          }

          return '';
      }
  }