ByteString.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Random\Randomizer;
  12. use Symfony\Component\String\Exception\ExceptionInterface;
  13. use Symfony\Component\String\Exception\InvalidArgumentException;
  14. use Symfony\Component\String\Exception\RuntimeException;
  15. /**
  16. * Represents a binary-safe string of bytes.
  17. *
  18. * @author Nicolas Grekas <p@tchwork.com>
  19. * @author Hugo Hamon <hugohamon@neuf.fr>
  20. *
  21. * @throws ExceptionInterface
  22. */
  23. class ByteString extends AbstractString
  24. {
  25. private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
  26. public function __construct(string $string = '')
  27. {
  28. $this->string = $string;
  29. }
  30. /*
  31. * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
  32. *
  33. * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
  34. *
  35. * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
  36. *
  37. * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
  38. */
  39. public static function fromRandom(int $length = 16, ?string $alphabet = null): self
  40. {
  41. if ($length <= 0) {
  42. throw new InvalidArgumentException(\sprintf('A strictly positive length is expected, "%d" given.', $length));
  43. }
  44. $alphabet ??= self::ALPHABET_ALPHANUMERIC;
  45. $alphabetSize = \strlen($alphabet);
  46. $bits = (int) ceil(log($alphabetSize, 2.0));
  47. if ($bits <= 0 || $bits > 56) {
  48. throw new InvalidArgumentException('The length of the alphabet must in the [2^1, 2^56] range.');
  49. }
  50. if (\PHP_VERSION_ID >= 80300) {
  51. return new static((new Randomizer())->getBytesFromString($alphabet, $length));
  52. }
  53. $ret = '';
  54. while ($length > 0) {
  55. $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
  56. $data = random_bytes($urandomLength);
  57. $unpackedData = 0;
  58. $unpackedBits = 0;
  59. for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
  60. // Unpack 8 bits
  61. $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
  62. $unpackedBits += 8;
  63. // While we have enough bits to select a character from the alphabet, keep
  64. // consuming the random data
  65. for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
  66. $index = ($unpackedData & ((1 << $bits) - 1));
  67. $unpackedData >>= $bits;
  68. // Unfortunately, the alphabet size is not necessarily a power of two.
  69. // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
  70. // have around a 50% chance of missing as k gets larger
  71. if ($index < $alphabetSize) {
  72. $ret .= $alphabet[$index];
  73. --$length;
  74. }
  75. }
  76. }
  77. }
  78. return new static($ret);
  79. }
  80. public function bytesAt(int $offset): array
  81. {
  82. $str = $this->string[$offset] ?? '';
  83. return '' === $str ? [] : [\ord($str)];
  84. }
  85. public function append(string ...$suffix): static
  86. {
  87. $str = clone $this;
  88. $str->string .= 1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix);
  89. return $str;
  90. }
  91. public function camel(): static
  92. {
  93. $str = clone $this;
  94. $parts = explode(' ', trim(ucwords(preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string))));
  95. $parts[0] = 1 !== \strlen($parts[0]) && ctype_upper($parts[0]) ? $parts[0] : lcfirst($parts[0]);
  96. $str->string = implode('', $parts);
  97. return $str;
  98. }
  99. public function chunk(int $length = 1): array
  100. {
  101. if (1 > $length) {
  102. throw new InvalidArgumentException('The chunk length must be greater than zero.');
  103. }
  104. if ('' === $this->string) {
  105. return [];
  106. }
  107. $str = clone $this;
  108. $chunks = [];
  109. foreach (str_split($this->string, $length) as $chunk) {
  110. $str->string = $chunk;
  111. $chunks[] = clone $str;
  112. }
  113. return $chunks;
  114. }
  115. public function endsWith(string|iterable|AbstractString $suffix): bool
  116. {
  117. if ($suffix instanceof AbstractString) {
  118. $suffix = $suffix->string;
  119. } elseif (!\is_string($suffix)) {
  120. return parent::endsWith($suffix);
  121. }
  122. return '' !== $suffix && \strlen($this->string) >= \strlen($suffix) && 0 === substr_compare($this->string, $suffix, -\strlen($suffix), null, $this->ignoreCase);
  123. }
  124. public function equalsTo(string|iterable|AbstractString $string): bool
  125. {
  126. if ($string instanceof AbstractString) {
  127. $string = $string->string;
  128. } elseif (!\is_string($string)) {
  129. return parent::equalsTo($string);
  130. }
  131. if ('' !== $string && $this->ignoreCase) {
  132. return 0 === strcasecmp($string, $this->string);
  133. }
  134. return $string === $this->string;
  135. }
  136. public function folded(): static
  137. {
  138. $str = clone $this;
  139. $str->string = strtolower($str->string);
  140. return $str;
  141. }
  142. public function indexOf(string|iterable|AbstractString $needle, int $offset = 0): ?int
  143. {
  144. if ($needle instanceof AbstractString) {
  145. $needle = $needle->string;
  146. } elseif (!\is_string($needle)) {
  147. return parent::indexOf($needle, $offset);
  148. }
  149. if ('' === $needle) {
  150. return null;
  151. }
  152. $i = $this->ignoreCase ? stripos($this->string, $needle, $offset) : strpos($this->string, $needle, $offset);
  153. return false === $i ? null : $i;
  154. }
  155. public function indexOfLast(string|iterable|AbstractString $needle, int $offset = 0): ?int
  156. {
  157. if ($needle instanceof AbstractString) {
  158. $needle = $needle->string;
  159. } elseif (!\is_string($needle)) {
  160. return parent::indexOfLast($needle, $offset);
  161. }
  162. if ('' === $needle) {
  163. return null;
  164. }
  165. $i = $this->ignoreCase ? strripos($this->string, $needle, $offset) : strrpos($this->string, $needle, $offset);
  166. return false === $i ? null : $i;
  167. }
  168. public function isUtf8(): bool
  169. {
  170. return '' === $this->string || preg_match('//u', $this->string);
  171. }
  172. public function join(array $strings, ?string $lastGlue = null): static
  173. {
  174. $str = clone $this;
  175. $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
  176. $str->string = implode($this->string, $strings).$tail;
  177. return $str;
  178. }
  179. public function length(): int
  180. {
  181. return \strlen($this->string);
  182. }
  183. public function lower(): static
  184. {
  185. $str = clone $this;
  186. $str->string = strtolower($str->string);
  187. return $str;
  188. }
  189. public function match(string $regexp, int $flags = 0, int $offset = 0): array
  190. {
  191. $match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
  192. if ($this->ignoreCase) {
  193. $regexp .= 'i';
  194. }
  195. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  196. try {
  197. if (false === $match($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset)) {
  198. throw new RuntimeException('Matching failed with error: '.preg_last_error_msg());
  199. }
  200. } finally {
  201. restore_error_handler();
  202. }
  203. return $matches;
  204. }
  205. public function padBoth(int $length, string $padStr = ' '): static
  206. {
  207. $str = clone $this;
  208. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);
  209. return $str;
  210. }
  211. public function padEnd(int $length, string $padStr = ' '): static
  212. {
  213. $str = clone $this;
  214. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);
  215. return $str;
  216. }
  217. public function padStart(int $length, string $padStr = ' '): static
  218. {
  219. $str = clone $this;
  220. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);
  221. return $str;
  222. }
  223. public function prepend(string ...$prefix): static
  224. {
  225. $str = clone $this;
  226. $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$str->string;
  227. return $str;
  228. }
  229. public function replace(string $from, string $to): static
  230. {
  231. $str = clone $this;
  232. if ('' !== $from) {
  233. $str->string = $this->ignoreCase ? str_ireplace($from, $to, $this->string) : str_replace($from, $to, $this->string);
  234. }
  235. return $str;
  236. }
  237. public function replaceMatches(string $fromRegexp, string|callable $to): static
  238. {
  239. if ($this->ignoreCase) {
  240. $fromRegexp .= 'i';
  241. }
  242. $replace = \is_array($to) || $to instanceof \Closure ? 'preg_replace_callback' : 'preg_replace';
  243. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  244. try {
  245. if (null === $string = $replace($fromRegexp, $to, $this->string)) {
  246. $lastError = preg_last_error();
  247. foreach (get_defined_constants(true)['pcre'] as $k => $v) {
  248. if ($lastError === $v && str_ends_with($k, '_ERROR')) {
  249. throw new RuntimeException('Matching failed with '.$k.'.');
  250. }
  251. }
  252. throw new RuntimeException('Matching failed with unknown error code.');
  253. }
  254. } finally {
  255. restore_error_handler();
  256. }
  257. $str = clone $this;
  258. $str->string = $string;
  259. return $str;
  260. }
  261. public function reverse(): static
  262. {
  263. $str = clone $this;
  264. $str->string = strrev($str->string);
  265. return $str;
  266. }
  267. public function slice(int $start = 0, ?int $length = null): static
  268. {
  269. $str = clone $this;
  270. $str->string = substr($this->string, $start, $length ?? \PHP_INT_MAX);
  271. return $str;
  272. }
  273. public function snake(): static
  274. {
  275. $str = $this->camel();
  276. $str->string = strtolower(preg_replace(['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'], '\1_\2', $str->string));
  277. return $str;
  278. }
  279. public function splice(string $replacement, int $start = 0, ?int $length = null): static
  280. {
  281. $str = clone $this;
  282. $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);
  283. return $str;
  284. }
  285. public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  286. {
  287. if (1 > $limit ??= \PHP_INT_MAX) {
  288. throw new InvalidArgumentException('Split limit must be a positive integer.');
  289. }
  290. if ('' === $delimiter) {
  291. throw new InvalidArgumentException('Split delimiter is empty.');
  292. }
  293. if (null !== $flags) {
  294. return parent::split($delimiter, $limit, $flags);
  295. }
  296. $str = clone $this;
  297. $chunks = $this->ignoreCase
  298. ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
  299. : explode($delimiter, $this->string, $limit);
  300. foreach ($chunks as &$chunk) {
  301. $str->string = $chunk;
  302. $chunk = clone $str;
  303. }
  304. return $chunks;
  305. }
  306. public function startsWith(string|iterable|AbstractString $prefix): bool
  307. {
  308. if ($prefix instanceof AbstractString) {
  309. $prefix = $prefix->string;
  310. } elseif (!\is_string($prefix)) {
  311. return parent::startsWith($prefix);
  312. }
  313. return '' !== $prefix && 0 === ($this->ignoreCase ? strncasecmp($this->string, $prefix, \strlen($prefix)) : strncmp($this->string, $prefix, \strlen($prefix)));
  314. }
  315. public function title(bool $allWords = false): static
  316. {
  317. $str = clone $this;
  318. $str->string = $allWords ? ucwords($str->string) : ucfirst($str->string);
  319. return $str;
  320. }
  321. public function toUnicodeString(?string $fromEncoding = null): UnicodeString
  322. {
  323. return new UnicodeString($this->toCodePointString($fromEncoding)->string);
  324. }
  325. public function toCodePointString(?string $fromEncoding = null): CodePointString
  326. {
  327. $u = new CodePointString();
  328. if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
  329. $u->string = $this->string;
  330. return $u;
  331. }
  332. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  333. try {
  334. try {
  335. $validEncoding = false !== mb_detect_encoding($this->string, $fromEncoding ?? 'Windows-1252', true);
  336. } catch (InvalidArgumentException $e) {
  337. if (!\function_exists('iconv')) {
  338. throw $e;
  339. }
  340. $u->string = iconv($fromEncoding ?? 'Windows-1252', 'UTF-8', $this->string);
  341. return $u;
  342. }
  343. } finally {
  344. restore_error_handler();
  345. }
  346. if (!$validEncoding) {
  347. throw new InvalidArgumentException(\sprintf('Invalid "%s" string.', $fromEncoding ?? 'Windows-1252'));
  348. }
  349. $u->string = mb_convert_encoding($this->string, 'UTF-8', $fromEncoding ?? 'Windows-1252');
  350. return $u;
  351. }
  352. public function trim(string $chars = " \t\n\r\0\x0B\x0C"): static
  353. {
  354. $str = clone $this;
  355. $str->string = trim($str->string, $chars);
  356. return $str;
  357. }
  358. public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): static
  359. {
  360. $str = clone $this;
  361. $str->string = rtrim($str->string, $chars);
  362. return $str;
  363. }
  364. public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): static
  365. {
  366. $str = clone $this;
  367. $str->string = ltrim($str->string, $chars);
  368. return $str;
  369. }
  370. public function upper(): static
  371. {
  372. $str = clone $this;
  373. $str->string = strtoupper($str->string);
  374. return $str;
  375. }
  376. public function width(bool $ignoreAnsiDecoration = true): int
  377. {
  378. $string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
  379. return (new CodePointString($string))->width($ignoreAnsiDecoration);
  380. }
  381. }