745 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			PHP
		
	
	
			
		
		
	
	
			745 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			PHP
		
	
	
| <?php
 | |
| 
 | |
| /*
 | |
|  * This file is part of the Symfony package.
 | |
|  *
 | |
|  * (c) Fabien Potencier <fabien@symfony.com>
 | |
|  *
 | |
|  * For the full copyright and license information, please view the LICENSE
 | |
|  * file that was distributed with this source code.
 | |
|  */
 | |
| 
 | |
| namespace Symfony\Polyfill\Iconv;
 | |
| 
 | |
/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv              - Convert string to requested character encoding
 * - iconv_mime_decode  - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding - Set current setting for character encoding conversion
 * - iconv_mime_encode  - Composes a MIME header field
 * - iconv_strlen       - Returns the character count of string
 * - iconv_strpos       - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos      - Finds the last occurrence of a needle within a haystack
 * - iconv_substr       - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    public const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    public const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    /** Values reported and updated by iconv_get_encoding() / iconv_set_encoding(). */
    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    /** Lower-cased charset alias => canonical charset name used to locate the map files. */
    private static $alias = [
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    ];

    /** Lazily loaded transliteration table (see mapFromUtf8()). */
    private static $translitMap = [];

    /** Cache of loaded charset maps, keyed by "<from.|to.><charset>" (see loadMap()). */
    private static $convertMap = [];
    private static $errorHandler;
    private static $lastError;

    /** High nibble of a UTF-8 lead byte => byte length of the sequence it starts. */
    private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

    /** Validity of the string currently processed by iconv(); read by the UTF-8 helpers. */
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, pivoting through UTF-8.
     *
     * "//TRANSLIT" and "//IGNORE" suffixes are honoured on $outCharset and
     * silently stripped from $inCharset. An empty charset name is treated
     * as "iso-8859-1".
     *
     * @param string $inCharset  Charset of $str
     * @param string $outCharset Target charset, optionally suffixed
     * @param string $str        The string to convert (cast to string)
     *
     * @return string|false The converted string, or false when a charset is
     *                      unknown or the conversion fails
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes may be stacked in any order, hence the loop.
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        // On the input side the suffixes are removed but have no effect.
        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
          || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            // Whatever came out of the map is considered valid UTF-8.
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 pivot to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field found in the header section of $str.
     *
     * Only the part before the first blank line is considered. Folded lines
     * (continuations starting with a space or tab) belong to the preceding field.
     *
     * @param string      $str     Raw header block
     * @param int         $mode    Bitmask passed through to iconv_mime_decode()
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return array|false Field name => value, or list of values when the field
     *                     is repeated; false when a field fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalise CRLF and lone CR to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $sections = explode("\n\n", $str, 2);

        $headers = [];

        foreach (preg_split('/\n(?![ \t])/', $sections[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $field = explode(':', $line, 2);

            // Lines without a colon are not header fields: skip them.
            if (2 !== \count($field)) {
                continue;
            }

            $name = $field[0];
            $value = ltrim($field[1]);

            if (isset($headers[$name])) {
                // Repeated field: promote the stored value to a list.
                if (!\is_array($headers[$name])) {
                    $headers[$name] = [$headers[$name]];
                }
                $headers[$name][] = $value;
            } else {
                $headers[$name] = $value;
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field containing "=?charset?B|Q?data?=" encoded words.
     *
     * Only the first (possibly folded) header line of $str is decoded.
     *
     * @param string      $str     Raw header field
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR keeps
     *                             undecodable words verbatim instead of failing
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return string|false The decoded field, or false on error
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first logical line and unfold it.
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields [text, charset, scheme, data, text, charset, scheme, data, text, ...]
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, \PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
              && 'utf-8' !== $c
              && !isset(self::$alias[$c])
              && !self::loadMap('from.', $c, $d)) {
                // Unknown charset while in lenient mode: leave the word untouched below.
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "=XX" => byte, "_" => space; literal "%" is protected first.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                // Text following the encoded word; whitespace-only gaps are dropped.
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all of them.
     *
     * @param string $type "input_encoding", "output_encoding" or "internal_encoding";
     *                     any other value returns the three settings as an array
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return [
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        ];
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
     * @param string $charset The new value (stored as given, not validated)
     *
     * @return bool False when $type is not recognised
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;
            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field using "B" (base64) or "Q" (quoted-printable) encoded words.
     *
     * @param string     $fieldName  Field name; replaced by '' when it contains bytes >= 0x80
     * @param string     $fieldValue Field value, in the "input-charset" preference
     * @param array|null $pref       Overrides for "scheme", "input-charset", "output-charset",
     *                               "line-length" and "line-break-chars"
     *
     * @return string|false The encoded field, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = [];
        }

        // "+=" only fills in the keys the caller did not provide.
        $pref += [
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        ];

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Work character by character so a multi-byte character is never split across words.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = $chars[0] ?? [];

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        // The first line also carries "<fieldName>: "; following lines only the word wrapper.
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = [];

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is what this character would add (Q) or what the whole line would become (B).
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    [__CLASS__, 'qpByteCallback'],
                    $c
                )
                : base64_encode($lineData.$c);

            // Flush the current encoded word when adding $c would overflow the line.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += \strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in $s.
     *
     * Delegates to strlen1() when the "xml" extension is loaded, to strlen2() otherwise.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            $hasXml = \extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode(), which maps each UTF-8 sequence to one byte.
     *
     * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; behaviour is left untouched here.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Character count obtained by hopping from lead byte to lead byte.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the sequence length (1 when not in the mask).
            $u = $s[$i] & "\xF0";
            $i += $ulenMask[$u] ?? 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param int         $offset   Character offset at which the search starts
     * @param string|null $encoding Charset of both strings; defaults to the internal encoding
     *
     * @return int|false Position counted in characters, or false when not found
     *                   or when a conversion to UTF-8 fails
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        $pos = strpos($haystack, $needle);

        // $pos is a byte offset: translate it to a character count.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle   An empty needle is never found
     * @param string|null $encoding Charset of both strings; defaults to the internal encoding
     *
     * @return int|false Position counted in characters, or false when not found
     *                   or when a conversion to UTF-8 fails
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; do not measure the whole haystack.
        return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
    }

    /**
     * Extracts a part of $s, with $start and $length counted in characters.
     *
     * @param string      $s
     * @param int         $start    Negative values count from the end of the string
     * @param int|null    $length   Negative values stop that many characters before the end;
     *                              null means "up to the end of the string"
     * @param string|null $encoding Charset of $s and of the result; defaults to the internal encoding
     *
     * @return string|false The extracted part. Out-of-range requests return ''
     *                      on PHP >= 8.0 and false before; false is also returned
     *                      when a charset conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: nothing to convert on the way in, nor on the way out.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;
        $length = null === $length ? 2147483647 : (int) $length;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            $start = 0;
        }
        if ($start >= $slen) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        // Number of characters available from $start to the end.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // "s" (dot-all) is required so that "." also consumes newlines.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads (and caches) a conversion map for a charset.
     *
     * A missing "to." map is derived by flipping the matching "from." map.
     *
     * @param string $type    "from." or "to."
     * @param string $charset Canonical, lower-cased charset name
     * @param array  $map     Receives the map on success
     *
     * @return bool False when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies $str while validating its UTF-8 sequences.
     *
     * Relies on self::$isValidUtf8 as set by iconv() to skip per-character checks.
     *
     * @param string $str
     * @param mixed  $ignore When truthy, invalid bytes are dropped instead of failing
     *
     * @return string|false False (after triggering an error) on an invalid
     *                      sequence when $ignore is falsy
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is overwritten in place; $j is the write cursor, $i the read cursor.
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                // A non-ASCII byte that does not start a sequence, or an invalid sequence.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                }

                $i += $ulen;

                $u[$j++] = $uchr[0];

                // Copy the continuation bytes that exist; the "0 !==" tests only chain the assignments.
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts $str to UTF-8 using a "from." map, appending to $result.
     *
     * Two-byte map entries take precedence over single-byte ones.
     *
     * @param string $result Receives the converted output (appended)
     * @param array  $map    Source byte(s) => UTF-8 character
     * @param string $str
     * @param mixed  $ignore When truthy, unmapped bytes are skipped
     *
     * @return bool False (after triggering an error) on an unmapped byte
     *              when $ignore is falsy
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts UTF-8 $str to another charset using a "to." map, appending to $result.
     *
     * @param string $result   Receives the converted output (appended)
     * @param array  $map      UTF-8 character => target byte(s)
     * @param string $str
     * @param mixed  $ignore   When truthy, unconvertible characters are skipped
     * @param mixed  $translit When truthy, unmapped characters are replaced using the
     *                         "translit" table or, failing that, the leading ASCII
     *                         character of their NFD form
     *
     * @return bool False when a character cannot be converted and $ignore is falsy
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                }

                $i += $ulen;
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    // Needs the Normalizer class (intl extension or an equivalent polyfill).
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Re-inject the replacement in front of the remaining input and restart from it.
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler: quoted-printable encodes one byte as "=XX".
     *
     * Always emits two upper-case hexadecimal digits, including for bytes below 0x10.
     *
     * @param array $m Match array; $m[0] is the single byte to encode
     *
     * @return string
     */
    private static function qpByteCallback(array $m)
    {
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into chunks of at most 65535, the largest quantifier used here.
     *
     * @param int $offset
     *
     * @return string
     */
    private static function pregOffset($offset)
    {
        $rx = [];
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset directory.
     *
     * @param string $file File name without directory nor ".php" extension
     *
     * @return mixed|false Whatever the file returns, or false when it does not
     *                     exist or when the name is not a plain file name
     */
    private static function getData($file)
    {
        // Charset names may come from untrusted input (e.g. a MIME header):
        // refuse anything that could point outside the charset directory.
        if (false !== strpbrk($file, '/\\') || false !== strpos($file, "\0")) {
            return false;
        }

        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}
 |