small file size optimization
authorMartin Kutschker <martin.t.kutschker@blackbox.net>
Thu, 10 Jun 2004 10:33:41 +0000 (10:33 +0000)
committerMartin Kutschker <martin.t.kutschker@blackbox.net>
Thu, 10 Jun 2004 10:33:41 +0000 (10:33 +0000)
git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@366 709f56b5-9817-0410-a4d7-c38de5d9e867

t3lib/class.t3lib_cs.php
t3lib/unidata/Translit.txt [new file with mode: 0644]

index 3cdc145..7289315 100755 (executable)
@@ -1446,32 +1446,15 @@ class t3lib_cs {
                                return mb_strtoupper($str,'utf-8');
                        }
                } elseif ($charset == 'utf-8')  {
-                       return $this->utf8_conv_case($string,$case);
+                       return $this->utf8_char_mapping($string,'case',$case);
                } elseif (isset($this->eucBasedSets[$charset])) {
-                       return $this->euc_conv_case($string,$case,$charset);
-               }
-
-               // treat everything else as single-byte encoding
-               if (!$this->initCaseFolding($charset))  return $string; // do nothing
-               $out = '';
-               $caseConv =& $this->caseFolding[$charset][$case];
-
-               for($i=0; isset($string{$i}); $i++)     {
-                       $c = $string{$i};
-                       $cc = $caseConv[$c];
-                       if ($cc)        {
-                               $out .= $cc;
-                       } else {
-                               $out .= $c;
-                       }
+                       return $this->euc_char_mapping($string,$charset,'case',$case);
+               } else {
+                               // treat everything else as single-byte encoding
+                       return $this->sb_char_mapping($string,'case',$case);
                }
 
-               // is a simple strtr() faster or slower than the code above?
-               // perhaps faster for small single-byte tables but slower for large multi-byte tables?
-               //
-               // return strtr($string,$this->caseFolding[$charset][$case]);
-
-               return $out;
+               return $string;
        }
 
        /**
@@ -1483,20 +1466,65 @@ class t3lib_cs {
         */
        function specCharsToASCII($charset,$string)     {
                if ($charset == 'utf-8')        {
-                       return $this->utf8_toASCII($string);
+                       return $this->utf8_char_mapping($string,'ascii');
                } elseif (isset($this->eucBasedSets[$charset])) {
-                       return $this->euc_toASCII($string,$charset);
+                       return $this->euc_char_mapping($string,$charset,'ascii');
+               } else {
+                               // treat everything else as single-byte encoding
+                       return $this->sb_char_mapping($string,$charset,'ascii');
                }
 
-               // treat everything else as single-byte encoding
-               if (!$this->initToASCII($charset))      return $string; // do nothing
-               $out = '';
-               $ascii =& $this->toASCII[$charset];
+               return $string;
+       }
+
+
+
+
+
+
+
+
+
+
+
+
+       /********************************************
+        *
+        * Internal string operation functions
+        *
+        ********************************************/
+
+       /**
+        * Maps all characters of a string in a single byte charset.
+        *
+        * @param       string          the string
+        * @param       string          the charset
+        * @param       string          mode: 'case' (case folding) or 'ascii' (ASCII transliteration)
+        * @param       string          'case': conversion 'toLower' or 'toUpper'
+        * @return      string          the converted string
+        * @author      Martin Kutschker <martin.t.kutschker@blackbox.net>
+        */
+       function sb_char_mapping($str,$charset,$mode,$opt='')   {
+               switch($mode)   {
+                       case 'case':
+                               if (!$this->initCaseFolding($charset))  return $str;    // do nothing
+                               $map =& $this->caseFolding[$charset][$opt];
+                               break;
 
-               for($i=0; isset($string{$i}); $i++)     {
-                       $c = $string{$i};
-                       if (isset($ascii[$c]))  {
-                               $out .= $ascii[$c];
+                       case 'ascii':
+                               if (!$this->initToASCII($charset))      return $str;    // do nothing
+                               $map =& $this->toASCII[$charset];
+                               break;
+
+                       default:
+                               return $str;
+               }
+
+               $out = '';
+               for($i=0; isset($str{$i}); $i++)        {
+                       $c = $str{$i};
+                       if (isset($map[$c]))    {
+                               $out .= $map[$c];
                        } else {
                                $out .= $c;
                        }
@@ -1514,8 +1542,6 @@ class t3lib_cs {
 
 
 
-
-
        /********************************************
         *
         * Internal UTF-8 string operation functions
@@ -1708,54 +1734,31 @@ class t3lib_cs {
        }
 
        /**
-        * Translates all characters of an UTF-8 string into their respective case values.
-        * Unit-tested by Kasper
+        * Maps all characters of an UTF-8 string.
         *
         * @param       string          UTF-8 string
-        * @param       string          conversion: 'toLower' or 'toUpper'
+        * @param       string          mode: 'case' (case folding) or 'ascii' (ASCII transliteration)
+        * @param       string          'case': conversion 'toLower' or 'toUpper'
         * @return      string          the converted string
         * @author      Martin Kutschker <martin.t.kutschker@blackbox.net>
-        * @see strtolower(), strtoupper(), mb_convert_case()
         */
-       function utf8_conv_case($str,$case)     {
-               if (!$this->initUnicodeData('case'))    return $str;    // do nothing
+       function utf8_char_mapping($str,$mode,$opt='')  {
+               if (!$this->initUnicodeData($mode))     return $str;    // do nothing
 
                $out = '';
-               $caseConv =& $this->caseFolding['utf-8'][$case];
+               switch($mode)   {
+                       case 'case':
+                               $map =& $this->caseFolding['utf-8'][$opt];
+                               break;
 
-               for($i=0; isset($str{$i}); $i++)        {
-                       $c = ord($str{$i});
-                       if (!($c & 0x80))       // single-byte (0xxxxxx)
-                               $mbc = $str{$i};
-                       elseif (($c & 0xC0) == 0xC0)    {       // multi-byte starting byte (11xxxxxx)
-                               for ($bc=0; $c & 0x80; $c = $c << 1) { $bc++; } // calculate number of bytes
-                               $mbc = substr($str,$i,$bc);
-                               $i += $bc-1;
-                       }
+                       case 'ascii':
+                               $map =& $this->toASCII['utf-8'];
+                               break;
 
-                       if (isset($caseConv[$mbc]))     {
-                               $out .= $caseConv[$mbc];
-                       } else {
-                               $out .= $mbc;
-                       }
+                       default:
+                               return $str;
                }
 
-               return $out;
-       }
-
-       /**
-        * Converts chars with accents, umlauts or composed to ASCII equivalents.
-        *
-        * @param       string          Input string to convert
-        * @return      string          The converted string
-        * @author      Martin Kutschker <martin.t.kutschker@blackbox.net>
-        */
-       function utf8_toASCII($str)     {
-               if (!$this->initUnicodeData('ascii'))   return $str;    // do nothing
-
-               $out = '';
-               $toASCII =& $this->toASCII['utf-8'];
-
                for($i=0; isset($str{$i}); $i++)        {
                        $c = ord($str{$i});
                        if (!($c & 0x80))       // single-byte (0xxxxxx)
@@ -1766,8 +1769,8 @@ class t3lib_cs {
                                $i += $bc-1;
                        }
 
-                       if (isset($toASCII[$mbc]))      {
-                               $out .= $toASCII[$mbc];
+                       if (isset($map[$mbc]))  {
+                               $out .= $map[$mbc];
                        } else {
                                $out .= $mbc;
                        }
@@ -1792,6 +1795,7 @@ class t3lib_cs {
 
 
 
+
        /********************************************
         *
         * Internal EUC string operation functions
@@ -1927,66 +1931,37 @@ class t3lib_cs {
        }
 
        /**
-        * Translates all characters of a string in the EUC charset family into their respective case values.
+        * Maps all characters of a string in the EUC charset family.
         *
         * @param       string          EUC multibyte character string
-        * @param       string          conversion: 'toLower' or 'toUpper'
         * @param       string          the charset
+        * @param       string          mode: 'case' (case folding) or 'ascii' (ASCII transliteration)
+        * @param       string          'case': conversion 'toLower' or 'toUpper'
         * @return      string          the converted string
         * @author      Martin Kutschker <martin.t.kutschker@blackbox.net>
-        * @see strtolower(), strtoupper(), mb_convert_case()
         */
-       function euc_conv_case($str,$case,$charset)     {
-               if (!$this->initCaseFolding($charset))  return $str;    // do nothing
-
-               $sjis = ($charset == 'shift_jis');
-               $out = '';
-               $caseConv =& $this->caseFolding[$charset][$case];
-               for($i=0; isset($str{$i}); $i++)        {
-                       $mbc = $str{$i};
-                       $c = ord($mbc);
+       function euc_char_mapping($str,$charset,$mode,$opt='')  {
+               switch($mode)   {
+                       case 'case':
+                               if (!$this->initCaseFolding($charset))  return $str;    // do nothing
+                               $map =& $this->caseFolding[$charset][$opt];
+                               break;
 
-                       if ($sjis)      {
-                               if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0))  {       // a double-byte char
-                                       $mbc = substr($str,$i,2);
-                                       $i++;
-                               }
-                       }
-                       else    {
-                               if ($c >= 0x80) {       // a double-byte char
-                                       $mbc = substr($str,$i,2);
-                                       $i++;
-                               }
-                       }
+                       case 'ascii':
+                               if (!$this->initToASCII($charset))      return $str;    // do nothing
+                               $map =& $this->toASCII[$charset];
+                               break;
 
-                       if (isset($caseConv[$mbc]))     {
-                               $out .= $caseConv[$mbc];
-                       } else {
-                               $out .= $mbc;
-                       }
+                       default:
+                               return $str;
                }
 
-               return $out;
-       }
-
-       /**
-        * Converts chars with accents, umlauts or composed to ASCII equivalents.
-        *
-        * @param       string          Input string to convert
-        * @param       string          The charset
-        * @return      string          The converted string
-        * @author      Martin Kutschker <martin.t.kutschker@blackbox.net>
-        */
-       function euc_toASCII($str,$charset)     {
-               if (!$this->initToASCII($charset))      return $str;    // do nothing
-
                $sjis = ($charset == 'shift_jis');
                $out = '';
-               $toASCII =& $this->toASCII[$charset];
-
                for($i=0; isset($str{$i}); $i++)        {
                        $mbc = $str{$i};
                        $c = ord($mbc);
+
                        if ($sjis)      {
                                if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0))  {       // a double-byte char
                                        $mbc = substr($str,$i,2);
@@ -2000,8 +1975,8 @@ class t3lib_cs {
                                }
                        }
 
-                       if (isset($toASCII[$mbc]))      {
-                               $out .= $toASCII[$mbc];
+                       if (isset($map[$mbc]))  {
+                               $out .= $map[$mbc];
                        } else {
                                $out .= $mbc;
                        }
diff --git a/t3lib/unidata/Translit.txt b/t3lib/unidata/Translit.txt
new file mode 100644 (file)
index 0000000..cce9ccf
--- /dev/null
@@ -0,0 +1,329 @@
+00A5; 0079 0065 006E;  YEN SIGN => yen
+00A6; 007C;            BROKEN BAR => |
+00AB; 003C 003C;       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK => <<
+00A9; 0028 0063 0029;  COPYRIGHT SIGN => (c)
+00AE; 0028 0052 0029;  REGISTERED SIGN => (R)
+00B1; 002B 002F 002D;  PLUS-MINUS SIGN => +/-
+00B5; 0075;            MICRO SIGN => u
+00B7; 002A;            MIDDLE DOT => *
+00BB; 003E 003E;       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK => >>
+00C4; 0041 0045;       LATIN CAPITAL LETTER A WITH DIAERESIS => AE (German)
+00C5; 0041 0041;       LATIN CAPITAL LETTER A WITH RING ABOVE => AA (Danish)
+00C6; 0041 0045;       LATIN CAPITAL LETTER AE => AE (Danish)
+00D6; 004F 0045;       LATIN CAPITAL LETTER O WITH DIAERESIS => OE (German)
+00D7; 002A;            MULTIPLICATION SIGN => *
+00D8; 004F 0045;       LATIN CAPITAL LETTER O WITH STROKE => OE (Danish)
+00DC; 0055 0045;       LATIN CAPITAL LETTER U WITH DIAERESIS => UE (German)
+00E4; 0061 0065;       LATIN SMALL LETTER A WITH DIAERESIS => ae (German)
+00E5; 0061 0061;       LATIN SMALL LETTER A WITH RING ABOVE => aa (Danish)
+00DF; 0073 0073;       LATIN SMALL LETTER SHARP S => ss (German)
+00E6; 0061 0065;       LATIN SMALL LETTER AE => ae (Danish)
+00F6; 006F 0065;       LATIN SMALL LETTER O WITH DIAERESIS => oe (German)
+00F7; 002F;            DIVISION SIGN => /
+00F8; 006F 0065;       LATIN SMALL LETTER O WITH STROKE => oe (Danish)
+00FC; 0075 0065;       LATIN SMALL LETTER U WITH DIAERESIS => ue (German)
+0131; 0069;            LATIN SMALL LETTER DOTLESS I => i
+0152; 004F 0045;       LATIN CAPITAL LETTER OE => OE
+0153; 006F 0065;       LATIN SMALL LETTER OE => oe
+0192; 0066;            LATIN SMALL LETTER F WITH HOOK => f
+02BC; 0027;            MODIFIER LETTER APOSTROPHE => '
+02CA; 0027;            MODIFIER LETTER ACUTE ACCENT => '
+2010; 002D;            HYPHEN => -
+2013; 002D;            EN DASH => -
+2014; 002D;            EM DASH => -
+2018; 0060;            LEFT SINGLE QUOTATION MARK => `
+2019; 0027;            RIGHT SINGLE QUOTATION MARK => '
+201C; 0022;            LEFT DOUBLE QUOTATION MARK => "
+201D; 0022;            RIGHT DOUBLE QUOTATION MARK => "
+201E; 0022;            DOUBLE LOW-9 QUOTATION MARK => "
+2022; 002A;            BULLET => *
+2039; 003C;            SINGLE LEFT-POINTING ANGLE QUOTATION MARK => <
+203A; 003E;            SINGLE RIGHT-POINTING ANGLE QUOTATION MARK => >
+2044; 002F;            FRACTION SLASH => /
+20A0; 0045 0055 0052;  EURO-CURRENCY SIGN => EUR
+20AC; 0045 0055 0052;  EURO SIGN => EUR
+
+
+# Cyrillic (Russian transliteration after GOST)
+
+0401; ;                        CYRILLIC CAPITAL LETTER IO => JO ?? yo
+0402; ;                        CYRILLIC CAPITAL LETTER DJE => D ???
+0403; ;                        CYRILLIC CAPITAL LETTER GJE ?? GJ GY
+0404; ;                        CYRILLIC CAPITAL LETTER UKRAINIAN IE => IE E ????
+0405; ;                        CYRILLIC CAPITAL LETTER DZE => DZ ???
+0406; 0049;            CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I => I
+0407; 0049;            CYRILLIC CAPITAL LETTER YI => I
+0408; ;                        CYRILLIC CAPITAL LETTER JE ?? J Y
+0409; ;                        CYRILLIC CAPITAL LETTER LJE ?? LJ LY
+040A; ;                        CYRILLIC CAPITAL LETTER NJE ??  NJ NY
+040B; ;                        CYRILLIC CAPITAL LETTER TSHE ?? TSH
+040C; ;                        CYRILLIC CAPITAL LETTER KJE ?? KJ KY
+
+040E; ;                        CYRILLIC CAPITAL LETTER SHORT U => U (Belorussian)
+040F; ;                        CYRILLIC CAPITAL LETTER DZHE ?? DZH
+
+0490; 0047;            CYRILLIC CAPITAL LETTER GHE WITH UPTURN => G (Belorussian, Ukrainian)
+0491; 0067;            CYRILLIC SMALL LETTER GHE WITH UPTURN => g (Belorussian, Ukrainian)
+
+0451; ;                        CYRILLIC SMALL LETTER IO => jo ?? yo
+0452; ;                        CYRILLIC SMALL LETTER DJE => dj dy ??
+0453; ;                        CYRILLIC SMALL LETTER GJE ?? gj gy
+0454; ;                        CYRILLIC SMALL LETTER UKRAINIAN IE ?? e
+0455; ;                        CYRILLIC SMALL LETTER DZE => dz ??
+0456; 0069;            CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I => i
+0457; 0069;            CYRILLIC SMALL LETTER YI => i
+0458; ;                        CYRILLIC SMALL LETTER JE ?? j y
+0459; ;                        CYRILLIC SMALL LETTER LJE ?? lj ly
+045A; ;                        CYRILLIC SMALL LETTER NJE ?? nj ny
+045B; ;                        CYRILLIC SMALL LETTER TSHE => tsh ??? Serbocroatian
+046C; ;                        CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS => ??
+
+045E; ;                        CYRILLIC SMALL LETTER SHORT U => u (Belorussian)
+045F; ;                        CYRILLIC SMALL LETTER DZHE ???
+
+###########
+
+0410; 0041;            CYRILLIC CAPITAL LETTER A => A
+0411; 0042;            CYRILLIC CAPITAL LETTER BE => B
+0412; 0056;            CYRILLIC CAPITAL LETTER VE => V
+0413; 0047;            CYRILLIC CAPITAL LETTER GHE => G ?? GH
+0414; 0044;            CYRILLIC CAPITAL LETTER DE => D
+0415; 0045;            CYRILLIC CAPITAL LETTER IE => E
+0416; 005A 0048;       CYRILLIC CAPITAL LETTER ZHE => ZH
+0417; 005A;            CYRILLIC CAPITAL LETTER ZE => Z
+0418; 0049;            CYRILLIC CAPITAL LETTER I => I
+0419; 004A;            CYRILLIC CAPITAL LETTER SHORT I => J
+041A; 004B;            CYRILLIC CAPITAL LETTER KA => K
+041B; 004C;            CYRILLIC CAPITAL LETTER EL => L
+041C; 004D;            CYRILLIC CAPITAL LETTER EM => M
+041D; 004E;            CYRILLIC CAPITAL LETTER EN => N
+041E; 004F;            CYRILLIC CAPITAL LETTER O => O
+041F; 0050;            CYRILLIC CAPITAL LETTER PE => P
+0420; 0052;            CYRILLIC CAPITAL LETTER ER => R
+0421; 0053;            CYRILLIC CAPITAL LETTER ES => S
+0422; 0054;            CYRILLIC CAPITAL LETTER TE => T
+0423; 0055;            CYRILLIC CAPITAL LETTER U => U
+0424; 0046;            CYRILLIC CAPITAL LETTER EF => F
+0425; 004B 0048;       CYRILLIC CAPITAL LETTER HA => KH
+0426; 0043;            CYRILLIC CAPITAL LETTER TSE => C
+0427; 0043 0048;       CYRILLIC CAPITAL LETTER CHE => CH
+0428; 0053 0048;       CYRILLIC CAPITAL LETTER SHA => SH
+0429; 0053 0048 0048;  CYRILLIC CAPITAL LETTER SHCHA => SHH (??? SHCH)
+042A; 0022;            CYRILLIC CAPITAL LETTER HARD SIGN => "
+042B; 0059;            CYRILLIC CAPITAL LETTER YERU => Y
+042C; 0027;            CYRILLIC CAPITAL LETTER SOFT SIGN => '
+042D; 0045 0048;       CYRILLIC CAPITAL LETTER E => EH
+042E; 004A 0055;       CYRILLIC CAPITAL LETTER YU => JU
+042F; 004A 0041;       CYRILLIC CAPITAL LETTER YA => JA
+0430; 0061;            CYRILLIC SMALL LETTER A => a
+0431; 0062;            CYRILLIC SMALL LETTER BE => b
+0432; 0076;            CYRILLIC SMALL LETTER VE => v
+0433; 0067;            CYRILLIC SMALL LETTER GHE => g ?? gh
+0434; 0064;            CYRILLIC SMALL LETTER DE => d
+0435; 0065;            CYRILLIC SMALL LETTER IE => e
+0436; 007A 0068;       CYRILLIC SMALL LETTER ZHE => zh
+0437; 007A;            CYRILLIC SMALL LETTER ZE => z
+0438; 0069;            CYRILLIC SMALL LETTER I => i
+0439; 006A;            CYRILLIC SMALL LETTER SHORT I => j
+043A; 006B;            CYRILLIC SMALL LETTER KA => k
+043B; 006C;            CYRILLIC SMALL LETTER EL => l
+043C; 006D;            CYRILLIC SMALL LETTER EM => m
+043D; 006E;            CYRILLIC SMALL LETTER EN => n
+043E; 006F;            CYRILLIC SMALL LETTER O => o
+043F; 0070;            CYRILLIC SMALL LETTER PE => p
+0440; 0072;            CYRILLIC SMALL LETTER ER => r
+0441; 0073;            CYRILLIC SMALL LETTER ES => s
+0442; 0074;            CYRILLIC SMALL LETTER TE => t
+0443; 0075;            CYRILLIC SMALL LETTER U => u
+0444; 0066;            CYRILLIC SMALL LETTER EF => f
+0445; 006B 0068;       CYRILLIC SMALL LETTER HA => kh
+0446; 0063;            CYRILLIC SMALL LETTER TSE => c
+0447; 0063 0068;       CYRILLIC SMALL LETTER CHE => ch
+0448; 0073 0068;       CYRILLIC SMALL LETTER SHA => sh
+0449; 0073 0068 0068;  CYRILLIC SMALL LETTER SHCHA => shh ?? shch
+044A; 0022;            CYRILLIC SMALL LETTER HARD SIGN => "
+044B; 0079;            CYRILLIC SMALL LETTER YERU => y
+044C; 0027;            CYRILLIC SMALL LETTER SOFT SIGN => '
+044D; 0065 0068;       CYRILLIC SMALL LETTER E => eh
+044E; 006A 0075;       CYRILLIC SMALL LETTER YU => ju ?? yu
+044F; 006A 0061;       CYRILLIC SMALL LETTER YA => ja ?? ya
+
+
+################
+
+# not in windows-1251
+
+0400;CYRILLIC CAPITAL LETTER IE WITH GRAVE;Lu;0;L;0415 0300;;;;N;;;;0450;
+
+040D;CYRILLIC CAPITAL LETTER I WITH GRAVE;Lu;0;L;0418 0300;;;;N;;;;045D;
+
+0450;CYRILLIC SMALL LETTER IE WITH GRAVE;Ll;0;L;0435 0300;;;;N;;;0400;;0400
+
+045C;CYRILLIC SMALL LETTER KJE;Ll;0;L;043A 0301;;;;N;;;040C;;040C
+045D;CYRILLIC SMALL LETTER I WITH GRAVE;Ll;0;L;0438 0300;;;;N;;;040D;;040D
+
+0460;CYRILLIC CAPITAL LETTER OMEGA;Lu;0;L;;;;;N;;;;0461;
+0461;CYRILLIC SMALL LETTER OMEGA;Ll;0;L;;;;;N;;;0460;;0460
+0462;CYRILLIC CAPITAL LETTER YAT;Lu;0;L;;;;;N;;;;0463;
+0463;CYRILLIC SMALL LETTER YAT;Ll;0;L;;;;;N;;;0462;;0462
+0464;CYRILLIC CAPITAL LETTER IOTIFIED E;Lu;0;L;;;;;N;;;;0465;
+0465;CYRILLIC SMALL LETTER IOTIFIED E;Ll;0;L;;;;;N;;;0464;;0464
+0466;CYRILLIC CAPITAL LETTER LITTLE YUS;Lu;0;L;;;;;N;;;;0467;
+0467;CYRILLIC SMALL LETTER LITTLE YUS;Ll;0;L;;;;;N;;;0466;;0466
+0468;CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS;Lu;0;L;;;;;N;;;;0469;
+0469;CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS;Ll;0;L;;;;;N;;;0468;;0468
+046A;CYRILLIC CAPITAL LETTER BIG YUS;Lu;0;L;;;;;N;;;;046B;
+046B;CYRILLIC SMALL LETTER BIG YUS;Ll;0;L;;;;;N;;;046A;;046A
+
+046D;CYRILLIC SMALL LETTER IOTIFIED BIG YUS;Ll;0;L;;;;;N;;;046C;;046C
+046E;CYRILLIC CAPITAL LETTER KSI;Lu;0;L;;;;;N;;;;046F;
+046F;CYRILLIC SMALL LETTER KSI;Ll;0;L;;;;;N;;;046E;;046E
+0470;CYRILLIC CAPITAL LETTER PSI;Lu;0;L;;;;;N;;;;0471;
+0471;CYRILLIC SMALL LETTER PSI;Ll;0;L;;;;;N;;;0470;;0470
+0472;CYRILLIC CAPITAL LETTER FITA;Lu;0;L;;;;;N;;;;0473;
+0473;CYRILLIC SMALL LETTER FITA;Ll;0;L;;;;;N;;;0472;;0472
+0474;CYRILLIC CAPITAL LETTER IZHITSA;Lu;0;L;;;;;N;;;;0475;
+0475;CYRILLIC SMALL LETTER IZHITSA;Ll;0;L;;;;;N;;;0474;;0474
+0476;CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT;Lu;0;L;0474 030F;;;;N;CYRILLIC CAPITAL LETTER IZHITSA DOUBLE GRAVE;;;0477;
+0477;CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT;Ll;0;L;0475 030F;;;;N;CYRILLIC SMALL LETTER IZHITSA DOUBLE GRAVE;;0476;;0476
+0478;CYRILLIC CAPITAL LETTER UK;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER UK DIGRAPH;;;0479;
+0479;CYRILLIC SMALL LETTER UK;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER UK DIGRAPH;;0478;;0478
+047A;CYRILLIC CAPITAL LETTER ROUND OMEGA;Lu;0;L;;;;;N;;;;047B;
+047B;CYRILLIC SMALL LETTER ROUND OMEGA;Ll;0;L;;;;;N;;;047A;;047A
+047C;CYRILLIC CAPITAL LETTER OMEGA WITH TITLO;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER OMEGA TITLO;;;047D;
+047D;CYRILLIC SMALL LETTER OMEGA WITH TITLO;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER OMEGA TITLO;;047C;;047C
+047E;CYRILLIC CAPITAL LETTER OT;Lu;0;L;;;;;N;;;;047F;
+047F;CYRILLIC SMALL LETTER OT;Ll;0;L;;;;;N;;;047E;;047E
+0480;CYRILLIC CAPITAL LETTER KOPPA;Lu;0;L;;;;;N;;;;0481;
+0481;CYRILLIC SMALL LETTER KOPPA;Ll;0;L;;;;;N;;;0480;;0480
+0482;CYRILLIC THOUSANDS SIGN;So;0;L;;;;;N;;;;;
+0483;COMBINING CYRILLIC TITLO;Mn;230;NSM;;;;;N;CYRILLIC NON-SPACING TITLO;;;;
+0484;COMBINING CYRILLIC PALATALIZATION;Mn;230;NSM;;;;;N;CYRILLIC NON-SPACING PALATALIZATION;;;;
+0485;COMBINING CYRILLIC DASIA PNEUMATA;Mn;230;NSM;;;;;N;CYRILLIC NON-SPACING DASIA PNEUMATA;;;;
+0486;COMBINING CYRILLIC PSILI PNEUMATA;Mn;230;NSM;;;;;N;CYRILLIC NON-SPACING PSILI PNEUMATA;;;;
+0488;COMBINING CYRILLIC HUNDRED THOUSANDS SIGN;Me;0;NSM;;;;;N;;;;;
+0489;COMBINING CYRILLIC MILLIONS SIGN;Me;0;NSM;;;;;N;;;;;
+048A;CYRILLIC CAPITAL LETTER SHORT I WITH TAIL;Lu;0;L;;;;;N;;;;048B;
+048B;CYRILLIC SMALL LETTER SHORT I WITH TAIL;Ll;0;L;;;;;N;;;048A;;048A
+048C;CYRILLIC CAPITAL LETTER SEMISOFT SIGN;Lu;0;L;;;;;N;;;;048D;
+048D;CYRILLIC SMALL LETTER SEMISOFT SIGN;Ll;0;L;;;;;N;;;048C;;048C
+048E;CYRILLIC CAPITAL LETTER ER WITH TICK;Lu;0;L;;;;;N;;;;048F;
+048F;CYRILLIC SMALL LETTER ER WITH TICK;Ll;0;L;;;;;N;;;048E;;048E
+
+0492;CYRILLIC CAPITAL LETTER GHE WITH STROKE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER GE BAR;;;0493;
+0493;CYRILLIC SMALL LETTER GHE WITH STROKE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER GE BAR;;0492;;0492
+0494;CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER GE HOOK;;;0495;
+0495;CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER GE HOOK;;0494;;0494
+0496;CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER ZHE WITH RIGHT DESCENDER;;;0497;
+0497;CYRILLIC SMALL LETTER ZHE WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER ZHE WITH RIGHT DESCENDER;;0496;;0496
+0498;CYRILLIC CAPITAL LETTER ZE WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER ZE CEDILLA;;;0499;
+0499;CYRILLIC SMALL LETTER ZE WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER ZE CEDILLA;;0498;;0498
+049A;CYRILLIC CAPITAL LETTER KA WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER KA WITH RIGHT DESCENDER;;;049B;
+049B;CYRILLIC SMALL LETTER KA WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER KA WITH RIGHT DESCENDER;;049A;;049A
+049C;CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER KA VERTICAL BAR;;;049D;
+049D;CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER KA VERTICAL BAR;;049C;;049C
+049E;CYRILLIC CAPITAL LETTER KA WITH STROKE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER KA BAR;;;049F;
+049F;CYRILLIC SMALL LETTER KA WITH STROKE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER KA BAR;;049E;;049E
+04A0;CYRILLIC CAPITAL LETTER BASHKIR KA;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER REVERSED GE KA;;;04A1;
+04A1;CYRILLIC SMALL LETTER BASHKIR KA;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER REVERSED GE KA;;04A0;;04A0
+04A2;CYRILLIC CAPITAL LETTER EN WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER EN WITH RIGHT DESCENDER;;;04A3;
+04A3;CYRILLIC SMALL LETTER EN WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER EN WITH RIGHT DESCENDER;;04A2;;04A2
+04A4;CYRILLIC CAPITAL LIGATURE EN GHE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER EN GE;;;04A5;
+04A5;CYRILLIC SMALL LIGATURE EN GHE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER EN GE;;04A4;;04A4
+04A6;CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER PE HOOK;Abkhasian;;04A7;
+04A7;CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER PE HOOK;Abkhasian;04A6;;04A6
+04A8;CYRILLIC CAPITAL LETTER ABKHASIAN HA;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER O HOOK;;;04A9;
+04A9;CYRILLIC SMALL LETTER ABKHASIAN HA;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER O HOOK;;04A8;;04A8
+04AA;CYRILLIC CAPITAL LETTER ES WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER ES CEDILLA;;;04AB;
+04AB;CYRILLIC SMALL LETTER ES WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER ES CEDILLA;;04AA;;04AA
+04AC;CYRILLIC CAPITAL LETTER TE WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER TE WITH RIGHT DESCENDER;;;04AD;
+04AD;CYRILLIC SMALL LETTER TE WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER TE WITH RIGHT DESCENDER;;04AC;;04AC
+04AE;CYRILLIC CAPITAL LETTER STRAIGHT U;Lu;0;L;;;;;N;;;;04AF;
+04AF;CYRILLIC SMALL LETTER STRAIGHT U;Ll;0;L;;;;;N;;;04AE;;04AE
+04B0;CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER STRAIGHT U BAR;;;04B1;
+04B1;CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER STRAIGHT U BAR;;04B0;;04B0
+04B2;CYRILLIC CAPITAL LETTER HA WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER KHA WITH RIGHT DESCENDER;;;04B3;
+04B3;CYRILLIC SMALL LETTER HA WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER KHA WITH RIGHT DESCENDER;;04B2;;04B2
+04B4;CYRILLIC CAPITAL LIGATURE TE TSE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER TE TSE;Abkhasian;;04B5;
+04B5;CYRILLIC SMALL LIGATURE TE TSE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER TE TSE;Abkhasian;04B4;;04B4
+04B6;CYRILLIC CAPITAL LETTER CHE WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER CHE WITH RIGHT DESCENDER;;;04B7;
+04B7;CYRILLIC SMALL LETTER CHE WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER CHE WITH RIGHT DESCENDER;;04B6;;04B6
+04B8;CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER CHE VERTICAL BAR;;;04B9;
+04B9;CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER CHE VERTICAL BAR;;04B8;;04B8
+04BA;CYRILLIC CAPITAL LETTER SHHA;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER H;;;04BB;
+04BB;CYRILLIC SMALL LETTER SHHA;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER H;;04BA;;04BA
+04BC;CYRILLIC CAPITAL LETTER ABKHASIAN CHE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER IE HOOK;;;04BD;
+04BD;CYRILLIC SMALL LETTER ABKHASIAN CHE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER IE HOOK;;04BC;;04BC
+04BE;CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER IE HOOK OGONEK;;;04BF;
+04BF;CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER IE HOOK OGONEK;;04BE;;04BE
+04C0;CYRILLIC LETTER PALOCHKA;Lu;0;L;;;;;N;CYRILLIC LETTER I;;;;
+04C1;CYRILLIC CAPITAL LETTER ZHE WITH BREVE;Lu;0;L;0416 0306;;;;N;CYRILLIC CAPITAL LETTER SHORT ZHE;;;04C2;
+04C2;CYRILLIC SMALL LETTER ZHE WITH BREVE;Ll;0;L;0436 0306;;;;N;CYRILLIC SMALL LETTER SHORT ZHE;;04C1;;04C1
+04C3;CYRILLIC CAPITAL LETTER KA WITH HOOK;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER KA HOOK;;;04C4;
+04C4;CYRILLIC SMALL LETTER KA WITH HOOK;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER KA HOOK;;04C3;;04C3
+04C5;CYRILLIC CAPITAL LETTER EL WITH TAIL;Lu;0;L;;;;;N;;;;04C6;
+04C6;CYRILLIC SMALL LETTER EL WITH TAIL;Ll;0;L;;;;;N;;;04C5;;04C5
+04C7;CYRILLIC CAPITAL LETTER EN WITH HOOK;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER EN HOOK;;;04C8;
+04C8;CYRILLIC SMALL LETTER EN WITH HOOK;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER EN HOOK;;04C7;;04C7
+04C9;CYRILLIC CAPITAL LETTER EN WITH TAIL;Lu;0;L;;;;;N;;;;04CA;
+04CA;CYRILLIC SMALL LETTER EN WITH TAIL;Ll;0;L;;;;;N;;;04C9;;04C9
+04CB;CYRILLIC CAPITAL LETTER KHAKASSIAN CHE;Lu;0;L;;;;;N;CYRILLIC CAPITAL LETTER CHE WITH LEFT DESCENDER;;;04CC;
+04CC;CYRILLIC SMALL LETTER KHAKASSIAN CHE;Ll;0;L;;;;;N;CYRILLIC SMALL LETTER CHE WITH LEFT DESCENDER;;04CB;;04CB
+04CD;CYRILLIC CAPITAL LETTER EM WITH TAIL;Lu;0;L;;;;;N;;;;04CE;
+04CE;CYRILLIC SMALL LETTER EM WITH TAIL;Ll;0;L;;;;;N;;;04CD;;04CD
+04D0;CYRILLIC CAPITAL LETTER A WITH BREVE;Lu;0;L;0410 0306;;;;N;;;;04D1;
+04D1;CYRILLIC SMALL LETTER A WITH BREVE;Ll;0;L;0430 0306;;;;N;;;04D0;;04D0
+04D2;CYRILLIC CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0410 0308;;;;N;;;;04D3;
+04D3;CYRILLIC SMALL LETTER A WITH DIAERESIS;Ll;0;L;0430 0308;;;;N;;;04D2;;04D2
+04D4;CYRILLIC CAPITAL LIGATURE A IE;Lu;0;L;;;;;N;;;;04D5;
+04D5;CYRILLIC SMALL LIGATURE A IE;Ll;0;L;;;;;N;;;04D4;;04D4
+04D6;CYRILLIC CAPITAL LETTER IE WITH BREVE;Lu;0;L;0415 0306;;;;N;;;;04D7;
+04D7;CYRILLIC SMALL LETTER IE WITH BREVE;Ll;0;L;0435 0306;;;;N;;;04D6;;04D6
+04D8;CYRILLIC CAPITAL LETTER SCHWA;Lu;0;L;;;;;N;;;;04D9;
+04D9;CYRILLIC SMALL LETTER SCHWA;Ll;0;L;;;;;N;;;04D8;;04D8
+04DA;CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS;Lu;0;L;04D8 0308;;;;N;;;;04DB;
+04DB;CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS;Ll;0;L;04D9 0308;;;;N;;;04DA;;04DA
+04DC;CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS;Lu;0;L;0416 0308;;;;N;;;;04DD;
+04DD;CYRILLIC SMALL LETTER ZHE WITH DIAERESIS;Ll;0;L;0436 0308;;;;N;;;04DC;;04DC
+04DE;CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS;Lu;0;L;0417 0308;;;;N;;;;04DF;
+04DF;CYRILLIC SMALL LETTER ZE WITH DIAERESIS;Ll;0;L;0437 0308;;;;N;;;04DE;;04DE
+04E0;CYRILLIC CAPITAL LETTER ABKHASIAN DZE;Lu;0;L;;;;;N;;;;04E1;
+04E1;CYRILLIC SMALL LETTER ABKHASIAN DZE;Ll;0;L;;;;;N;;;04E0;;04E0
+04E2;CYRILLIC CAPITAL LETTER I WITH MACRON;Lu;0;L;0418 0304;;;;N;;;;04E3;
+04E3;CYRILLIC SMALL LETTER I WITH MACRON;Ll;0;L;0438 0304;;;;N;;;04E2;;04E2
+04E4;CYRILLIC CAPITAL LETTER I WITH DIAERESIS;Lu;0;L;0418 0308;;;;N;;;;04E5;
+04E5;CYRILLIC SMALL LETTER I WITH DIAERESIS;Ll;0;L;0438 0308;;;;N;;;04E4;;04E4
+04E6;CYRILLIC CAPITAL LETTER O WITH DIAERESIS;Lu;0;L;041E 0308;;;;N;;;;04E7;
+04E7;CYRILLIC SMALL LETTER O WITH DIAERESIS;Ll;0;L;043E 0308;;;;N;;;04E6;;04E6
+04E8;CYRILLIC CAPITAL LETTER BARRED O;Lu;0;L;;;;;N;;;;04E9;
+04E9;CYRILLIC SMALL LETTER BARRED O;Ll;0;L;;;;;N;;;04E8;;04E8
+04EA;CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS;Lu;0;L;04E8 0308;;;;N;;;;04EB;
+04EB;CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS;Ll;0;L;04E9 0308;;;;N;;;04EA;;04EA
+04EC;CYRILLIC CAPITAL LETTER E WITH DIAERESIS;Lu;0;L;042D 0308;;;;N;;;;04ED;
+04ED;CYRILLIC SMALL LETTER E WITH DIAERESIS;Ll;0;L;044D 0308;;;;N;;;04EC;;04EC
+04EE;CYRILLIC CAPITAL LETTER U WITH MACRON;Lu;0;L;0423 0304;;;;N;;;;04EF;
+04EF;CYRILLIC SMALL LETTER U WITH MACRON;Ll;0;L;0443 0304;;;;N;;;04EE;;04EE
+04F0;CYRILLIC CAPITAL LETTER U WITH DIAERESIS;Lu;0;L;0423 0308;;;;N;;;;04F1;
+04F1;CYRILLIC SMALL LETTER U WITH DIAERESIS;Ll;0;L;0443 0308;;;;N;;;04F0;;04F0
+04F2;CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE;Lu;0;L;0423 030B;;;;N;;;;04F3;
+04F3;CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE;Ll;0;L;0443 030B;;;;N;;;04F2;;04F2
+04F4;CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS;Lu;0;L;0427 0308;;;;N;;;;04F5;
+04F5;CYRILLIC SMALL LETTER CHE WITH DIAERESIS;Ll;0;L;0447 0308;;;;N;;;04F4;;04F4
+04F8;CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS;Lu;0;L;042B 0308;;;;N;;;;04F9;
+04F9;CYRILLIC SMALL LETTER YERU WITH DIAERESIS;Ll;0;L;044B 0308;;;;N;;;04F8;;04F8
+0500;CYRILLIC CAPITAL LETTER KOMI DE;Lu;0;L;;;;;N;;;;0501;
+0501;CYRILLIC SMALL LETTER KOMI DE;Ll;0;L;;;;;N;;;0500;;0500
+0502;CYRILLIC CAPITAL LETTER KOMI DJE;Lu;0;L;;;;;N;;;;0503;
+0503;CYRILLIC SMALL LETTER KOMI DJE;Ll;0;L;;;;;N;;;0502;;0502
+0504;CYRILLIC CAPITAL LETTER KOMI ZJE;Lu;0;L;;;;;N;;;;0505;
+0505;CYRILLIC SMALL LETTER KOMI ZJE;Ll;0;L;;;;;N;;;0504;;0504
+0506;CYRILLIC CAPITAL LETTER KOMI DZJE;Lu;0;L;;;;;N;;;;0507;
+0507;CYRILLIC SMALL LETTER KOMI DZJE;Ll;0;L;;;;;N;;;0506;;0506
+0508;CYRILLIC CAPITAL LETTER KOMI LJE;Lu;0;L;;;;;N;;;;0509;
+0509;CYRILLIC SMALL LETTER KOMI LJE;Ll;0;L;;;;;N;;;0508;;0508
+050A;CYRILLIC CAPITAL LETTER KOMI NJE;Lu;0;L;;;;;N;;;;050B;
+050B;CYRILLIC SMALL LETTER KOMI NJE;Ll;0;L;;;;;N;;;050A;;050A
+050C;CYRILLIC CAPITAL LETTER KOMI SJE;Lu;0;L;;;;;N;;;;050D;
+050D;CYRILLIC SMALL LETTER KOMI SJE;Ll;0;L;;;;;N;;;050C;;050C
+050E;CYRILLIC CAPITAL LETTER KOMI TJE;Lu;0;L;;;;;N;;;;050F;
+050F;CYRILLIC SMALL LETTER KOMI TJE;Ll;0;L;;;;;N;;;050E;;050E
\ No newline at end of file