~azzar1/unity/add-show-desktop-key

« back to all changes in this revision

Viewing changes to www/php/phpBB3/includes/utf/utf_tools.php

  • Committer: dcoles
  • Date: 2008-02-13 04:10:55 UTC
  • Revision ID: svn-v3-trunk0:2b9c9e99-6f39-0410-b283-7f802c844ae2:trunk:443
Added Forum application along with unmodifed version of phpBB3 "Olympus" 3.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?php
 
2
/**
 
3
*
 
4
* @package utf
 
5
* @version $Id: utf_tools.php,v 1.72 2007/12/04 16:20:38 naderman Exp $
 
6
* @copyright (c) 2006 phpBB Group
 
7
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
 
8
*
 
9
*/
 
10
 
 
11
/**
 
12
*/
 
13
if (!defined('IN_PHPBB'))
 
14
{
 
15
        exit;
 
16
}
 
17
 
 
18
// Enforce ASCII only string handling
 
19
setlocale(LC_CTYPE, 'C');
 
20
 
 
21
/**
 
22
* UTF-8 tools
 
23
*
 
24
* Whenever possible, these functions will try to use PHP's built-in functions or
 
25
* extensions, otherwise they will default to custom routines.
 
26
*
 
27
* @package utf
 
28
*/
 
29
 
 
30
if (!extension_loaded('xml'))
 
31
{
 
32
        /**
 
33
        * Implementation of PHP's native utf8_encode for people without XML support
 
34
        * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
 
35
        *
 
36
        * @param string $str ISO-8859-1 encoded data
 
37
        * @return string UTF-8 encoded data
 
38
        */
 
39
        function utf8_encode($str)
 
40
        {
 
41
                $out = '';
 
42
                for ($i = 0, $len = strlen($str); $i < $len; $i++)
 
43
                {
 
44
                        $letter = $str[$i];
 
45
                        $num = ord($letter);
 
46
                        if ($num < 0x80)
 
47
                        {
 
48
                                $out .= $letter;
 
49
                        }
 
50
                        else if ($num < 0xC0)
 
51
                        {
 
52
                                $out .= "\xC2" . $letter;
 
53
                        }
 
54
                        else
 
55
                        {
 
56
                                $out .= "\xC3" . chr($num - 64);
 
57
                        }
 
58
                }
 
59
                return $out;
 
60
        }
 
61
 
 
62
        /**
 
63
        * Implementation of PHP's native utf8_decode for people without XML support
 
64
        *
 
65
        * @param string $str UTF-8 encoded data
 
66
        * @return string ISO-8859-1 encoded data
 
67
        */
 
68
        function utf8_decode($str)
 
69
        {
 
70
                $pos = 0;
 
71
                $len = strlen($str);
 
72
                $ret = '';
 
73
        
 
74
                while ($pos < $len)
 
75
                {
 
76
                        $ord = ord($str[$pos]) & 0xF0;
 
77
                        if ($ord === 0xC0 || $ord === 0xD0)
 
78
                        {
 
79
                                $charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
 
80
                                $pos += 2;
 
81
                                $ret .= (($charval < 256) ? chr($charval) : '?');
 
82
                        }
 
83
                        else if ($ord === 0xE0)
 
84
                        {
 
85
                                $ret .= '?';
 
86
                                $pos += 3;
 
87
                        }
 
88
                        else if ($ord === 0xF0)
 
89
                        {
 
90
                                $ret .= '?';
 
91
                                $pos += 4;
 
92
                        }
 
93
                        else
 
94
                        {
 
95
                                $ret .= $str[$pos];
 
96
                                ++$pos;
 
97
                        }
 
98
                }
 
99
                return $ret;
 
100
        }
 
101
}
 
102
 
 
103
// mbstring is old and has its functions around for older versions of PHP.
 
104
// if mbstring is not loaded, we go into native mode.
 
105
if (extension_loaded('mbstring'))
 
106
{
 
107
        mb_internal_encoding('UTF-8');
 
108
 
 
109
        /**
 
110
        * UTF-8 aware alternative to strrpos
 
111
        * Find position of last occurrence of a char in a string
 
112
        *
 
113
        * Notes:
 
114
        * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
 
115
        */
 
116
        if (version_compare(PHP_VERSION, '5.2.0', '>='))
 
117
        {
 
118
                /**
 
119
                * UTF-8 aware alternative to strrpos
 
120
                * @ignore
 
121
                */
 
122
                function utf8_strrpos($str,     $needle, $offset = null)
 
123
                {
 
124
                        // Emulate behaviour of strrpos rather than raising warning
 
125
                        if (empty($str))
 
126
                        {
 
127
                                return false;
 
128
                        }
 
129
 
 
130
                        if (is_null($offset))
 
131
                        {
 
132
                                return mb_strrpos($str, $needle);
 
133
                        }
 
134
                        else
 
135
                        {
 
136
                                return mb_strrpos($str, $needle, $offset);
 
137
                        }
 
138
                }
 
139
        }
 
140
        else
 
141
        {
 
142
                /**
 
143
                * UTF-8 aware alternative to strrpos
 
144
                * @ignore
 
145
                */
 
146
                function utf8_strrpos($str,     $needle, $offset = null)
 
147
                {
 
148
                        // offset for mb_strrpos was added in 5.2.0
 
149
                        if (is_null($offset))
 
150
                        {
 
151
                                // Emulate behaviour of strrpos rather than raising warning
 
152
                                if (empty($str))
 
153
                                {
 
154
                                        return false;
 
155
                                }
 
156
 
 
157
                                return mb_strrpos($str, $needle);
 
158
                        }
 
159
                        else
 
160
                        {
 
161
                                if (!is_int($offset))
 
162
                                {
 
163
                                        trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
 
164
                                        return false;
 
165
                                }
 
166
 
 
167
                                $str = mb_substr($str, $offset);
 
168
 
 
169
                                if (false !== ($pos = mb_strrpos($str, $needle)))
 
170
                                {
 
171
                                        return $pos + $offset;
 
172
                                }
 
173
 
 
174
                                return false;
 
175
                        }
 
176
                }
 
177
        }
 
178
 
 
179
        /**
 
180
        * UTF-8 aware alternative to strpos
 
181
        * @ignore
 
182
        */
 
183
        function utf8_strpos($str, $needle, $offset = null)
 
184
        {
 
185
                if (is_null($offset))
 
186
                {
 
187
                        return mb_strpos($str, $needle);
 
188
                }
 
189
                else
 
190
                {
 
191
                        return mb_strpos($str, $needle, $offset);
 
192
                }
 
193
        }
 
194
 
 
195
        /**
 
196
        * UTF-8 aware alternative to strtolower
 
197
        * @ignore
 
198
        */
 
199
        function utf8_strtolower($str)
 
200
        {
 
201
                return mb_strtolower($str);
 
202
        }
 
203
 
 
204
        /**
 
205
        * UTF-8 aware alternative to strtoupper
 
206
        * @ignore
 
207
        */
 
208
        function utf8_strtoupper($str)
 
209
        {
 
210
                return mb_strtoupper($str);
 
211
        }
 
212
 
 
213
        /**
 
214
        * UTF-8 aware alternative to substr
 
215
        * @ignore
 
216
        */
 
217
        function utf8_substr($str, $offset, $length = null)
 
218
        {
 
219
                if (is_null($length))
 
220
                {
 
221
                        return mb_substr($str, $offset);
 
222
                }
 
223
                else
 
224
                {
 
225
                        return mb_substr($str, $offset, $length);
 
226
                }
 
227
        }
 
228
 
 
229
        /**
 
230
        * Return the length (in characters) of a UTF-8 string
 
231
        * @ignore
 
232
        */
 
233
        function utf8_strlen($text)
 
234
        {
 
235
                return mb_strlen($text, 'utf-8');
 
236
        }
 
237
}
 
238
else
 
239
{
 
240
        /**
 
241
        * UTF-8 aware alternative to strrpos
 
242
        * Find position of last occurrence of a char in a string
 
243
        *
 
244
        * @author Harry Fuecks
 
245
        * @param string $str haystack
 
246
        * @param string $needle needle
 
247
        * @param integer $offset (optional) offset (from left)
 
248
        * @return mixed integer position or FALSE on failure
 
249
        */
 
250
        function utf8_strrpos($str,     $needle, $offset = null)
 
251
        {
 
252
                if (is_null($offset))
 
253
                {
 
254
                        $ar     = explode($needle, $str);
 
255
                        
 
256
                        if (sizeof($ar) > 1)
 
257
                        {
 
258
                                // Pop off the end of the string where the last match was made
 
259
                                array_pop($ar);
 
260
                                $str = join($needle, $ar);
 
261
 
 
262
                                return utf8_strlen($str);
 
263
                        }
 
264
                        return false;
 
265
                }
 
266
                else
 
267
                {
 
268
                        if (!is_int($offset))
 
269
                        {
 
270
                                trigger_error('utf8_strrpos     expects parameter 3     to be long', E_USER_ERROR);
 
271
                                return false;
 
272
                        }
 
273
 
 
274
                        $str = utf8_substr($str, $offset);
 
275
 
 
276
                        if (false !== ($pos = utf8_strrpos($str, $needle)))
 
277
                        {
 
278
                                return $pos     + $offset;
 
279
                        }
 
280
 
 
281
                        return false;
 
282
                }
 
283
        }
 
284
 
 
285
        /**
 
286
        * UTF-8 aware alternative to strpos
 
287
        * Find position of first occurrence of a string
 
288
        *
 
289
        * @author Harry Fuecks
 
290
        * @param string $str haystack
 
291
        * @param string $needle needle
 
292
        * @param integer $offset offset in characters (from left)
 
293
        * @return mixed integer position or FALSE on failure
 
294
        */
 
295
        function utf8_strpos($str, $needle, $offset = null)
 
296
        {
 
297
                if (is_null($offset))
 
298
                {
 
299
                        $ar = explode($needle, $str);
 
300
                        if (sizeof($ar) > 1)
 
301
                        {
 
302
                                return utf8_strlen($ar[0]);
 
303
                        }
 
304
                        return false;
 
305
                }
 
306
                else
 
307
                {
 
308
                        if (!is_int($offset))
 
309
                        {
 
310
                                trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
 
311
                                return false;
 
312
                        }
 
313
 
 
314
                        $str = utf8_substr($str, $offset);
 
315
 
 
316
                        if (false !== ($pos = utf8_strpos($str, $needle)))
 
317
                        {
 
318
                                return $pos + $offset;
 
319
                        }
 
320
 
 
321
                        return false;
 
322
                }
 
323
        }
 
324
 
 
325
        /**
 
326
        * UTF-8 aware alternative to strtolower
 
327
        * Make a string lowercase
 
328
        * Note: The concept of a characters "case" only exists is some alphabets
 
329
        * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 
330
        * not exist in the Chinese alphabet, for example. See Unicode Standard
 
331
        * Annex #21: Case Mappings
 
332
        *
 
333
        * @param string
 
334
        * @return string string in lowercase
 
335
        */
 
336
        function utf8_strtolower($string)
 
337
        {
 
338
                static $utf8_upper_to_lower = array(
 
339
                        "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
 
340
                        "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
 
341
                        "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
 
342
                        "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
 
343
                        "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
 
344
                        "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
 
345
                        "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
 
346
                        "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
 
347
                        "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
 
348
                        "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
 
349
                        "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
 
350
                        "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
 
351
                        "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
 
352
                        "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
 
353
                        "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
 
354
                        "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
 
355
                        "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
 
356
                        "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
 
357
                        "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
 
358
                        "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
 
359
                        "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
 
360
                        "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
 
361
                        "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
 
362
                        "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
 
363
                        "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
 
364
                        "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
 
365
                        "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
 
366
                        "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
 
367
                        "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
 
368
                        "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
 
369
                        "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
 
370
                        "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
 
371
                        "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
 
372
                        "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
 
373
                        "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
 
374
                        "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
 
375
                        "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
 
376
                        "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
 
377
                        "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
 
378
                        "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
 
379
                        "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
 
380
                        "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
 
381
                        "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
 
382
                        "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
 
383
                        "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
 
384
                        "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
 
385
                );
 
386
 
 
387
                return strtr(strtolower($string), $utf8_upper_to_lower);
 
388
        }
 
389
 
 
390
        /**
 
391
        * UTF-8 aware alternative to strtoupper
 
392
        * Make a string uppercase
 
393
        * Note: The concept of a characters "case" only exists is some alphabets
 
394
        * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 
395
        * not exist in the Chinese alphabet, for example. See Unicode Standard
 
396
        * Annex #21: Case Mappings
 
397
        *
 
398
        * @param string
 
399
        * @return string string in uppercase
 
400
        */
 
401
        function utf8_strtoupper($string)
 
402
        {
 
403
                static $utf8_lower_to_upper = array(
 
404
                        "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
 
405
                        "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
 
406
                        "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
 
407
                        "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
 
408
                        "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
 
409
                        "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
 
410
                        "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
 
411
                        "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
 
412
                        "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
 
413
                        "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
 
414
                        "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
 
415
                        "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
 
416
                        "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
 
417
                        "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
 
418
                        "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
 
419
                        "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
 
420
                        "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
 
421
                        "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
 
422
                        "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
 
423
                        "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
 
424
                        "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
 
425
                        "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
 
426
                        "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
 
427
                        "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
 
428
                        "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
 
429
                        "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
 
430
                        "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
 
431
                        "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
 
432
                        "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
 
433
                        "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
 
434
                        "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
 
435
                        "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
 
436
                        "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
 
437
                        "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
 
438
                        "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
 
439
                        "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
 
440
                        "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
 
441
                        "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
 
442
                        "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
 
443
                        "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
 
444
                        "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
 
445
                        "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
 
446
                        "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
 
447
                        "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
 
448
                        "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
 
449
                        "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
 
450
                );
 
451
 
 
452
                return strtr(strtoupper($string), $utf8_lower_to_upper);
 
453
        }
 
454
 
 
455
        /**
 
456
        * UTF-8 aware alternative to substr
 
457
        * Return part of a string given character offset (and optionally length)
 
458
        *
 
459
        * Note arguments: comparied to substr - if offset or length are
 
460
        * not integers, this version will not complain but rather massages them
 
461
        * into an integer.
 
462
        *
 
463
        * Note on returned values: substr documentation states false can be
 
464
        * returned in some cases (e.g. offset > string length)
 
465
        * mb_substr never returns false, it will return an empty string instead.
 
466
        * This adopts the mb_substr approach
 
467
        *
 
468
        * Note on implementation: PCRE only supports repetitions of less than
 
469
        * 65536, in order to accept up to MAXINT values for offset and length,
 
470
        * we'll repeat a group of 65535 characters when needed.
 
471
        *
 
472
        * Note on implementation: calculating the number of characters in the
 
473
        * string is a relatively expensive operation, so we only carry it out when
 
474
        * necessary. It isn't necessary for +ve offsets and no specified length
 
475
        *
 
476
        * @author Chris Smith<chris@jalakai.co.uk>
 
477
        * @param string $str
 
478
        * @param integer $offset number of UTF-8 characters offset (from left)
 
479
        * @param integer $length (optional) length in UTF-8 characters from offset
 
480
        * @return mixed string or FALSE if failure
 
481
        */
 
482
        function utf8_substr($str, $offset, $length = NULL)
 
483
        {
 
484
                // generates E_NOTICE
 
485
                // for PHP4 objects, but not PHP5 objects
 
486
                $str = (string) $str;
 
487
                $offset = (int) $offset;
 
488
                if (!is_null($length))
 
489
                {
 
490
                        $length = (int) $length;
 
491
                }
 
492
 
 
493
                // handle trivial cases
 
494
                if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
 
495
                {
 
496
                        return '';
 
497
                }
 
498
 
 
499
                // normalise negative offsets (we could use a tail
 
500
                // anchored pattern, but they are horribly slow!)
 
501
                if ($offset < 0)
 
502
                {
 
503
                        // see notes
 
504
                        $strlen = utf8_strlen($str);
 
505
                        $offset = $strlen + $offset;
 
506
                        if ($offset < 0)
 
507
                        {
 
508
                                $offset = 0;
 
509
                        }
 
510
                }
 
511
 
 
512
                $op = '';
 
513
                $lp = '';
 
514
 
 
515
                // establish a pattern for offset, a
 
516
                // non-captured group equal in length to offset
 
517
                if ($offset > 0)
 
518
                {
 
519
                        $ox = (int) ($offset / 65535);
 
520
                        $oy = $offset % 65535;
 
521
 
 
522
                        if ($ox)
 
523
                        {
 
524
                                $op = '(?:.{65535}){' . $ox . '}';
 
525
                        }
 
526
 
 
527
                        $op = '^(?:' . $op . '.{' . $oy . '})';
 
528
                }
 
529
                else
 
530
                {       
 
531
                        // offset == 0; just anchor the pattern
 
532
                        $op = '^';
 
533
                }
 
534
 
 
535
                // establish a pattern for length
 
536
                if (is_null($length))
 
537
                {
 
538
                        // the rest of the string
 
539
                        $lp = '(.*)$';
 
540
                }
 
541
                else
 
542
                {
 
543
                        if (!isset($strlen))
 
544
                        {
 
545
                                // see notes
 
546
                                $strlen = utf8_strlen($str);
 
547
                        }
 
548
 
 
549
                        // another trivial case
 
550
                        if ($offset > $strlen)
 
551
                        {
 
552
                                return '';
 
553
                        }
 
554
 
 
555
                        if ($length > 0)
 
556
                        {
 
557
                                // reduce any length that would
 
558
                                // go passed the end of the string
 
559
                                $length = min($strlen - $offset, $length);
 
560
 
 
561
                                $lx = (int) ($length / 65535);
 
562
                                $ly = $length % 65535;
 
563
                                
 
564
                                // negative length requires a captured group
 
565
                                // of length characters
 
566
                                if ($lx)
 
567
                                {
 
568
                                        $lp = '(?:.{65535}){' . $lx . '}';
 
569
                                }
 
570
                                $lp = '(' . $lp . '.{'. $ly . '})';
 
571
                        }
 
572
                        else if ($length < 0)
 
573
                        {
 
574
                                if ($length < ($offset - $strlen))
 
575
                                {
 
576
                                        return '';
 
577
                                }
 
578
 
 
579
                                $lx = (int)((-$length) / 65535);
 
580
                                $ly = (-$length) % 65535;
 
581
 
 
582
                                // negative length requires ... capture everything
 
583
                                // except a group of  -length characters
 
584
                                // anchored at the tail-end of the string
 
585
                                if ($lx)
 
586
                                {
 
587
                                        $lp = '(?:.{65535}){' . $lx . '}';
 
588
                                }
 
589
                                $lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
 
590
                        }
 
591
                }
 
592
 
 
593
                if (!preg_match('#' . $op . $lp . '#us', $str, $match))
 
594
                {
 
595
                        return '';
 
596
                }
 
597
 
 
598
                return $match[1];
 
599
        }
 
600
 
 
601
        /**
 
602
        * Return the length (in characters) of a UTF-8 string
 
603
        *
 
604
        * @param        string  $text           UTF-8 string
 
605
        * @return       integer                         Length (in chars) of given string
 
606
        */
 
607
        function utf8_strlen($text)
 
608
        {
 
609
                // Since utf8_decode is replacing multibyte characters to ? strlen works fine
 
610
                return strlen(utf8_decode($text));
 
611
        }
 
612
}
 
613
 
 
614
/**
* UTF-8 aware alternative to str_split
* Split a string into an array of chunks, measured in characters instead of bytes
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk (must be an integer of at least 1)
* @return array|bool chunks of $str in their original order (the last one may be shorter),
*                    or false if $split_len is not an integer >= 1
*/
function utf8_str_split($str, $split_len = 1)
{
        if (!is_int($split_len) || $split_len < 1)
        {
                return false;
        }

        // A string that fits into one chunk (the empty string included) is returned as the only element
        if (utf8_strlen($str) <= $split_len)
        {
                return array($str);
        }

        // The greedy quantifier takes $split_len characters per match and whatever is
        // left over for the last one. With the s modifier "." matches every character,
        // so a NUL or newline in the final, shorter chunk is kept as well.
        preg_match_all('/.{1,' . $split_len . '}/us', $str, $chunks);

        return $chunks[0];
}
 
639
 
 
640
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* @author Harry Fuecks
* @param string $str UTF-8 string to examine
* @param string $mask UTF-8 string listing the allowed characters; every character is taken literally
* @param int $start optional character offset, handed to utf8_substr()
* @param int $length optional number of characters to examine, handed to utf8_substr()
* @return int number of leading characters of the examined segment that all occur in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
        // Nothing can match an empty mask, and "[]" would not be a valid character class
        if ((string) $mask === '')
        {
                return 0;
        }

        if ($start !== null || $length !== null)
        {
                $str = utf8_substr($str, $start, $length);
        }

        // Escape everything that is special inside a character class (\ ] [ ^ -) as well
        // as the pattern delimiter, otherwise such characters in the mask would alter or
        // break the expression instead of being matched literally
        $mask = preg_replace('#[\\\\\\]\\[/^-]#', '\\\\$0', $mask);

        preg_match('/^[' . $mask . ']+/u', $str, $matches);

        if (isset($matches[0]))
        {
                return utf8_strlen($matches[0]);
        }

        return 0;
}
 
662
 
 
663
/**
* UTF-8 aware alternative to ucfirst
* Uppercase the first character of a string and leave the rest untouched
*
* @author Harry Fuecks
* @param string $str UTF-8 string
* @return string the string with its first character passed through utf8_strtoupper()
*/
function utf8_ucfirst($str)
{
        $char_count = utf8_strlen($str);

        // Nothing to uppercase
        if ($char_count == 0)
        {
                return '';
        }

        // A single character can be uppercased as a whole
        if ($char_count == 1)
        {
                return utf8_strtoupper($str);
        }

        // Separate the first character from the remainder of the string
        preg_match('/^(.{1})(.*)$/us', $str, $parts);

        return utf8_strtoupper($parts[1]) . $parts[2];
}
 
689
 
 
690
/**
* Recode a string to UTF-8
*
* The native converters are tried first, in this order: iconv, mbstring (for a
* fixed list of encodings only) and recode. If none of them produces a non-empty
* result, the transcoder functions from includes/utf/data/ are used instead.
* An encoding that cannot be handled is reported through trigger_error() with
* E_USER_ERROR.
*
* Input declared as utf-8, non-string input and input that is empty() is
* returned unchanged.
*
* @param        string  $string         Original string
* @param        string  $encoding       Original encoding (it is lowercased before use)
* @return       string                          The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
        $encoding = strtolower($encoding);

        if ($encoding == 'utf-8' || !is_string($string) || empty($string))
        {
                return $string;
        }

        if ($encoding == 'iso-8859-1')
        {
                // iso-8859-1 is forced to be cp1252
                $encoding = 'cp1252';
        }
        else if ($encoding == 'iso-8859-8-i')
        {
                // iso-8859-8-i is handled as iso-8859-8 once the text went through hebrev()
                $encoding = 'iso-8859-8';
                $string = hebrev($string);
        }

        // First choice: iconv()
        if (function_exists('iconv'))
        {
                $converted = @iconv($encoding, 'utf-8', $string);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // Second choice: the mb_string extension
        // mbstring is nasty on PHP4, so it only ever gets encodings from this list
        $mb_encodings = array(
                'iso-8859-1',
                'iso-8859-2',
                'iso-8859-4',
                'iso-8859-7',
                'iso-8859-9',
                'iso-8859-15',
                'windows-1251',
                'windows-1252',
                'cp1252',
                'shift_jis',
                'euc-kr',
                'big5',
                'gb2312',
        );

        if (function_exists('mb_convert_encoding') && in_array($encoding, $mb_encodings))
        {
                $converted = @mb_convert_encoding($string, 'utf-8', $encoding);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // Third choice: the recode extension
        if (function_exists('recode_string'))
        {
                $converted = @recode_string($encoding . '..utf-8', $string);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // From here on only the custom transcoders are left.
        // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
        if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
        {
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
        }

        global $phpbb_root_path, $phpEx;

        $basic_file = $phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx;
        $cjk_file = $phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx;

        // iso-8859-* character encoding
        if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
        {
                if (in_array($array[1], array('1', '2', '4', '7', '8', '9', '15')))
                {
                        $transcoder = 'iso_8859_' . $array[1];

                        if (!function_exists($transcoder))
                        {
                                if (!file_exists($basic_file))
                                {
                                        trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                                }
                                include($basic_file);
                        }

                        return call_user_func($transcoder, $string);
                }

                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
        }

        // CP/WIN character encoding
        if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
        {
                // Code page 932 is neither converted nor rejected here, it is left for the checks below
                if ($array[1] != '932')
                {
                        if (in_array($array[1], array('1250', '1251', '1252', '1254', '1255', '1256', '1257', '874')))
                        {
                                $transcoder = 'cp' . $array[1];

                                if (!function_exists($transcoder))
                                {
                                        if (!file_exists($basic_file))
                                        {
                                                trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                                        }
                                        include($basic_file);
                                }

                                return call_user_func($transcoder, $string);
                        }

                        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
                }
        }

        // Remaining encodings, checked in this order: TIS-620, SJIS, EUC_KR, BIG-5, GB2312
        // Each entry: pattern for the encoding name, transcoder function, file defining it, error if that file is missing
        $fallbacks = array(
                array('/tis[_ -]?620/', 'tis_620', $basic_file, 'Basic reencoder file is missing'),
                array('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', 'sjis', $cjk_file, 'CJK reencoder file is missing'),
                array('/euc[_ -]?kr/', 'euc_kr', $cjk_file, 'CJK reencoder file is missing'),
                array('/big[_ -]?5/', 'big5', $cjk_file, 'CJK reencoder file is missing'),
                array('/gb[_ -]?2312/', 'gb2312', $cjk_file, 'CJK reencoder file is missing'),
        );

        foreach ($fallbacks as $fallback)
        {
                list($pattern, $transcoder, $file, $missing_message) = $fallback;

                if (!preg_match($pattern, $encoding))
                {
                        continue;
                }

                if (!function_exists($transcoder))
                {
                        if (!file_exists($file))
                        {
                                trigger_error($missing_message, E_USER_ERROR);
                        }
                        include($file);
                }

                return $transcoder($string);
        }

        // No converter is able to handle this encoding
        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
}
 
914
 
 
915
/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* Every multibyte sequence found in the text is handed to utf8_encode_ncr_callback()
*
* @param        string  $text           UTF-8 string in NFC
* @return       string                          ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
        // A lead byte followed by one to three continuation bytes
        $multibyte_char = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

        return preg_replace_callback($multibyte_char, 'utf8_encode_ncr_callback', $text);
}
 
925
 
 
926
/**
* Callback used in utf8_encode_ncr()
*
* Takes a UTF-8 char and replaces it with its decimal NCR. Attention, $m is an array
*
* @param        array   $m                      0-based numerically indexed array passed by preg_replace_callback()
* @return       string                          "&#", the result of utf8_ord() for the matched char, and ";"
*/
function utf8_encode_ncr_callback($m)
{
        $code_point = utf8_ord($m[0]);

        return '&#' . $code_point . ';';
}
 
938
 
 
939
/**
* Converts a UTF-8 char to its UNICODE code point
*
* The byte length of the input selects the decoding: one byte is returned via
* ord(), two to four bytes are decoded as a lead byte plus continuation bytes.
* No validation of the bytes takes place.
*
* @param string $chr UTF-8 char
* @return integer UNICODE code point; input that is not 1 to 4 bytes long is returned unchanged
*/
function utf8_ord($chr)
{
        $byte_count = strlen($chr);

        if ($byte_count == 1)
        {
                return ord($chr);
        }

        if ($byte_count == 2)
        {
                // 5 payload bits in the lead byte, 6 in the continuation byte
                $lead = ord($chr[0]) & 0x1F;

                return ($lead << 6) | (ord($chr[1]) & 0x3F);
        }

        if ($byte_count == 3)
        {
                // 4 payload bits in the lead byte, 6 in each continuation byte
                $lead = ord($chr[0]) & 0x0F;

                return ($lead << 12) | ((ord($chr[1]) & 0x3F) << 6) | (ord($chr[2]) & 0x3F);
        }

        if ($byte_count == 4)
        {
                // 3 payload bits in the lead byte, 6 in each continuation byte
                $lead = ord($chr[0]) & 0x07;

                return ($lead << 18) | ((ord($chr[1]) & 0x3F) << 12) | ((ord($chr[2]) & 0x3F) << 6) | (ord($chr[3]) & 0x3F);
        }

        // Any other length is handed back as it came in
        return $chr;
}
 
969
 
 
970
/**
* Converts a UNICODE code point to a UTF-8 char
*
* The number of bytes is chosen from the size of the code point; the code
* point itself is not validated.
*
* @param        int             $cp     UNICODE code point
* @return       string          UTF-8 char
*/
function utf8_chr($cp)
{
        // Above U+FFFF: four bytes
        if ($cp > 0xFFFF)
        {
                $lead = chr(0xF0 | ($cp >> 18));
                $second = chr(0x80 | (($cp >> 12) & 0x3F));
                $third = chr(0x80 | (($cp >> 6) & 0x3F));
                $fourth = chr(0x80 | ($cp & 0x3F));

                return $lead . $second . $third . $fourth;
        }

        // Above U+07FF: three bytes
        if ($cp > 0x7FF)
        {
                $lead = chr(0xE0 | ($cp >> 12));
                $second = chr(0x80 | (($cp >> 6) & 0x3F));
                $third = chr(0x80 | ($cp & 0x3F));

                return $lead . $second . $third;
        }

        // Above U+007F: two bytes
        if ($cp > 0x7F)
        {
                $lead = chr(0xC0 | ($cp >> 6));
                $second = chr(0x80 | ($cp & 0x3F));

                return $lead . $second;
        }

        // Everything else fits into a single byte
        return chr($cp);
}
 
995
 
 
996
/**
* Convert Numeric Character References to UTF-8 chars
*
* Notes:
*       - NCRs are not converted recursively, if you pass &#38;#38; it will return &#38;
*       - there is NO check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
*       - only references with 1 to 6 decimal digits or 1 to 5 hexadecimal digits are recognised
*
* @param        string  $text           String to convert, encoded in UTF-8 (no normal form required)
* @return       string                          UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
        // Decimal (&#123;) or hexadecimal (&#x7B;) reference, matched case-insensitively
        $ncr = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

        return preg_replace_callback($ncr, 'utf8_decode_ncr_callback', $text);
}
 
1010
 
 
1011
/**
* Callback used in utf8_decode_ncr()
*
* Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
* It will ignore most of invalid NCRs, but not all!
*
* @param        array   $m                      0-based numerically indexed array passed by preg_replace_callback()
* @return       string                          UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
        $reference = $m[1];

        if (strncasecmp($reference, 'x', 1))
        {
                // No leading "x": the digits already are the decimal code point
                $cp = $reference;
        }
        else
        {
                // Leading "x" or "X": the rest is the code point in hexadecimal
                $cp = hexdec(substr($reference, 1));
        }

        return utf8_chr($cp);
}
 
1026
 
 
1027
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The replacement tables are included from includes/utf/data/ the first time
* they are needed and kept in a static array for later calls.
*
* @param        string  $text   text to be case folded
* @param        string  $option 'full' applies the full table after the common one,
*                                                       any other value applies the simple table instead
* @return       string                  case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
        static $uniarray = array();
        global $phpbb_root_path, $phpEx;

        $table_files = array(
                'c'     => 'includes/utf/data/case_fold_c.',
                'f'     => 'includes/utf/data/case_fold_f.',
                's'     => 'includes/utf/data/case_fold_s.',
        );

        // full replaces a character with multiple characters,
        // simple replaces a character with another character
        $variant = ($option === 'full') ? 'f' : 's';

        // common is always needed, the second table depends on the option
        foreach (array('c', $variant) as $table)
        {
                if (!isset($uniarray[$table]))
                {
                        $uniarray[$table] = include($phpbb_root_path . $table_files[$table] . $phpEx);
                }
        }

        // common is always replaced first
        $text = strtr($text, $uniarray['c']);

        return strtr($text, $uniarray[$variant]);
}
 
1073
 
 
1074
/**
 
1075
* Takes the input and does a "special" case fold. It does minor normalization
 
1076
* and returns NFKC compatible text
 
1077
*
 
1078
* @param        string  $text   text to be case folded
 
1079
* @param        string  $option determines how we will fold the cases
 
1080
* @return       string                  case folded text
 
1081
*/
 
1082
function utf8_case_fold_nfkc($text, $option = 'full')
 
1083
{
 
1084
        static $fc_nfkc_closure = array(
 
1085
                "\xCD\xBA"      => "\x20\xCE\xB9",
 
1086
                "\xCF\x92"      => "\xCF\x85",
 
1087
                "\xCF\x93"      => "\xCF\x8D",
 
1088
                "\xCF\x94"      => "\xCF\x8B",
 
1089
                "\xCF\xB2"      => "\xCF\x83",
 
1090
                "\xCF\xB9"      => "\xCF\x83",
 
1091
                "\xE1\xB4\xAC"  => "\x61",
 
1092
                "\xE1\xB4\xAD"  => "\xC3\xA6",
 
1093
                "\xE1\xB4\xAE"  => "\x62",
 
1094
                "\xE1\xB4\xB0"  => "\x64",
 
1095
                "\xE1\xB4\xB1"  => "\x65",
 
1096
                "\xE1\xB4\xB2"  => "\xC7\x9D",
 
1097
                "\xE1\xB4\xB3"  => "\x67",
 
1098
                "\xE1\xB4\xB4"  => "\x68",
 
1099
                "\xE1\xB4\xB5"  => "\x69",
 
1100
                "\xE1\xB4\xB6"  => "\x6A",
 
1101
                "\xE1\xB4\xB7"  => "\x6B",
 
1102
                "\xE1\xB4\xB8"  => "\x6C",
 
1103
                "\xE1\xB4\xB9"  => "\x6D",
 
1104
                "\xE1\xB4\xBA"  => "\x6E",
 
1105
                "\xE1\xB4\xBC"  => "\x6F",
 
1106
                "\xE1\xB4\xBD"  => "\xC8\xA3",
 
1107
                "\xE1\xB4\xBE"  => "\x70",
 
1108
                "\xE1\xB4\xBF"  => "\x72",
 
1109
                "\xE1\xB5\x80"  => "\x74",
 
1110
                "\xE1\xB5\x81"  => "\x75",
 
1111
                "\xE1\xB5\x82"  => "\x77",
 
1112
                "\xE2\x82\xA8"  => "\x72\x73",
 
1113
                "\xE2\x84\x82"  => "\x63",
 
1114
                "\xE2\x84\x83"  => "\xC2\xB0\x63",
 
1115
                "\xE2\x84\x87"  => "\xC9\x9B",
 
1116
                "\xE2\x84\x89"  => "\xC2\xB0\x66",
 
1117
                "\xE2\x84\x8B"  => "\x68",
 
1118
                "\xE2\x84\x8C"  => "\x68",
 
1119
                "\xE2\x84\x8D"  => "\x68",
 
1120
                "\xE2\x84\x90"  => "\x69",
 
1121
                "\xE2\x84\x91"  => "\x69",
 
1122
                "\xE2\x84\x92"  => "\x6C",
 
1123
                "\xE2\x84\x95"  => "\x6E",
 
1124
                "\xE2\x84\x96"  => "\x6E\x6F",
 
1125
                "\xE2\x84\x99"  => "\x70",
 
1126
                "\xE2\x84\x9A"  => "\x71",
 
1127
                "\xE2\x84\x9B"  => "\x72",
 
1128
                "\xE2\x84\x9C"  => "\x72",
 
1129
                "\xE2\x84\x9D"  => "\x72",
 
1130
                "\xE2\x84\xA0"  => "\x73\x6D",
 
1131
                "\xE2\x84\xA1"  => "\x74\x65\x6C",
 
1132
                "\xE2\x84\xA2"  => "\x74\x6D",
 
1133
                "\xE2\x84\xA4"  => "\x7A",
 
1134
                "\xE2\x84\xA8"  => "\x7A",
 
1135
                "\xE2\x84\xAC"  => "\x62",
 
1136
                "\xE2\x84\xAD"  => "\x63",
 
1137
                "\xE2\x84\xB0"  => "\x65",
 
1138
                "\xE2\x84\xB1"  => "\x66",
 
1139
                "\xE2\x84\xB3"  => "\x6D",
 
1140
                "\xE2\x84\xBB"  => "\x66\x61\x78",
 
1141
                "\xE2\x84\xBE"  => "\xCE\xB3",
 
1142
                "\xE2\x84\xBF"  => "\xCF\x80",
 
1143
                "\xE2\x85\x85"  => "\x64",
 
1144
                "\xE3\x89\x90"  => "\x70\x74\x65",
 
1145
                "\xE3\x8B\x8C"  => "\x68\x67",
 
1146
                "\xE3\x8B\x8E"  => "\x65\x76",
 
1147
                "\xE3\x8B\x8F"  => "\x6C\x74\x64",
 
1148
                "\xE3\x8D\xB1"  => "\x68\x70\x61",
 
1149
                "\xE3\x8D\xB3"  => "\x61\x75",
 
1150
                "\xE3\x8D\xB5"  => "\x6F\x76",
 
1151
                "\xE3\x8D\xBA"  => "\x69\x75",
 
1152
                "\xE3\x8E\x80"  => "\x70\x61",
 
1153
                "\xE3\x8E\x81"  => "\x6E\x61",
 
1154
                "\xE3\x8E\x82"  => "\xCE\xBC\x61",
 
1155
                "\xE3\x8E\x83"  => "\x6D\x61",
 
1156
                "\xE3\x8E\x84"  => "\x6B\x61",
 
1157
                "\xE3\x8E\x85"  => "\x6B\x62",
 
1158
                "\xE3\x8E\x86"  => "\x6D\x62",
 
1159
                "\xE3\x8E\x87"  => "\x67\x62",
 
1160
                "\xE3\x8E\x8A"  => "\x70\x66",
 
1161
                "\xE3\x8E\x8B"  => "\x6E\x66",
 
1162
                "\xE3\x8E\x8C"  => "\xCE\xBC\x66",
 
1163
                "\xE3\x8E\x90"  => "\x68\x7A",
 
1164
                "\xE3\x8E\x91"  => "\x6B\x68\x7A",
 
1165
                "\xE3\x8E\x92"  => "\x6D\x68\x7A",
 
1166
                "\xE3\x8E\x93"  => "\x67\x68\x7A",
 
1167
                "\xE3\x8E\x94"  => "\x74\x68\x7A",
 
1168
                "\xE3\x8E\xA9"  => "\x70\x61",
 
1169
                "\xE3\x8E\xAA"  => "\x6B\x70\x61",
 
1170
                "\xE3\x8E\xAB"  => "\x6D\x70\x61",
 
1171
                "\xE3\x8E\xAC"  => "\x67\x70\x61",
 
1172
                "\xE3\x8E\xB4"  => "\x70\x76",
 
1173
                "\xE3\x8E\xB5"  => "\x6E\x76",
 
1174
                "\xE3\x8E\xB6"  => "\xCE\xBC\x76",
 
1175
                "\xE3\x8E\xB7"  => "\x6D\x76",
 
1176
                "\xE3\x8E\xB8"  => "\x6B\x76",
 
1177
                "\xE3\x8E\xB9"  => "\x6D\x76",
 
1178
                "\xE3\x8E\xBA"  => "\x70\x77",
 
1179
                "\xE3\x8E\xBB"  => "\x6E\x77",
 
1180
                "\xE3\x8E\xBC"  => "\xCE\xBC\x77",
 
1181
                "\xE3\x8E\xBD"  => "\x6D\x77",
 
1182
                "\xE3\x8E\xBE"  => "\x6B\x77",
 
1183
                "\xE3\x8E\xBF"  => "\x6D\x77",
 
1184
                "\xE3\x8F\x80"  => "\x6B\xCF\x89",
 
1185
                "\xE3\x8F\x81"  => "\x6D\xCF\x89",
 
1186
                "\xE3\x8F\x83"  => "\x62\x71",
 
1187
                "\xE3\x8F\x86"  => "\x63\xE2\x88\x95\x6B\x67",
 
1188
                "\xE3\x8F\x87"  => "\x63\x6F\x2E",
 
1189
                "\xE3\x8F\x88"  => "\x64\x62",
 
1190
                "\xE3\x8F\x89"  => "\x67\x79",
 
1191
                "\xE3\x8F\x8B"  => "\x68\x70",
 
1192
                "\xE3\x8F\x8D"  => "\x6B\x6B",
 
1193
                "\xE3\x8F\x8E"  => "\x6B\x6D",
 
1194
                "\xE3\x8F\x97"  => "\x70\x68",
 
1195
                "\xE3\x8F\x99"  => "\x70\x70\x6D",
 
1196
                "\xE3\x8F\x9A"  => "\x70\x72",
 
1197
                "\xE3\x8F\x9C"  => "\x73\x76",
 
1198
                "\xE3\x8F\x9D"  => "\x77\x62",
 
1199
                "\xE3\x8F\x9E"  => "\x76\xE2\x88\x95\x6D",
 
1200
                "\xE3\x8F\x9F"  => "\x61\xE2\x88\x95\x6D",
 
1201
                "\xF0\x9D\x90\x80"      => "\x61",
 
1202
                "\xF0\x9D\x90\x81"      => "\x62",
 
1203
                "\xF0\x9D\x90\x82"      => "\x63",
 
1204
                "\xF0\x9D\x90\x83"      => "\x64",
 
1205
                "\xF0\x9D\x90\x84"      => "\x65",
 
1206
                "\xF0\x9D\x90\x85"      => "\x66",
 
1207
                "\xF0\x9D\x90\x86"      => "\x67",
 
1208
                "\xF0\x9D\x90\x87"      => "\x68",
 
1209
                "\xF0\x9D\x90\x88"      => "\x69",
 
1210
                "\xF0\x9D\x90\x89"      => "\x6A",
 
1211
                "\xF0\x9D\x90\x8A"      => "\x6B",
 
1212
                "\xF0\x9D\x90\x8B"      => "\x6C",
 
1213
                "\xF0\x9D\x90\x8C"      => "\x6D",
 
1214
                "\xF0\x9D\x90\x8D"      => "\x6E",
 
1215
                "\xF0\x9D\x90\x8E"      => "\x6F",
 
1216
                "\xF0\x9D\x90\x8F"      => "\x70",
 
1217
                "\xF0\x9D\x90\x90"      => "\x71",
 
1218
                "\xF0\x9D\x90\x91"      => "\x72",
 
1219
                "\xF0\x9D\x90\x92"      => "\x73",
 
1220
                "\xF0\x9D\x90\x93"      => "\x74",
 
1221
                "\xF0\x9D\x90\x94"      => "\x75",
 
1222
                "\xF0\x9D\x90\x95"      => "\x76",
 
1223
                "\xF0\x9D\x90\x96"      => "\x77",
 
1224
                "\xF0\x9D\x90\x97"      => "\x78",
 
1225
                "\xF0\x9D\x90\x98"      => "\x79",
 
1226
                "\xF0\x9D\x90\x99"      => "\x7A",
 
1227
                "\xF0\x9D\x90\xB4"      => "\x61",
 
1228
                "\xF0\x9D\x90\xB5"      => "\x62",
 
1229
                "\xF0\x9D\x90\xB6"      => "\x63",
 
1230
                "\xF0\x9D\x90\xB7"      => "\x64",
 
1231
                "\xF0\x9D\x90\xB8"      => "\x65",
 
1232
                "\xF0\x9D\x90\xB9"      => "\x66",
 
1233
                "\xF0\x9D\x90\xBA"      => "\x67",
 
1234
                "\xF0\x9D\x90\xBB"      => "\x68",
 
1235
                "\xF0\x9D\x90\xBC"      => "\x69",
 
1236
                "\xF0\x9D\x90\xBD"      => "\x6A",
 
1237
                "\xF0\x9D\x90\xBE"      => "\x6B",
 
1238
                "\xF0\x9D\x90\xBF"      => "\x6C",
 
1239
                "\xF0\x9D\x91\x80"      => "\x6D",
 
1240
                "\xF0\x9D\x91\x81"      => "\x6E",
 
1241
                "\xF0\x9D\x91\x82"      => "\x6F",
 
1242
                "\xF0\x9D\x91\x83"      => "\x70",
 
1243
                "\xF0\x9D\x91\x84"      => "\x71",
 
1244
                "\xF0\x9D\x91\x85"      => "\x72",
 
1245
                "\xF0\x9D\x91\x86"      => "\x73",
 
1246
                "\xF0\x9D\x91\x87"      => "\x74",
 
1247
                "\xF0\x9D\x91\x88"      => "\x75",
 
1248
                "\xF0\x9D\x91\x89"      => "\x76",
 
1249
                "\xF0\x9D\x91\x8A"      => "\x77",
 
1250
                "\xF0\x9D\x91\x8B"      => "\x78",
 
1251
                "\xF0\x9D\x91\x8C"      => "\x79",
 
1252
                "\xF0\x9D\x91\x8D"      => "\x7A",
 
1253
                "\xF0\x9D\x91\xA8"      => "\x61",
 
1254
                "\xF0\x9D\x91\xA9"      => "\x62",
 
1255
                "\xF0\x9D\x91\xAA"      => "\x63",
 
1256
                "\xF0\x9D\x91\xAB"      => "\x64",
 
1257
                "\xF0\x9D\x91\xAC"      => "\x65",
 
1258
                "\xF0\x9D\x91\xAD"      => "\x66",
 
1259
                "\xF0\x9D\x91\xAE"      => "\x67",
 
1260
                "\xF0\x9D\x91\xAF"      => "\x68",
 
1261
                "\xF0\x9D\x91\xB0"      => "\x69",
 
1262
                "\xF0\x9D\x91\xB1"      => "\x6A",
 
1263
                "\xF0\x9D\x91\xB2"      => "\x6B",
 
1264
                "\xF0\x9D\x91\xB3"      => "\x6C",
 
1265
                "\xF0\x9D\x91\xB4"      => "\x6D",
 
1266
                "\xF0\x9D\x91\xB5"      => "\x6E",
 
1267
                "\xF0\x9D\x91\xB6"      => "\x6F",
 
1268
                "\xF0\x9D\x91\xB7"      => "\x70",
 
1269
                "\xF0\x9D\x91\xB8"      => "\x71",
 
1270
                "\xF0\x9D\x91\xB9"      => "\x72",
 
1271
                "\xF0\x9D\x91\xBA"      => "\x73",
 
1272
                "\xF0\x9D\x91\xBB"      => "\x74",
 
1273
                "\xF0\x9D\x91\xBC"      => "\x75",
 
1274
                "\xF0\x9D\x91\xBD"      => "\x76",
 
1275
                "\xF0\x9D\x91\xBE"      => "\x77",
 
1276
                "\xF0\x9D\x91\xBF"      => "\x78",
 
1277
                "\xF0\x9D\x92\x80"      => "\x79",
 
1278
                "\xF0\x9D\x92\x81"      => "\x7A",
 
1279
                "\xF0\x9D\x92\x9C"      => "\x61",
 
1280
                "\xF0\x9D\x92\x9E"      => "\x63",
 
1281
                "\xF0\x9D\x92\x9F"      => "\x64",
 
1282
                "\xF0\x9D\x92\xA2"      => "\x67",
 
1283
                "\xF0\x9D\x92\xA5"      => "\x6A",
 
1284
                "\xF0\x9D\x92\xA6"      => "\x6B",
 
1285
                "\xF0\x9D\x92\xA9"      => "\x6E",
 
1286
                "\xF0\x9D\x92\xAA"      => "\x6F",
 
1287
                "\xF0\x9D\x92\xAB"      => "\x70",
 
1288
                "\xF0\x9D\x92\xAC"      => "\x71",
 
1289
                "\xF0\x9D\x92\xAE"      => "\x73",
 
1290
                "\xF0\x9D\x92\xAF"      => "\x74",
 
1291
                "\xF0\x9D\x92\xB0"      => "\x75",
 
1292
                "\xF0\x9D\x92\xB1"      => "\x76",
 
1293
                "\xF0\x9D\x92\xB2"      => "\x77",
 
1294
                "\xF0\x9D\x92\xB3"      => "\x78",
 
1295
                "\xF0\x9D\x92\xB4"      => "\x79",
 
1296
                "\xF0\x9D\x92\xB5"      => "\x7A",
 
1297
                "\xF0\x9D\x93\x90"      => "\x61",
 
1298
                "\xF0\x9D\x93\x91"      => "\x62",
 
1299
                "\xF0\x9D\x93\x92"      => "\x63",
 
1300
                "\xF0\x9D\x93\x93"      => "\x64",
 
1301
                "\xF0\x9D\x93\x94"      => "\x65",
 
1302
                "\xF0\x9D\x93\x95"      => "\x66",
 
1303
                "\xF0\x9D\x93\x96"      => "\x67",
 
1304
                "\xF0\x9D\x93\x97"      => "\x68",
 
1305
                "\xF0\x9D\x93\x98"      => "\x69",
 
1306
                "\xF0\x9D\x93\x99"      => "\x6A",
 
1307
                "\xF0\x9D\x93\x9A"      => "\x6B",
 
1308
                "\xF0\x9D\x93\x9B"      => "\x6C",
 
1309
                "\xF0\x9D\x93\x9C"      => "\x6D",
 
1310
                "\xF0\x9D\x93\x9D"      => "\x6E",
 
1311
                "\xF0\x9D\x93\x9E"      => "\x6F",
 
1312
                "\xF0\x9D\x93\x9F"      => "\x70",
 
1313
                "\xF0\x9D\x93\xA0"      => "\x71",
 
1314
                "\xF0\x9D\x93\xA1"      => "\x72",
 
1315
                "\xF0\x9D\x93\xA2"      => "\x73",
 
1316
                "\xF0\x9D\x93\xA3"      => "\x74",
 
1317
                "\xF0\x9D\x93\xA4"      => "\x75",
 
1318
                "\xF0\x9D\x93\xA5"      => "\x76",
 
1319
                "\xF0\x9D\x93\xA6"      => "\x77",
 
1320
                "\xF0\x9D\x93\xA7"      => "\x78",
 
1321
                "\xF0\x9D\x93\xA8"      => "\x79",
 
1322
                "\xF0\x9D\x93\xA9"      => "\x7A",
 
1323
                "\xF0\x9D\x94\x84"      => "\x61",
 
1324
                "\xF0\x9D\x94\x85"      => "\x62",
 
1325
                "\xF0\x9D\x94\x87"      => "\x64",
 
1326
                "\xF0\x9D\x94\x88"      => "\x65",
 
1327
                "\xF0\x9D\x94\x89"      => "\x66",
 
1328
                "\xF0\x9D\x94\x8A"      => "\x67",
 
1329
                "\xF0\x9D\x94\x8D"      => "\x6A",
 
1330
                "\xF0\x9D\x94\x8E"      => "\x6B",
 
1331
                "\xF0\x9D\x94\x8F"      => "\x6C",
 
1332
                "\xF0\x9D\x94\x90"      => "\x6D",
 
1333
                "\xF0\x9D\x94\x91"      => "\x6E",
 
1334
                "\xF0\x9D\x94\x92"      => "\x6F",
 
1335
                "\xF0\x9D\x94\x93"      => "\x70",
 
1336
                "\xF0\x9D\x94\x94"      => "\x71",
 
1337
                "\xF0\x9D\x94\x96"      => "\x73",
 
1338
                "\xF0\x9D\x94\x97"      => "\x74",
 
1339
                "\xF0\x9D\x94\x98"      => "\x75",
 
1340
                "\xF0\x9D\x94\x99"      => "\x76",
 
1341
                "\xF0\x9D\x94\x9A"      => "\x77",
 
1342
                "\xF0\x9D\x94\x9B"      => "\x78",
 
1343
                "\xF0\x9D\x94\x9C"      => "\x79",
 
1344
                "\xF0\x9D\x94\xB8"      => "\x61",
 
1345
                "\xF0\x9D\x94\xB9"      => "\x62",
 
1346
                "\xF0\x9D\x94\xBB"      => "\x64",
 
1347
                "\xF0\x9D\x94\xBC"      => "\x65",
 
1348
                "\xF0\x9D\x94\xBD"      => "\x66",
 
1349
                "\xF0\x9D\x94\xBE"      => "\x67",
 
1350
                "\xF0\x9D\x95\x80"      => "\x69",
 
1351
                "\xF0\x9D\x95\x81"      => "\x6A",
 
1352
                "\xF0\x9D\x95\x82"      => "\x6B",
 
1353
                "\xF0\x9D\x95\x83"      => "\x6C",
 
1354
                "\xF0\x9D\x95\x84"      => "\x6D",
 
1355
                "\xF0\x9D\x95\x86"      => "\x6F",
 
1356
                "\xF0\x9D\x95\x8A"      => "\x73",
 
1357
                "\xF0\x9D\x95\x8B"      => "\x74",
 
1358
                "\xF0\x9D\x95\x8C"      => "\x75",
 
1359
                "\xF0\x9D\x95\x8D"      => "\x76",
 
1360
                "\xF0\x9D\x95\x8E"      => "\x77",
 
1361
                "\xF0\x9D\x95\x8F"      => "\x78",
 
1362
                "\xF0\x9D\x95\x90"      => "\x79",
 
1363
                "\xF0\x9D\x95\xAC"      => "\x61",
 
1364
                "\xF0\x9D\x95\xAD"      => "\x62",
 
1365
                "\xF0\x9D\x95\xAE"      => "\x63",
 
1366
                "\xF0\x9D\x95\xAF"      => "\x64",
 
1367
                "\xF0\x9D\x95\xB0"      => "\x65",
 
1368
                "\xF0\x9D\x95\xB1"      => "\x66",
 
1369
                "\xF0\x9D\x95\xB2"      => "\x67",
 
1370
                "\xF0\x9D\x95\xB3"      => "\x68",
 
1371
                "\xF0\x9D\x95\xB4"      => "\x69",
 
1372
                "\xF0\x9D\x95\xB5"      => "\x6A",
 
1373
                "\xF0\x9D\x95\xB6"      => "\x6B",
 
1374
                "\xF0\x9D\x95\xB7"      => "\x6C",
 
1375
                "\xF0\x9D\x95\xB8"      => "\x6D",
 
1376
                "\xF0\x9D\x95\xB9"      => "\x6E",
 
1377
                "\xF0\x9D\x95\xBA"      => "\x6F",
 
1378
                "\xF0\x9D\x95\xBB"      => "\x70",
 
1379
                "\xF0\x9D\x95\xBC"      => "\x71",
 
1380
                "\xF0\x9D\x95\xBD"      => "\x72",
 
1381
                "\xF0\x9D\x95\xBE"      => "\x73",
 
1382
                "\xF0\x9D\x95\xBF"      => "\x74",
 
1383
                "\xF0\x9D\x96\x80"      => "\x75",
 
1384
                "\xF0\x9D\x96\x81"      => "\x76",
 
1385
                "\xF0\x9D\x96\x82"      => "\x77",
 
1386
                "\xF0\x9D\x96\x83"      => "\x78",
 
1387
                "\xF0\x9D\x96\x84"      => "\x79",
 
1388
                "\xF0\x9D\x96\x85"      => "\x7A",
 
1389
                "\xF0\x9D\x96\xA0"      => "\x61",
 
1390
                "\xF0\x9D\x96\xA1"      => "\x62",
 
1391
                "\xF0\x9D\x96\xA2"      => "\x63",
 
1392
                "\xF0\x9D\x96\xA3"      => "\x64",
 
1393
                "\xF0\x9D\x96\xA4"      => "\x65",
 
1394
                "\xF0\x9D\x96\xA5"      => "\x66",
 
1395
                "\xF0\x9D\x96\xA6"      => "\x67",
 
1396
                "\xF0\x9D\x96\xA7"      => "\x68",
 
1397
                "\xF0\x9D\x96\xA8"      => "\x69",
 
1398
                "\xF0\x9D\x96\xA9"      => "\x6A",
 
1399
                "\xF0\x9D\x96\xAA"      => "\x6B",
 
1400
                "\xF0\x9D\x96\xAB"      => "\x6C",
 
1401
                "\xF0\x9D\x96\xAC"      => "\x6D",
 
1402
                "\xF0\x9D\x96\xAD"      => "\x6E",
 
1403
                "\xF0\x9D\x96\xAE"      => "\x6F",
 
1404
                "\xF0\x9D\x96\xAF"      => "\x70",
 
1405
                "\xF0\x9D\x96\xB0"      => "\x71",
 
1406
                "\xF0\x9D\x96\xB1"      => "\x72",
 
1407
                "\xF0\x9D\x96\xB2"      => "\x73",
 
1408
                "\xF0\x9D\x96\xB3"      => "\x74",
 
1409
                "\xF0\x9D\x96\xB4"      => "\x75",
 
1410
                "\xF0\x9D\x96\xB5"      => "\x76",
 
1411
                "\xF0\x9D\x96\xB6"      => "\x77",
 
1412
                "\xF0\x9D\x96\xB7"      => "\x78",
 
1413
                "\xF0\x9D\x96\xB8"      => "\x79",
 
1414
                "\xF0\x9D\x96\xB9"      => "\x7A",
 
1415
                "\xF0\x9D\x97\x94"      => "\x61",
 
1416
                "\xF0\x9D\x97\x95"      => "\x62",
 
1417
                "\xF0\x9D\x97\x96"      => "\x63",
 
1418
                "\xF0\x9D\x97\x97"      => "\x64",
 
1419
                "\xF0\x9D\x97\x98"      => "\x65",
 
1420
                "\xF0\x9D\x97\x99"      => "\x66",
 
1421
                "\xF0\x9D\x97\x9A"      => "\x67",
 
1422
                "\xF0\x9D\x97\x9B"      => "\x68",
 
1423
                "\xF0\x9D\x97\x9C"      => "\x69",
 
1424
                "\xF0\x9D\x97\x9D"      => "\x6A",
 
1425
                "\xF0\x9D\x97\x9E"      => "\x6B",
 
1426
                "\xF0\x9D\x97\x9F"      => "\x6C",
 
1427
                "\xF0\x9D\x97\xA0"      => "\x6D",
 
1428
                "\xF0\x9D\x97\xA1"      => "\x6E",
 
1429
                "\xF0\x9D\x97\xA2"      => "\x6F",
 
1430
                "\xF0\x9D\x97\xA3"      => "\x70",
 
1431
                "\xF0\x9D\x97\xA4"      => "\x71",
 
1432
                "\xF0\x9D\x97\xA5"      => "\x72",
 
1433
                "\xF0\x9D\x97\xA6"      => "\x73",
 
1434
                "\xF0\x9D\x97\xA7"      => "\x74",
 
1435
                "\xF0\x9D\x97\xA8"      => "\x75",
 
1436
                "\xF0\x9D\x97\xA9"      => "\x76",
 
1437
                "\xF0\x9D\x97\xAA"      => "\x77",
 
1438
                "\xF0\x9D\x97\xAB"      => "\x78",
 
1439
                "\xF0\x9D\x97\xAC"      => "\x79",
 
1440
                "\xF0\x9D\x97\xAD"      => "\x7A",
 
1441
                "\xF0\x9D\x98\x88"      => "\x61",
 
1442
                "\xF0\x9D\x98\x89"      => "\x62",
 
1443
                "\xF0\x9D\x98\x8A"      => "\x63",
 
1444
                "\xF0\x9D\x98\x8B"      => "\x64",
 
1445
                "\xF0\x9D\x98\x8C"      => "\x65",
 
1446
                "\xF0\x9D\x98\x8D"      => "\x66",
 
1447
                "\xF0\x9D\x98\x8E"      => "\x67",
 
1448
                "\xF0\x9D\x98\x8F"      => "\x68",
 
1449
                "\xF0\x9D\x98\x90"      => "\x69",
 
1450
                "\xF0\x9D\x98\x91"      => "\x6A",
 
1451
                "\xF0\x9D\x98\x92"      => "\x6B",
 
1452
                "\xF0\x9D\x98\x93"      => "\x6C",
 
1453
                "\xF0\x9D\x98\x94"      => "\x6D",
 
1454
                "\xF0\x9D\x98\x95"      => "\x6E",
 
1455
                "\xF0\x9D\x98\x96"      => "\x6F",
 
1456
                "\xF0\x9D\x98\x97"      => "\x70",
 
1457
                "\xF0\x9D\x98\x98"      => "\x71",
 
1458
                "\xF0\x9D\x98\x99"      => "\x72",
 
1459
                "\xF0\x9D\x98\x9A"      => "\x73",
 
1460
                "\xF0\x9D\x98\x9B"      => "\x74",
 
1461
                "\xF0\x9D\x98\x9C"      => "\x75",
 
1462
                "\xF0\x9D\x98\x9D"      => "\x76",
 
1463
                "\xF0\x9D\x98\x9E"      => "\x77",
 
1464
                "\xF0\x9D\x98\x9F"      => "\x78",
 
1465
                "\xF0\x9D\x98\xA0"      => "\x79",
 
1466
                "\xF0\x9D\x98\xA1"      => "\x7A",
 
1467
                "\xF0\x9D\x98\xBC"      => "\x61",
 
1468
                "\xF0\x9D\x98\xBD"      => "\x62",
 
1469
                "\xF0\x9D\x98\xBE"      => "\x63",
 
1470
                "\xF0\x9D\x98\xBF"      => "\x64",
 
1471
                "\xF0\x9D\x99\x80"      => "\x65",
 
1472
                "\xF0\x9D\x99\x81"      => "\x66",
 
1473
                "\xF0\x9D\x99\x82"      => "\x67",
 
1474
                "\xF0\x9D\x99\x83"      => "\x68",
 
1475
                "\xF0\x9D\x99\x84"      => "\x69",
 
1476
                "\xF0\x9D\x99\x85"      => "\x6A",
 
1477
                "\xF0\x9D\x99\x86"      => "\x6B",
 
1478
                "\xF0\x9D\x99\x87"      => "\x6C",
 
1479
                "\xF0\x9D\x99\x88"      => "\x6D",
 
1480
                "\xF0\x9D\x99\x89"      => "\x6E",
 
1481
                "\xF0\x9D\x99\x8A"      => "\x6F",
 
1482
                "\xF0\x9D\x99\x8B"      => "\x70",
 
1483
                "\xF0\x9D\x99\x8C"      => "\x71",
 
1484
                "\xF0\x9D\x99\x8D"      => "\x72",
 
1485
                "\xF0\x9D\x99\x8E"      => "\x73",
 
1486
                "\xF0\x9D\x99\x8F"      => "\x74",
 
1487
                "\xF0\x9D\x99\x90"      => "\x75",
 
1488
                "\xF0\x9D\x99\x91"      => "\x76",
 
1489
                "\xF0\x9D\x99\x92"      => "\x77",
 
1490
                "\xF0\x9D\x99\x93"      => "\x78",
 
1491
                "\xF0\x9D\x99\x94"      => "\x79",
 
1492
                "\xF0\x9D\x99\x95"      => "\x7A",
 
1493
                "\xF0\x9D\x99\xB0"      => "\x61",
 
1494
                "\xF0\x9D\x99\xB1"      => "\x62",
 
1495
                "\xF0\x9D\x99\xB2"      => "\x63",
 
1496
                "\xF0\x9D\x99\xB3"      => "\x64",
 
1497
                "\xF0\x9D\x99\xB4"      => "\x65",
 
1498
                "\xF0\x9D\x99\xB5"      => "\x66",
 
1499
                "\xF0\x9D\x99\xB6"      => "\x67",
 
1500
                "\xF0\x9D\x99\xB7"      => "\x68",
 
1501
                "\xF0\x9D\x99\xB8"      => "\x69",
 
1502
                "\xF0\x9D\x99\xB9"      => "\x6A",
 
1503
                "\xF0\x9D\x99\xBA"      => "\x6B",
 
1504
                "\xF0\x9D\x99\xBB"      => "\x6C",
 
1505
                "\xF0\x9D\x99\xBC"      => "\x6D",
 
1506
                "\xF0\x9D\x99\xBD"      => "\x6E",
 
1507
                "\xF0\x9D\x99\xBE"      => "\x6F",
 
1508
                "\xF0\x9D\x99\xBF"      => "\x70",
 
1509
                "\xF0\x9D\x9A\x80"      => "\x71",
 
1510
                "\xF0\x9D\x9A\x81"      => "\x72",
 
1511
                "\xF0\x9D\x9A\x82"      => "\x73",
 
1512
                "\xF0\x9D\x9A\x83"      => "\x74",
 
1513
                "\xF0\x9D\x9A\x84"      => "\x75",
 
1514
                "\xF0\x9D\x9A\x85"      => "\x76",
 
1515
                "\xF0\x9D\x9A\x86"      => "\x77",
 
1516
                "\xF0\x9D\x9A\x87"      => "\x78",
 
1517
                "\xF0\x9D\x9A\x88"      => "\x79",
 
1518
                "\xF0\x9D\x9A\x89"      => "\x7A",
 
1519
                "\xF0\x9D\x9A\xA8"      => "\xCE\xB1",
 
1520
                "\xF0\x9D\x9A\xA9"      => "\xCE\xB2",
 
1521
                "\xF0\x9D\x9A\xAA"      => "\xCE\xB3",
 
1522
                "\xF0\x9D\x9A\xAB"      => "\xCE\xB4",
 
1523
                "\xF0\x9D\x9A\xAC"      => "\xCE\xB5",
 
1524
                "\xF0\x9D\x9A\xAD"      => "\xCE\xB6",
 
1525
                "\xF0\x9D\x9A\xAE"      => "\xCE\xB7",
 
1526
                "\xF0\x9D\x9A\xAF"      => "\xCE\xB8",
 
1527
                "\xF0\x9D\x9A\xB0"      => "\xCE\xB9",
 
1528
                "\xF0\x9D\x9A\xB1"      => "\xCE\xBA",
 
1529
                "\xF0\x9D\x9A\xB2"      => "\xCE\xBB",
 
1530
                "\xF0\x9D\x9A\xB3"      => "\xCE\xBC",
 
1531
                "\xF0\x9D\x9A\xB4"      => "\xCE\xBD",
 
1532
                "\xF0\x9D\x9A\xB5"      => "\xCE\xBE",
 
1533
                "\xF0\x9D\x9A\xB6"      => "\xCE\xBF",
 
1534
                "\xF0\x9D\x9A\xB7"      => "\xCF\x80",
 
1535
                "\xF0\x9D\x9A\xB8"      => "\xCF\x81",
 
1536
                "\xF0\x9D\x9A\xB9"      => "\xCE\xB8",
 
1537
                "\xF0\x9D\x9A\xBA"      => "\xCF\x83",
 
1538
                "\xF0\x9D\x9A\xBB"      => "\xCF\x84",
 
1539
                "\xF0\x9D\x9A\xBC"      => "\xCF\x85",
 
1540
                "\xF0\x9D\x9A\xBD"      => "\xCF\x86",
 
1541
                "\xF0\x9D\x9A\xBE"      => "\xCF\x87",
 
1542
                "\xF0\x9D\x9A\xBF"      => "\xCF\x88",
 
1543
                "\xF0\x9D\x9B\x80"      => "\xCF\x89",
 
1544
                "\xF0\x9D\x9B\x93"      => "\xCF\x83",
 
1545
                "\xF0\x9D\x9B\xA2"      => "\xCE\xB1",
 
1546
                "\xF0\x9D\x9B\xA3"      => "\xCE\xB2",
 
1547
                "\xF0\x9D\x9B\xA4"      => "\xCE\xB3",
 
1548
                "\xF0\x9D\x9B\xA5"      => "\xCE\xB4",
 
1549
                "\xF0\x9D\x9B\xA6"      => "\xCE\xB5",
 
1550
                "\xF0\x9D\x9B\xA7"      => "\xCE\xB6",
 
1551
                "\xF0\x9D\x9B\xA8"      => "\xCE\xB7",
 
1552
                "\xF0\x9D\x9B\xA9"      => "\xCE\xB8",
 
1553
                "\xF0\x9D\x9B\xAA"      => "\xCE\xB9",
 
1554
                "\xF0\x9D\x9B\xAB"      => "\xCE\xBA",
 
1555
                "\xF0\x9D\x9B\xAC"      => "\xCE\xBB",
 
1556
                "\xF0\x9D\x9B\xAD"      => "\xCE\xBC",
 
1557
                "\xF0\x9D\x9B\xAE"      => "\xCE\xBD",
 
1558
                "\xF0\x9D\x9B\xAF"      => "\xCE\xBE",
 
1559
                "\xF0\x9D\x9B\xB0"      => "\xCE\xBF",
 
1560
                "\xF0\x9D\x9B\xB1"      => "\xCF\x80",
 
1561
                "\xF0\x9D\x9B\xB2"      => "\xCF\x81",
 
1562
                "\xF0\x9D\x9B\xB3"      => "\xCE\xB8",
 
1563
                "\xF0\x9D\x9B\xB4"      => "\xCF\x83",
 
1564
                "\xF0\x9D\x9B\xB5"      => "\xCF\x84",
 
1565
                "\xF0\x9D\x9B\xB6"      => "\xCF\x85",
 
1566
                "\xF0\x9D\x9B\xB7"      => "\xCF\x86",
 
1567
                "\xF0\x9D\x9B\xB8"      => "\xCF\x87",
 
1568
                "\xF0\x9D\x9B\xB9"      => "\xCF\x88",
 
1569
                "\xF0\x9D\x9B\xBA"      => "\xCF\x89",
 
1570
                "\xF0\x9D\x9C\x8D"      => "\xCF\x83",
 
1571
                "\xF0\x9D\x9C\x9C"      => "\xCE\xB1",
 
1572
                "\xF0\x9D\x9C\x9D"      => "\xCE\xB2",
 
1573
                "\xF0\x9D\x9C\x9E"      => "\xCE\xB3",
 
1574
                "\xF0\x9D\x9C\x9F"      => "\xCE\xB4",
 
1575
                "\xF0\x9D\x9C\xA0"      => "\xCE\xB5",
 
1576
                "\xF0\x9D\x9C\xA1"      => "\xCE\xB6",
 
1577
                "\xF0\x9D\x9C\xA2"      => "\xCE\xB7",
 
1578
                "\xF0\x9D\x9C\xA3"      => "\xCE\xB8",
 
1579
                "\xF0\x9D\x9C\xA4"      => "\xCE\xB9",
 
1580
                "\xF0\x9D\x9C\xA5"      => "\xCE\xBA",
 
1581
                "\xF0\x9D\x9C\xA6"      => "\xCE\xBB",
 
1582
                "\xF0\x9D\x9C\xA7"      => "\xCE\xBC",
 
1583
                "\xF0\x9D\x9C\xA8"      => "\xCE\xBD",
 
1584
                "\xF0\x9D\x9C\xA9"      => "\xCE\xBE",
 
1585
                "\xF0\x9D\x9C\xAA"      => "\xCE\xBF",
 
1586
                "\xF0\x9D\x9C\xAB"      => "\xCF\x80",
 
1587
                "\xF0\x9D\x9C\xAC"      => "\xCF\x81",
 
1588
                "\xF0\x9D\x9C\xAD"      => "\xCE\xB8",
 
1589
                "\xF0\x9D\x9C\xAE"      => "\xCF\x83",
 
1590
                "\xF0\x9D\x9C\xAF"      => "\xCF\x84",
 
1591
                "\xF0\x9D\x9C\xB0"      => "\xCF\x85",
 
1592
                "\xF0\x9D\x9C\xB1"      => "\xCF\x86",
 
1593
                "\xF0\x9D\x9C\xB2"      => "\xCF\x87",
 
1594
                "\xF0\x9D\x9C\xB3"      => "\xCF\x88",
 
1595
                "\xF0\x9D\x9C\xB4"      => "\xCF\x89",
 
1596
                "\xF0\x9D\x9D\x87"      => "\xCF\x83",
 
1597
                "\xF0\x9D\x9D\x96"      => "\xCE\xB1",
 
1598
                "\xF0\x9D\x9D\x97"      => "\xCE\xB2",
 
1599
                "\xF0\x9D\x9D\x98"      => "\xCE\xB3",
 
1600
                "\xF0\x9D\x9D\x99"      => "\xCE\xB4",
 
1601
                "\xF0\x9D\x9D\x9A"      => "\xCE\xB5",
 
1602
                "\xF0\x9D\x9D\x9B"      => "\xCE\xB6",
 
1603
                "\xF0\x9D\x9D\x9C"      => "\xCE\xB7",
 
1604
                "\xF0\x9D\x9D\x9D"      => "\xCE\xB8",
 
1605
                "\xF0\x9D\x9D\x9E"      => "\xCE\xB9",
 
1606
                "\xF0\x9D\x9D\x9F"      => "\xCE\xBA",
 
1607
                "\xF0\x9D\x9D\xA0"      => "\xCE\xBB",
 
1608
                "\xF0\x9D\x9D\xA1"      => "\xCE\xBC",
 
1609
                "\xF0\x9D\x9D\xA2"      => "\xCE\xBD",
 
1610
                "\xF0\x9D\x9D\xA3"      => "\xCE\xBE",
 
1611
                "\xF0\x9D\x9D\xA4"      => "\xCE\xBF",
 
1612
                "\xF0\x9D\x9D\xA5"      => "\xCF\x80",
 
1613
                "\xF0\x9D\x9D\xA6"      => "\xCF\x81",
 
1614
                "\xF0\x9D\x9D\xA7"      => "\xCE\xB8",
 
1615
                "\xF0\x9D\x9D\xA8"      => "\xCF\x83",
 
1616
                "\xF0\x9D\x9D\xA9"      => "\xCF\x84",
 
1617
                "\xF0\x9D\x9D\xAA"      => "\xCF\x85",
 
1618
                "\xF0\x9D\x9D\xAB"      => "\xCF\x86",
 
1619
                "\xF0\x9D\x9D\xAC"      => "\xCF\x87",
 
1620
                "\xF0\x9D\x9D\xAD"      => "\xCF\x88",
 
1621
                "\xF0\x9D\x9D\xAE"      => "\xCF\x89",
 
1622
                "\xF0\x9D\x9E\x81"      => "\xCF\x83",
 
1623
                "\xF0\x9D\x9E\x90"      => "\xCE\xB1",
 
1624
                "\xF0\x9D\x9E\x91"      => "\xCE\xB2",
 
1625
                "\xF0\x9D\x9E\x92"      => "\xCE\xB3",
 
1626
                "\xF0\x9D\x9E\x93"      => "\xCE\xB4",
 
1627
                "\xF0\x9D\x9E\x94"      => "\xCE\xB5",
 
1628
                "\xF0\x9D\x9E\x95"      => "\xCE\xB6",
 
1629
                "\xF0\x9D\x9E\x96"      => "\xCE\xB7",
 
1630
                "\xF0\x9D\x9E\x97"      => "\xCE\xB8",
 
1631
                "\xF0\x9D\x9E\x98"      => "\xCE\xB9",
 
1632
                "\xF0\x9D\x9E\x99"      => "\xCE\xBA",
 
1633
                "\xF0\x9D\x9E\x9A"      => "\xCE\xBB",
 
1634
                "\xF0\x9D\x9E\x9B"      => "\xCE\xBC",
 
1635
                "\xF0\x9D\x9E\x9C"      => "\xCE\xBD",
 
1636
                "\xF0\x9D\x9E\x9D"      => "\xCE\xBE",
 
1637
                "\xF0\x9D\x9E\x9E"      => "\xCE\xBF",
 
1638
                "\xF0\x9D\x9E\x9F"      => "\xCF\x80",
 
1639
                "\xF0\x9D\x9E\xA0"      => "\xCF\x81",
 
1640
                "\xF0\x9D\x9E\xA1"      => "\xCE\xB8",
 
1641
                "\xF0\x9D\x9E\xA2"      => "\xCF\x83",
 
1642
                "\xF0\x9D\x9E\xA3"      => "\xCF\x84",
 
1643
                "\xF0\x9D\x9E\xA4"      => "\xCF\x85",
 
1644
                "\xF0\x9D\x9E\xA5"      => "\xCF\x86",
 
1645
                "\xF0\x9D\x9E\xA6"      => "\xCF\x87",
 
1646
                "\xF0\x9D\x9E\xA7"      => "\xCF\x88",
 
1647
                "\xF0\x9D\x9E\xA8"      => "\xCF\x89",
 
1648
                "\xF0\x9D\x9E\xBB"      => "\xCF\x83",
 
1649
                "\xF0\x9D\x9F\x8A"      => "\xCF\x9D",
 
1650
        );
 
1651
        global $phpbb_root_path, $phpEx;
 
1652
 
 
1653
        // do the case fold
 
1654
        $text = utf8_case_fold($text, $option);
 
1655
 
 
1656
        if (!class_exists('utf_normalizer'))
 
1657
        {
 
1658
                global $phpbb_root_path, $phpEx;
 
1659
                include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 
1660
        }
 
1661
 
 
1662
        // convert to NFKC
 
1663
        utf_normalizer::nfkc($text);
 
1664
 
 
1665
        // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
 
1666
        $text = strtr($text, $fc_nfkc_closure);
 
1667
 
 
1668
        return $text;
 
1669
}
 
1670
 
 
1671
/**
* Case folds text that is assumed to already be in NFC.
*
* Before folding, every precomposed code point listed in the table below is
* rewritten as its base character followed by U+0345 (the combining
* ypogegrammeni, "\xCD\x85"). The rewritten text is then passed to
* utf8_case_fold(); no normalizer is invoked by this function.
*
* @param        string  $text   text to be case folded (expected to be NFC)
* @param        string  $option folding mode, handed unchanged to utf8_case_fold()
* @return       string                  case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
        // Precomposed code point => base character + U+0345
        static $ypogegrammeni = array(
                "\xCD\xBA"      => "\x20\xCD\x85",
                "\xE1\xBE\x80"  => "\xE1\xBC\x80\xCD\x85",
                "\xE1\xBE\x81"  => "\xE1\xBC\x81\xCD\x85",
                "\xE1\xBE\x82"  => "\xE1\xBC\x82\xCD\x85",
                "\xE1\xBE\x83"  => "\xE1\xBC\x83\xCD\x85",
                "\xE1\xBE\x84"  => "\xE1\xBC\x84\xCD\x85",
                "\xE1\xBE\x85"  => "\xE1\xBC\x85\xCD\x85",
                "\xE1\xBE\x86"  => "\xE1\xBC\x86\xCD\x85",
                "\xE1\xBE\x87"  => "\xE1\xBC\x87\xCD\x85",
                "\xE1\xBE\x88"  => "\xE1\xBC\x88\xCD\x85",
                "\xE1\xBE\x89"  => "\xE1\xBC\x89\xCD\x85",
                "\xE1\xBE\x8A"  => "\xE1\xBC\x8A\xCD\x85",
                "\xE1\xBE\x8B"  => "\xE1\xBC\x8B\xCD\x85",
                "\xE1\xBE\x8C"  => "\xE1\xBC\x8C\xCD\x85",
                "\xE1\xBE\x8D"  => "\xE1\xBC\x8D\xCD\x85",
                "\xE1\xBE\x8E"  => "\xE1\xBC\x8E\xCD\x85",
                "\xE1\xBE\x8F"  => "\xE1\xBC\x8F\xCD\x85",
                "\xE1\xBE\x90"  => "\xE1\xBC\xA0\xCD\x85",
                "\xE1\xBE\x91"  => "\xE1\xBC\xA1\xCD\x85",
                "\xE1\xBE\x92"  => "\xE1\xBC\xA2\xCD\x85",
                "\xE1\xBE\x93"  => "\xE1\xBC\xA3\xCD\x85",
                "\xE1\xBE\x94"  => "\xE1\xBC\xA4\xCD\x85",
                "\xE1\xBE\x95"  => "\xE1\xBC\xA5\xCD\x85",
                "\xE1\xBE\x96"  => "\xE1\xBC\xA6\xCD\x85",
                "\xE1\xBE\x97"  => "\xE1\xBC\xA7\xCD\x85",
                "\xE1\xBE\x98"  => "\xE1\xBC\xA8\xCD\x85",
                "\xE1\xBE\x99"  => "\xE1\xBC\xA9\xCD\x85",
                "\xE1\xBE\x9A"  => "\xE1\xBC\xAA\xCD\x85",
                "\xE1\xBE\x9B"  => "\xE1\xBC\xAB\xCD\x85",
                "\xE1\xBE\x9C"  => "\xE1\xBC\xAC\xCD\x85",
                "\xE1\xBE\x9D"  => "\xE1\xBC\xAD\xCD\x85",
                "\xE1\xBE\x9E"  => "\xE1\xBC\xAE\xCD\x85",
                "\xE1\xBE\x9F"  => "\xE1\xBC\xAF\xCD\x85",
                "\xE1\xBE\xA0"  => "\xE1\xBD\xA0\xCD\x85",
                "\xE1\xBE\xA1"  => "\xE1\xBD\xA1\xCD\x85",
                "\xE1\xBE\xA2"  => "\xE1\xBD\xA2\xCD\x85",
                "\xE1\xBE\xA3"  => "\xE1\xBD\xA3\xCD\x85",
                "\xE1\xBE\xA4"  => "\xE1\xBD\xA4\xCD\x85",
                "\xE1\xBE\xA5"  => "\xE1\xBD\xA5\xCD\x85",
                "\xE1\xBE\xA6"  => "\xE1\xBD\xA6\xCD\x85",
                "\xE1\xBE\xA7"  => "\xE1\xBD\xA7\xCD\x85",
                "\xE1\xBE\xA8"  => "\xE1\xBD\xA8\xCD\x85",
                "\xE1\xBE\xA9"  => "\xE1\xBD\xA9\xCD\x85",
                "\xE1\xBE\xAA"  => "\xE1\xBD\xAA\xCD\x85",
                "\xE1\xBE\xAB"  => "\xE1\xBD\xAB\xCD\x85",
                "\xE1\xBE\xAC"  => "\xE1\xBD\xAC\xCD\x85",
                "\xE1\xBE\xAD"  => "\xE1\xBD\xAD\xCD\x85",
                "\xE1\xBE\xAE"  => "\xE1\xBD\xAE\xCD\x85",
                "\xE1\xBE\xAF"  => "\xE1\xBD\xAF\xCD\x85",
                "\xE1\xBE\xB2"  => "\xE1\xBD\xB0\xCD\x85",
                "\xE1\xBE\xB3"  => "\xCE\xB1\xCD\x85",
                "\xE1\xBE\xB4"  => "\xCE\xAC\xCD\x85",
                "\xE1\xBE\xB7"  => "\xE1\xBE\xB6\xCD\x85",
                "\xE1\xBE\xBC"  => "\xCE\x91\xCD\x85",
                "\xE1\xBF\x82"  => "\xE1\xBD\xB4\xCD\x85",
                "\xE1\xBF\x83"  => "\xCE\xB7\xCD\x85",
                "\xE1\xBF\x84"  => "\xCE\xAE\xCD\x85",
                "\xE1\xBF\x87"  => "\xE1\xBF\x86\xCD\x85",
                "\xE1\xBF\x8C"  => "\xCE\x97\xCD\x85",
                "\xE1\xBF\xB2"  => "\xE1\xBD\xBC\xCD\x85",
                "\xE1\xBF\xB3"  => "\xCF\x89\xCD\x85",
                "\xE1\xBF\xB4"  => "\xCF\x8E\xCD\x85",
                "\xE1\xBF\xB7"  => "\xE1\xBF\xB6\xCD\x85",
                "\xE1\xBF\xBC"  => "\xCE\xA9\xCD\x85",
        );

        // perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
        $text = strtr($text, $ypogegrammeni);

        // do the case fold
        return utf8_case_fold($text, $option);
}
 
1758
 
 
1759
/**
* Wrapper around utf_normalizer::nfc() that loads the normalizer class on
* demand and returns the NFC (Normalization Form Composition) version of the
* given value.
*
* Arrays are processed element by element with their keys preserved. Only two
* levels are handled: an element that is itself an array has each of its own
* elements normalized, but no deeper nesting is walked.
*
* @param        mixed   $strings        a string or an array of strings to normalize
* @return       mixed                           the normalized content; empty input is returned untouched
*/
function utf8_normalize_nfc($strings)
{
        // Nothing to do for empty values; this also avoids loading the normalizer
        if (empty($strings))
        {
                return $strings;
        }

        if (!class_exists('utf_normalizer'))
        {
                global $phpbb_root_path, $phpEx;
                include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
        }

        // Scalar case: normalize the value itself and hand it back
        if (!is_array($strings))
        {
                utf_normalizer::nfc($strings);

                return $strings;
        }

        // Array case: address every element through $strings so the result is kept
        foreach (array_keys($strings) as $outer_key)
        {
                if (!is_array($strings[$outer_key]))
                {
                        utf_normalizer::nfc($strings[$outer_key]);
                        continue;
                }

                foreach (array_keys($strings[$outer_key]) as $inner_key)
                {
                        utf_normalizer::nfc($strings[$outer_key][$inner_key]);
                }
        }

        return $strings;
}
 
1803
 
 
1804
/**
* Produces the "clean" form of a string, i.e. the form used to compare strings
* regardless of case and of look-alike characters.
*
* The steps are, in order: NFKC case folding through utf8_case_fold_nfkc(),
* replacement of homographs using the table returned by
* includes/utf/data/confusables, removal of control characters, collapsing of
* runs of spaces into one space, and trimming.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in
* the users table, and all other columns that store a clean string; otherwise
* you will break this functionality.
*
* @param        string  $text   An unclean string, maybe user input (has to be valid UTF-8!)
* @return       string                  Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
        global $phpbb_root_path, $phpEx;

        // Homograph table, loaded from disk on first use and cached across calls
        static $homographs = array();

        if (empty($homographs))
        {
                $homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
        }

        $cleaned = strtr(utf8_case_fold_nfkc($text), $homographs);

        // Other control characters
        $cleaned = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $cleaned);

        // we need to reduce multiple spaces to a single one
        $cleaned = preg_replace('# {2,}#', ' ', $cleaned);

        // we can use trim here as all the other space characters should have been turned
        // into normal ASCII spaces by now
        $cleaned = trim($cleaned);

        return $cleaned;
}
 
1839
 
 
1840
/**
* A wrapper for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* The argument is taken by reference but is not modified; the escaped string
* is returned.
*
* @param        string  &$value the string to escape
* @return       string                  the escaped string
*/
function utf8_htmlspecialchars(&$value)
{
        $escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

        return $escaped;
}
 
1847
 
 
1848
/**
* Trying to convert returned system message to utf8
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* Messages without any byte in the range 0x80-0xFF are plain ASCII and are
* only HTML-escaped; all others are first passed through
* utf8_recode($message, 'ISO-8859-1').
*
* @param        string  $message        the system message
* @return       string                          the message after utf8_htmlspecialchars()
*/
function utf8_convert_message($message)
{
        // Check if conversion is needed at all, as there is no point
        // in converting ASCII messages from ISO-8859-1 to UTF-8
        if (preg_match('/[\x80-\xFF]/', $message))
        {
                $message = utf8_recode($message, 'ISO-8859-1');
        }

        // utf8_htmlspecialchars() takes its argument by reference, so it must be
        // given a variable and not the result of a function call
        return utf8_htmlspecialchars($message);
}
 
1866
 
 
1867
/**
* UTF8-compatible wordwrap replacement
*
* The input is split on $break first so that existing breaks are kept, then
* every resulting line is wrapped on single ASCII spaces. Lengths are measured
* with utf8_strlen().
*
* A word longer than $width that starts an input line is emitted as is (or cut,
* see $cut); no empty line is put in front of it.
*
* @param        string  $string The input string
* @param        int             $width  The column width. Defaults to 75.
* @param        string  $break  The line is broken using the optional break parameter. Defaults to "\n".
* @param        bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart. Ignored when $width is smaller than 1.
*
* @return       string                  the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
        // Cutting needs pieces of at least one character. With a smaller width the
        // cut step below rewinds $i on every pass and the loop would never end.
        if ($width < 1)
        {
                $cut = false;
        }

        $new_lines = array();

        // We first need to explode on $break, not destroying existing (intended) breaks
        foreach (explode($break, $string) as $line)
        {
                $words = explode(' ', $line);

                // Output line being built; every appended word carries a trailing space
                $current = '';

                for ($i = 0, $size = sizeof($words); $i < $size; $i++)
                {
                        $word = $words[$i];

                        // If cut is true we need to cut the word if it is > width chars.
                        // The remainder is stored back and visited again on the next pass.
                        if ($cut && utf8_strlen($word) > $width)
                        {
                                $words[$i] = utf8_substr($word, $width);
                                $word = utf8_substr($word, 0, $width);
                                $i--;
                        }

                        // Only break when something has been written to the current line,
                        // otherwise an over-long first word would leave an empty line behind
                        if ($current !== '' && utf8_strlen($current . $word) > $width)
                        {
                                // drop the trailing space before closing the line
                                $new_lines[] = substr($current, 0, -1);
                                $current = '';
                        }

                        $current .= $word . ' ';
                }

                // $current holds at least one word plus its trailing space here
                $new_lines[] = substr($current, 0, -1);
        }

        return implode($break, $new_lines);
}
 
1919
 
 
1920
?>
 
 
b'\\ No newline at end of file'