~azzar1/unity/add-show-desktop-key

« back to all changes in this revision

Viewing changes to www/php/phpBB3/includes/utf/utf_tools.php

  • Committer: William Grant
  • Date: 2009-02-23 23:47:02 UTC
  • mfrom: (1099.1.211 new-dispatch)
  • Revision ID: grantw@unimelb.edu.au-20090223234702-db4b1llly46ignwo
Merge from lp:~ivle-dev/ivle/new-dispatch.

Pretty much everything changes. Reread the setup docs. Backup your databases.
Every file is now in a different installed location, the configuration system
is rewritten, the dispatch system is rewritten, URLs are different, the
database is different, worksheets and exercises are no longer on the
filesystem, we use a templating engine, jail service protocols are rewritten,
we don't repeat ourselves, we have authorization rewritten, phpBB is gone,
and probably lots of other things that I cannot remember.

This is certainly the biggest commit I have ever made, and hopefully
the largest I ever will.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
<?php
2
 
/**
3
 
*
4
 
* @package utf
5
 
* @version $Id: utf_tools.php,v 1.72 2007/12/04 16:20:38 naderman Exp $
6
 
* @copyright (c) 2006 phpBB Group
7
 
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
8
 
*
9
 
*/
10
 
 
11
 
/**
12
 
*/
13
 
if (!defined('IN_PHPBB'))
14
 
{
15
 
        exit;
16
 
}
17
 
 
18
 
// Enforce ASCII only string handling
19
 
setlocale(LC_CTYPE, 'C');
20
 
 
21
 
/**
22
 
* UTF-8 tools
23
 
*
24
 
* Whenever possible, these functions will try to use PHP's built-in functions or
25
 
* extensions, otherwise they will default to custom routines.
26
 
*
27
 
* @package utf
28
 
*/
29
 
 
30
 
if (!extension_loaded('xml'))
31
 
{
32
 
        /**
33
 
        * Implementation of PHP's native utf8_encode for people without XML support
34
 
        * This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
35
 
        *
36
 
        * @param string $str ISO-8859-1 encoded data
37
 
        * @return string UTF-8 encoded data
38
 
        */
39
 
        function utf8_encode($str)
40
 
        {
41
 
                $out = '';
42
 
                for ($i = 0, $len = strlen($str); $i < $len; $i++)
43
 
                {
44
 
                        $letter = $str[$i];
45
 
                        $num = ord($letter);
46
 
                        if ($num < 0x80)
47
 
                        {
48
 
                                $out .= $letter;
49
 
                        }
50
 
                        else if ($num < 0xC0)
51
 
                        {
52
 
                                $out .= "\xC2" . $letter;
53
 
                        }
54
 
                        else
55
 
                        {
56
 
                                $out .= "\xC3" . chr($num - 64);
57
 
                        }
58
 
                }
59
 
                return $out;
60
 
        }
61
 
 
62
 
        /**
63
 
        * Implementation of PHP's native utf8_decode for people without XML support
64
 
        *
65
 
        * @param string $str UTF-8 encoded data
66
 
        * @return string ISO-8859-1 encoded data
67
 
        */
68
 
        function utf8_decode($str)
69
 
        {
70
 
                $pos = 0;
71
 
                $len = strlen($str);
72
 
                $ret = '';
73
 
        
74
 
                while ($pos < $len)
75
 
                {
76
 
                        $ord = ord($str[$pos]) & 0xF0;
77
 
                        if ($ord === 0xC0 || $ord === 0xD0)
78
 
                        {
79
 
                                $charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
80
 
                                $pos += 2;
81
 
                                $ret .= (($charval < 256) ? chr($charval) : '?');
82
 
                        }
83
 
                        else if ($ord === 0xE0)
84
 
                        {
85
 
                                $ret .= '?';
86
 
                                $pos += 3;
87
 
                        }
88
 
                        else if ($ord === 0xF0)
89
 
                        {
90
 
                                $ret .= '?';
91
 
                                $pos += 4;
92
 
                        }
93
 
                        else
94
 
                        {
95
 
                                $ret .= $str[$pos];
96
 
                                ++$pos;
97
 
                        }
98
 
                }
99
 
                return $ret;
100
 
        }
101
 
}
102
 
 
103
 
// mbstring is old and has its functions around for older versions of PHP.
104
 
// if mbstring is not loaded, we go into native mode.
105
 
if (extension_loaded('mbstring'))
106
 
{
107
 
        mb_internal_encoding('UTF-8');
108
 
 
109
 
        /**
110
 
        * UTF-8 aware alternative to strrpos
111
 
        * Find position of last occurrence of a char in a string
112
 
        *
113
 
        * Notes:
114
 
        * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
115
 
        */
116
 
        if (version_compare(PHP_VERSION, '5.2.0', '>='))
117
 
        {
118
 
                /**
119
 
                * UTF-8 aware alternative to strrpos
120
 
                * @ignore
121
 
                */
122
 
                function utf8_strrpos($str,     $needle, $offset = null)
123
 
                {
124
 
                        // Emulate behaviour of strrpos rather than raising warning
125
 
                        if (empty($str))
126
 
                        {
127
 
                                return false;
128
 
                        }
129
 
 
130
 
                        if (is_null($offset))
131
 
                        {
132
 
                                return mb_strrpos($str, $needle);
133
 
                        }
134
 
                        else
135
 
                        {
136
 
                                return mb_strrpos($str, $needle, $offset);
137
 
                        }
138
 
                }
139
 
        }
140
 
        else
141
 
        {
142
 
                /**
143
 
                * UTF-8 aware alternative to strrpos
144
 
                * @ignore
145
 
                */
146
 
                function utf8_strrpos($str,     $needle, $offset = null)
147
 
                {
148
 
                        // offset for mb_strrpos was added in 5.2.0
149
 
                        if (is_null($offset))
150
 
                        {
151
 
                                // Emulate behaviour of strrpos rather than raising warning
152
 
                                if (empty($str))
153
 
                                {
154
 
                                        return false;
155
 
                                }
156
 
 
157
 
                                return mb_strrpos($str, $needle);
158
 
                        }
159
 
                        else
160
 
                        {
161
 
                                if (!is_int($offset))
162
 
                                {
163
 
                                        trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
164
 
                                        return false;
165
 
                                }
166
 
 
167
 
                                $str = mb_substr($str, $offset);
168
 
 
169
 
                                if (false !== ($pos = mb_strrpos($str, $needle)))
170
 
                                {
171
 
                                        return $pos + $offset;
172
 
                                }
173
 
 
174
 
                                return false;
175
 
                        }
176
 
                }
177
 
        }
178
 
 
179
 
        /**
180
 
        * UTF-8 aware alternative to strpos
181
 
        * @ignore
182
 
        */
183
 
        function utf8_strpos($str, $needle, $offset = null)
184
 
        {
185
 
                if (is_null($offset))
186
 
                {
187
 
                        return mb_strpos($str, $needle);
188
 
                }
189
 
                else
190
 
                {
191
 
                        return mb_strpos($str, $needle, $offset);
192
 
                }
193
 
        }
194
 
 
195
 
        /**
196
 
        * UTF-8 aware alternative to strtolower
197
 
        * @ignore
198
 
        */
199
 
        function utf8_strtolower($str)
200
 
        {
201
 
                return mb_strtolower($str);
202
 
        }
203
 
 
204
 
        /**
205
 
        * UTF-8 aware alternative to strtoupper
206
 
        * @ignore
207
 
        */
208
 
        function utf8_strtoupper($str)
209
 
        {
210
 
                return mb_strtoupper($str);
211
 
        }
212
 
 
213
 
        /**
214
 
        * UTF-8 aware alternative to substr
215
 
        * @ignore
216
 
        */
217
 
        function utf8_substr($str, $offset, $length = null)
218
 
        {
219
 
                if (is_null($length))
220
 
                {
221
 
                        return mb_substr($str, $offset);
222
 
                }
223
 
                else
224
 
                {
225
 
                        return mb_substr($str, $offset, $length);
226
 
                }
227
 
        }
228
 
 
229
 
        /**
230
 
        * Return the length (in characters) of a UTF-8 string
231
 
        * @ignore
232
 
        */
233
 
        function utf8_strlen($text)
234
 
        {
235
 
                return mb_strlen($text, 'utf-8');
236
 
        }
237
 
}
238
 
else
239
 
{
240
 
        /**
241
 
        * UTF-8 aware alternative to strrpos
242
 
        * Find position of last occurrence of a char in a string
243
 
        *
244
 
        * @author Harry Fuecks
245
 
        * @param string $str haystack
246
 
        * @param string $needle needle
247
 
        * @param integer $offset (optional) offset (from left)
248
 
        * @return mixed integer position or FALSE on failure
249
 
        */
250
 
        function utf8_strrpos($str,     $needle, $offset = null)
251
 
        {
252
 
                if (is_null($offset))
253
 
                {
254
 
                        $ar     = explode($needle, $str);
255
 
                        
256
 
                        if (sizeof($ar) > 1)
257
 
                        {
258
 
                                // Pop off the end of the string where the last match was made
259
 
                                array_pop($ar);
260
 
                                $str = join($needle, $ar);
261
 
 
262
 
                                return utf8_strlen($str);
263
 
                        }
264
 
                        return false;
265
 
                }
266
 
                else
267
 
                {
268
 
                        if (!is_int($offset))
269
 
                        {
270
 
                                trigger_error('utf8_strrpos     expects parameter 3     to be long', E_USER_ERROR);
271
 
                                return false;
272
 
                        }
273
 
 
274
 
                        $str = utf8_substr($str, $offset);
275
 
 
276
 
                        if (false !== ($pos = utf8_strrpos($str, $needle)))
277
 
                        {
278
 
                                return $pos     + $offset;
279
 
                        }
280
 
 
281
 
                        return false;
282
 
                }
283
 
        }
284
 
 
285
 
        /**
286
 
        * UTF-8 aware alternative to strpos
287
 
        * Find position of first occurrence of a string
288
 
        *
289
 
        * @author Harry Fuecks
290
 
        * @param string $str haystack
291
 
        * @param string $needle needle
292
 
        * @param integer $offset offset in characters (from left)
293
 
        * @return mixed integer position or FALSE on failure
294
 
        */
295
 
        function utf8_strpos($str, $needle, $offset = null)
296
 
        {
297
 
                if (is_null($offset))
298
 
                {
299
 
                        $ar = explode($needle, $str);
300
 
                        if (sizeof($ar) > 1)
301
 
                        {
302
 
                                return utf8_strlen($ar[0]);
303
 
                        }
304
 
                        return false;
305
 
                }
306
 
                else
307
 
                {
308
 
                        if (!is_int($offset))
309
 
                        {
310
 
                                trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
311
 
                                return false;
312
 
                        }
313
 
 
314
 
                        $str = utf8_substr($str, $offset);
315
 
 
316
 
                        if (false !== ($pos = utf8_strpos($str, $needle)))
317
 
                        {
318
 
                                return $pos + $offset;
319
 
                        }
320
 
 
321
 
                        return false;
322
 
                }
323
 
        }
324
 
 
325
 
        /**
326
 
        * UTF-8 aware alternative to strtolower
327
 
        * Make a string lowercase
328
 
        * Note: The concept of a characters "case" only exists is some alphabets
329
 
        * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
330
 
        * not exist in the Chinese alphabet, for example. See Unicode Standard
331
 
        * Annex #21: Case Mappings
332
 
        *
333
 
        * @param string
334
 
        * @return string string in lowercase
335
 
        */
336
 
        function utf8_strtolower($string)
337
 
        {
338
 
                static $utf8_upper_to_lower = array(
339
 
                        "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
340
 
                        "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
341
 
                        "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
342
 
                        "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
343
 
                        "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
344
 
                        "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
345
 
                        "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
346
 
                        "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
347
 
                        "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
348
 
                        "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
349
 
                        "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
350
 
                        "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
351
 
                        "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
352
 
                        "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
353
 
                        "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
354
 
                        "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
355
 
                        "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
356
 
                        "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
357
 
                        "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
358
 
                        "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
359
 
                        "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
360
 
                        "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
361
 
                        "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
362
 
                        "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
363
 
                        "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
364
 
                        "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
365
 
                        "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
366
 
                        "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
367
 
                        "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
368
 
                        "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
369
 
                        "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
370
 
                        "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
371
 
                        "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
372
 
                        "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
373
 
                        "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
374
 
                        "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
375
 
                        "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
376
 
                        "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
377
 
                        "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
378
 
                        "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
379
 
                        "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
380
 
                        "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
381
 
                        "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
382
 
                        "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
383
 
                        "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
384
 
                        "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
385
 
                );
386
 
 
387
 
                return strtr(strtolower($string), $utf8_upper_to_lower);
388
 
        }
389
 
 
390
 
        /**
391
 
        * UTF-8 aware alternative to strtoupper
392
 
        * Make a string uppercase
393
 
        * Note: The concept of a characters "case" only exists is some alphabets
394
 
        * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
395
 
        * not exist in the Chinese alphabet, for example. See Unicode Standard
396
 
        * Annex #21: Case Mappings
397
 
        *
398
 
        * @param string
399
 
        * @return string string in uppercase
400
 
        */
401
 
        function utf8_strtoupper($string)
402
 
        {
403
 
                static $utf8_lower_to_upper = array(
404
 
                        "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
405
 
                        "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
406
 
                        "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
407
 
                        "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
408
 
                        "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
409
 
                        "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
410
 
                        "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
411
 
                        "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
412
 
                        "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
413
 
                        "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
414
 
                        "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
415
 
                        "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
416
 
                        "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
417
 
                        "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
418
 
                        "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
419
 
                        "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
420
 
                        "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
421
 
                        "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
422
 
                        "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
423
 
                        "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
424
 
                        "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
425
 
                        "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
426
 
                        "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
427
 
                        "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
428
 
                        "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
429
 
                        "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
430
 
                        "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
431
 
                        "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
432
 
                        "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
433
 
                        "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
434
 
                        "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
435
 
                        "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
436
 
                        "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
437
 
                        "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
438
 
                        "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
439
 
                        "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
440
 
                        "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
441
 
                        "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
442
 
                        "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
443
 
                        "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
444
 
                        "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
445
 
                        "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
446
 
                        "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
447
 
                        "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
448
 
                        "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
449
 
                        "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
450
 
                );
451
 
 
452
 
                return strtr(strtoupper($string), $utf8_lower_to_upper);
453
 
        }
454
 
 
455
 
        /**
456
 
        * UTF-8 aware alternative to substr
457
 
        * Return part of a string given character offset (and optionally length)
458
 
        *
459
 
        * Note arguments: comparied to substr - if offset or length are
460
 
        * not integers, this version will not complain but rather massages them
461
 
        * into an integer.
462
 
        *
463
 
        * Note on returned values: substr documentation states false can be
464
 
        * returned in some cases (e.g. offset > string length)
465
 
        * mb_substr never returns false, it will return an empty string instead.
466
 
        * This adopts the mb_substr approach
467
 
        *
468
 
        * Note on implementation: PCRE only supports repetitions of less than
469
 
        * 65536, in order to accept up to MAXINT values for offset and length,
470
 
        * we'll repeat a group of 65535 characters when needed.
471
 
        *
472
 
        * Note on implementation: calculating the number of characters in the
473
 
        * string is a relatively expensive operation, so we only carry it out when
474
 
        * necessary. It isn't necessary for +ve offsets and no specified length
475
 
        *
476
 
        * @author Chris Smith<chris@jalakai.co.uk>
477
 
        * @param string $str
478
 
        * @param integer $offset number of UTF-8 characters offset (from left)
479
 
        * @param integer $length (optional) length in UTF-8 characters from offset
480
 
        * @return mixed string or FALSE if failure
481
 
        */
482
 
        function utf8_substr($str, $offset, $length = NULL)
483
 
        {
484
 
                // generates E_NOTICE
485
 
                // for PHP4 objects, but not PHP5 objects
486
 
                $str = (string) $str;
487
 
                $offset = (int) $offset;
488
 
                if (!is_null($length))
489
 
                {
490
 
                        $length = (int) $length;
491
 
                }
492
 
 
493
 
                // handle trivial cases
494
 
                if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
495
 
                {
496
 
                        return '';
497
 
                }
498
 
 
499
 
                // normalise negative offsets (we could use a tail
500
 
                // anchored pattern, but they are horribly slow!)
501
 
                if ($offset < 0)
502
 
                {
503
 
                        // see notes
504
 
                        $strlen = utf8_strlen($str);
505
 
                        $offset = $strlen + $offset;
506
 
                        if ($offset < 0)
507
 
                        {
508
 
                                $offset = 0;
509
 
                        }
510
 
                }
511
 
 
512
 
                $op = '';
513
 
                $lp = '';
514
 
 
515
 
                // establish a pattern for offset, a
516
 
                // non-captured group equal in length to offset
517
 
                if ($offset > 0)
518
 
                {
519
 
                        $ox = (int) ($offset / 65535);
520
 
                        $oy = $offset % 65535;
521
 
 
522
 
                        if ($ox)
523
 
                        {
524
 
                                $op = '(?:.{65535}){' . $ox . '}';
525
 
                        }
526
 
 
527
 
                        $op = '^(?:' . $op . '.{' . $oy . '})';
528
 
                }
529
 
                else
530
 
                {       
531
 
                        // offset == 0; just anchor the pattern
532
 
                        $op = '^';
533
 
                }
534
 
 
535
 
                // establish a pattern for length
536
 
                if (is_null($length))
537
 
                {
538
 
                        // the rest of the string
539
 
                        $lp = '(.*)$';
540
 
                }
541
 
                else
542
 
                {
543
 
                        if (!isset($strlen))
544
 
                        {
545
 
                                // see notes
546
 
                                $strlen = utf8_strlen($str);
547
 
                        }
548
 
 
549
 
                        // another trivial case
550
 
                        if ($offset > $strlen)
551
 
                        {
552
 
                                return '';
553
 
                        }
554
 
 
555
 
                        if ($length > 0)
556
 
                        {
557
 
                                // reduce any length that would
558
 
                                // go passed the end of the string
559
 
                                $length = min($strlen - $offset, $length);
560
 
 
561
 
                                $lx = (int) ($length / 65535);
562
 
                                $ly = $length % 65535;
563
 
                                
564
 
                                // negative length requires a captured group
565
 
                                // of length characters
566
 
                                if ($lx)
567
 
                                {
568
 
                                        $lp = '(?:.{65535}){' . $lx . '}';
569
 
                                }
570
 
                                $lp = '(' . $lp . '.{'. $ly . '})';
571
 
                        }
572
 
                        else if ($length < 0)
573
 
                        {
574
 
                                if ($length < ($offset - $strlen))
575
 
                                {
576
 
                                        return '';
577
 
                                }
578
 
 
579
 
                                $lx = (int)((-$length) / 65535);
580
 
                                $ly = (-$length) % 65535;
581
 
 
582
 
                                // negative length requires ... capture everything
583
 
                                // except a group of  -length characters
584
 
                                // anchored at the tail-end of the string
585
 
                                if ($lx)
586
 
                                {
587
 
                                        $lp = '(?:.{65535}){' . $lx . '}';
588
 
                                }
589
 
                                $lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
590
 
                        }
591
 
                }
592
 
 
593
 
                if (!preg_match('#' . $op . $lp . '#us', $str, $match))
594
 
                {
595
 
                        return '';
596
 
                }
597
 
 
598
 
                return $match[1];
599
 
        }
600
 
 
601
 
        /**
602
 
        * Return the length (in characters) of a UTF-8 string
603
 
        *
604
 
        * @param        string  $text           UTF-8 string
605
 
        * @return       integer                         Length (in chars) of given string
606
 
        */
607
 
        function utf8_strlen($text)
608
 
        {
609
 
                // Since utf8_decode is replacing multibyte characters to ? strlen works fine
610
 
                return strlen(utf8_decode($text));
611
 
        }
612
 
}
613
 
 
614
 
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array of chunks, each $split_len characters long
* (the last chunk may be shorter)
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk, must be an integer >= 1
* @return array|bool chunks of $str in their original order, false if $split_len is invalid
*/
function utf8_str_split($str, $split_len = 1)
{
        if (!is_int($split_len) || $split_len < 1)
        {
                return false;
        }

        // nothing to split: the whole string fits into a single chunk
        if (utf8_strlen($str) <= $split_len)
        {
                return array($str);
        }

        // Greedily consume up to $split_len characters per match. With the s
        // modifier the dot matches every character, so a short final chunk is
        // kept even when it contains NUL or newline characters.
        preg_match_all('/.{1,' . $split_len . '}/us', $str, $chunks);

        return $chunks[0];
}
639
 
 
640
 
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string to examine
* @param string $mask UTF-8 encoded list of allowed characters, each one taken literally
* @param int $start optional character offset, handed to utf8_substr()
* @param int $length optional number of characters to examine, handed to utf8_substr()
* @return int number of leading characters of the examined segment that occur in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
        if ($start !== null || $length !== null)
        {
                $str = utf8_substr($str, $start, $length);
        }

        // An empty mask cannot match anything and would produce an invalid,
        // empty character class below
        if ((string) $mask === '')
        {
                return 0;
        }

        // The mask is a plain list of characters: escape everything that has a
        // special meaning in a pattern (^, ], \, - and the delimiter among others)
        // so that it cannot negate, terminate or otherwise alter the character class
        $char_class = preg_quote($mask, '/');

        if (preg_match('/^[' . $char_class . ']+/u', $str, $matches))
        {
                return utf8_strlen($matches[0]);
        }

        return 0;
}
662
 
 
663
 
/**
* UTF-8 aware alternative to ucfirst
* Upper-cases the first character of a string and leaves the remainder as it is
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string
* @return string $str with its first character passed through utf8_strtoupper()
*/
function utf8_ucfirst($str)
{
        $char_count = utf8_strlen($str);

        if ($char_count == 0)
        {
                return '';
        }

        if ($char_count == 1)
        {
                return utf8_strtoupper($str);
        }

        // capture the first character and everything after it separately
        preg_match('/^(.{1})(.*)$/us', $str, $parts);

        return utf8_strtoupper($parts[1]) . $parts[2];
}
689
 
 
690
 
/**
* Recode a string to UTF-8
*
* The conversion is attempted with iconv(), then with mbstring (only for a
* fixed list of encodings), then with the recode extension. If none of them
* yields a non-empty result, the transcoder functions shipped in
* includes/utf/data/ are used. An encoding that no transcoder handles
* triggers an E_USER_ERROR.
*
* UTF-8 input, non-string values and empty strings are returned untouched.
*
* @param        string  $string         Original string
* @param        string  $encoding       Original encoding (it is lower-cased here)
* @return       string                          The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
        $encoding = strtolower($encoding);

        if ($encoding == 'utf-8' || !is_string($string) || empty($string))
        {
                return $string;
        }

        if ($encoding == 'iso-8859-1')
        {
                // we force iso-8859-1 to be cp1252
                $encoding = 'cp1252';
        }
        else if ($encoding == 'iso-8859-8-i')
        {
                // convert iso-8859-8-i to iso-8859-8
                $encoding = 'iso-8859-8';
                $string = hebrev($string);
        }

        // First choice: iconv()
        if (function_exists('iconv'))
        {
                $converted = @iconv($encoding, 'utf-8', $string);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // Second choice: mbstring. It is nasty on PHP4, so it is only called
        // with encoding names from this list
        $mb_encodings = array(
                'iso-8859-1',
                'iso-8859-2',
                'iso-8859-4',
                'iso-8859-7',
                'iso-8859-9',
                'iso-8859-15',
                'windows-1251',
                'windows-1252',
                'cp1252',
                'shift_jis',
                'euc-kr',
                'big5',
                'gb2312',
        );

        if (function_exists('mb_convert_encoding') && in_array($encoding, $mb_encodings))
        {
                $converted = @mb_convert_encoding($string, 'utf-8', $encoding);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // Third choice: the recode extension
        if (function_exists('recode_string'))
        {
                $converted = @recode_string($encoding . '..utf-8', $string);

                if (!empty($converted))
                {
                        return $converted;
                }
        }

        // Last resort: the bundled transcoders. Make sure the encoding name is
        // alphanumeric, we don't want it to be abused into loading arbitrary files
        if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
        {
                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
        }

        global $phpbb_root_path, $phpEx;

        $basic_file = $phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx;
        $cjk_file = $phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx;

        // iso-8859-* character encoding
        if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $iso_match))
        {
                if (in_array($iso_match[1], array('1', '2', '4', '7', '8', '9', '15')))
                {
                        $transcoder = 'iso_8859_' . $iso_match[1];

                        if (!function_exists($transcoder))
                        {
                                if (!file_exists($basic_file))
                                {
                                        trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                                }
                                include($basic_file);
                        }

                        return call_user_func($transcoder, $string);
                }

                trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
        }

        // CP/WIN character encoding
        if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $cp_match))
        {
                if (in_array($cp_match[1], array('1250', '1251', '1252', '1254', '1255', '1256', '1257', '874')))
                {
                        $transcoder = 'cp' . $cp_match[1];

                        if (!function_exists($transcoder))
                        {
                                if (!file_exists($basic_file))
                                {
                                        trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                                }
                                include($basic_file);
                        }

                        return call_user_func($transcoder, $string);
                }
                else if ($cp_match[1] != '932')
                {
                        // code page 932 is not an error: it is picked up by the SJIS entry below
                        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
                }
        }

        // Remaining transcoders, tried in this order:
        // name pattern, function, file defining the function, error if that file is absent
        $transcoders = array(
                // TIS-620
                array('/tis[_ -]?620/', 'tis_620', $basic_file, 'Basic reencoder file is missing'),
                // SJIS
                array('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', 'sjis', $cjk_file, 'CJK reencoder file is missing'),
                // EUC_KR
                array('/euc[_ -]?kr/', 'euc_kr', $cjk_file, 'CJK reencoder file is missing'),
                // BIG-5
                array('/big[_ -]?5/', 'big5', $cjk_file, 'CJK reencoder file is missing'),
                // GB2312
                array('/gb[_ -]?2312/', 'gb2312', $cjk_file, 'CJK reencoder file is missing'),
        );

        foreach ($transcoders as $entry)
        {
                list($name_pattern, $transcoder, $data_file, $missing_message) = $entry;

                if (!preg_match($name_pattern, $encoding))
                {
                        continue;
                }

                if (!function_exists($transcoder))
                {
                        if (!file_exists($data_file))
                        {
                                trigger_error($missing_message, E_USER_ERROR);
                        }
                        include($data_file);
                }

                return $transcoder($string);
        }

        // No transcoder is available for this encoding
        trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
}
914
 
 
915
 
/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* Each multibyte sequence (a lead byte followed by one to three continuation
* bytes) is handed to utf8_encode_ncr_callback() for conversion.
*
* @param        string  $text           UTF-8 string in NFC
* @return       string                          ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
        $multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

        return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
925
 
 
926
 
/**
* Callback used in encode_ncr()
*
* Takes the matched UTF-8 char and wraps the result of utf8_ord() into a
* decimal NCR. Attention, $m is an array
*
* @param        array   $m                      0-based numerically indexed array passed by preg_replace_callback()
* @return       string                          "&#", the value returned by utf8_ord() for $m[0], then ";"
*/
function utf8_encode_ncr_callback($m)
{
        $code_point = utf8_ord($m[0]);

        return '&#' . $code_point . ';';
}
938
 
 
939
 
/**
* Converts a UTF-8 char to its Unicode code point
*
* The bytes are not validated; only the payload bits of each byte are combined.
*
* @param string $chr UTF-8 char (1 to 4 bytes)
* @return integer UNICODE code point; $chr itself is returned when it is not 1 to 4 bytes long
*/
function utf8_ord($chr)
{
        $byte_count = strlen($chr);

        if ($byte_count < 1 || $byte_count > 4)
        {
                return $chr;
        }

        if ($byte_count == 1)
        {
                return ord($chr);
        }

        // payload bits carried by the lead byte, indexed by sequence length
        $lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

        $code_point = ord($chr[0]) & $lead_masks[$byte_count];

        // every continuation byte contributes its six low bits
        for ($i = 1; $i < $byte_count; $i++)
        {
                $code_point = ($code_point << 6) | (ord($chr[$i]) & 0x3F);
        }

        return $code_point;
}
969
 
 
970
 
/**
* Converts a Unicode code point to a UTF-8 char
*
* The code point is not checked for validity; it is only split into the
* one to four bytes that its magnitude requires.
*
* @param        int             $cp     UNICODE code point
* @return       string          UTF-8 char
*/
function utf8_chr($cp)
{
        if ($cp > 0xFFFF)
        {
                // four bytes
                $bytes = array(
                        0xF0 | ($cp >> 18),
                        0x80 | (($cp >> 12) & 0x3F),
                        0x80 | (($cp >> 6) & 0x3F),
                        0x80 | ($cp & 0x3F),
                );
        }
        else if ($cp > 0x7FF)
        {
                // three bytes
                $bytes = array(
                        0xE0 | ($cp >> 12),
                        0x80 | (($cp >> 6) & 0x3F),
                        0x80 | ($cp & 0x3F),
                );
        }
        else if ($cp > 0x7F)
        {
                // two bytes
                $bytes = array(
                        0xC0 | ($cp >> 6),
                        0x80 | ($cp & 0x3F),
                );
        }
        else
        {
                // plain ASCII
                $bytes = array($cp);
        }

        return implode('', array_map('chr', $bytes));
}
995
 
 
996
 
/**
* Convert Numeric Character References to UTF-8 chars
*
* Decimal references of one to six digits and hexadecimal references of one
* to five digits are recognised, case-insensitively.
*
* Notes:
*       - we do not convert NCRs recursively, if you pass &#38;#38; it will return &#38;
*       - we DO NOT check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
*
* @param        string  $text           String to convert, encoded in UTF-8 (no normal form required)
* @return       string                          UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
        $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

        return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
1010
 
 
1011
 
/**
* Callback used in decode_ncr()
*
* Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
* It will ignore most of invalid NCRs, but not all!
*
* @param        array   $m                      0-based numerically indexed array passed by preg_replace_callback()
* @return       string                          UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
        $reference = $m[1];

        if (strncasecmp($reference, 'x', 1))
        {
                // no leading "x": decimal notation, handed on as it is
                $cp = $reference;
        }
        else
        {
                // hexadecimal notation: drop the "x" and convert the digits
                $cp = hexdec(substr($reference, 1));
        }

        return utf8_chr($cp);
}
1026
 
 
1027
 
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The folding tables are read from includes/utf/data/ on first use and kept
* in a static array for later calls.
*
* @param        string  $text   text to be case folded
* @param        string  $option determines how we will fold the cases: 'full' uses the full table, any other value the simple one
* @return       string                  case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
        static $uniarray = array();
        global $phpbb_root_path, $phpEx;

        // The common table is always applied. It is followed by the full table
        // (a character may become several characters) or by the simple table
        // (one character becomes one character)
        $second_table = ($option === 'full') ? 'f' : 's';

        $data_files = array(
                'c'     => 'includes/utf/data/case_fold_c.',
                'f'     => 'includes/utf/data/case_fold_f.',
                's'     => 'includes/utf/data/case_fold_s.',
        );

        // load whichever of the two required tables is not cached yet
        foreach (array('c', $second_table) as $table)
        {
                if (!isset($uniarray[$table]))
                {
                        $uniarray[$table] = include($phpbb_root_path . $data_files[$table] . $phpEx);
                }
        }

        return strtr(strtr($text, $uniarray['c']), $uniarray[$second_table]);
}
1073
 
 
1074
 
/**
1075
 
* Takes the input and does a "special" case fold. It does minor normalization
1076
 
* and returns NFKC compatable text
1077
 
*
1078
 
* @param        string  $text   text to be case folded
1079
 
* @param        string  $option determines how we will fold the cases
1080
 
* @return       string                  case folded text
1081
 
*/
1082
 
function utf8_case_fold_nfkc($text, $option = 'full')
1083
 
{
1084
 
        static $fc_nfkc_closure = array(
1085
 
                "\xCD\xBA"      => "\x20\xCE\xB9",
1086
 
                "\xCF\x92"      => "\xCF\x85",
1087
 
                "\xCF\x93"      => "\xCF\x8D",
1088
 
                "\xCF\x94"      => "\xCF\x8B",
1089
 
                "\xCF\xB2"      => "\xCF\x83",
1090
 
                "\xCF\xB9"      => "\xCF\x83",
1091
 
                "\xE1\xB4\xAC"  => "\x61",
1092
 
                "\xE1\xB4\xAD"  => "\xC3\xA6",
1093
 
                "\xE1\xB4\xAE"  => "\x62",
1094
 
                "\xE1\xB4\xB0"  => "\x64",
1095
 
                "\xE1\xB4\xB1"  => "\x65",
1096
 
                "\xE1\xB4\xB2"  => "\xC7\x9D",
1097
 
                "\xE1\xB4\xB3"  => "\x67",
1098
 
                "\xE1\xB4\xB4"  => "\x68",
1099
 
                "\xE1\xB4\xB5"  => "\x69",
1100
 
                "\xE1\xB4\xB6"  => "\x6A",
1101
 
                "\xE1\xB4\xB7"  => "\x6B",
1102
 
                "\xE1\xB4\xB8"  => "\x6C",
1103
 
                "\xE1\xB4\xB9"  => "\x6D",
1104
 
                "\xE1\xB4\xBA"  => "\x6E",
1105
 
                "\xE1\xB4\xBC"  => "\x6F",
1106
 
                "\xE1\xB4\xBD"  => "\xC8\xA3",
1107
 
                "\xE1\xB4\xBE"  => "\x70",
1108
 
                "\xE1\xB4\xBF"  => "\x72",
1109
 
                "\xE1\xB5\x80"  => "\x74",
1110
 
                "\xE1\xB5\x81"  => "\x75",
1111
 
                "\xE1\xB5\x82"  => "\x77",
1112
 
                "\xE2\x82\xA8"  => "\x72\x73",
1113
 
                "\xE2\x84\x82"  => "\x63",
1114
 
                "\xE2\x84\x83"  => "\xC2\xB0\x63",
1115
 
                "\xE2\x84\x87"  => "\xC9\x9B",
1116
 
                "\xE2\x84\x89"  => "\xC2\xB0\x66",
1117
 
                "\xE2\x84\x8B"  => "\x68",
1118
 
                "\xE2\x84\x8C"  => "\x68",
1119
 
                "\xE2\x84\x8D"  => "\x68",
1120
 
                "\xE2\x84\x90"  => "\x69",
1121
 
                "\xE2\x84\x91"  => "\x69",
1122
 
                "\xE2\x84\x92"  => "\x6C",
1123
 
                "\xE2\x84\x95"  => "\x6E",
1124
 
                "\xE2\x84\x96"  => "\x6E\x6F",
1125
 
                "\xE2\x84\x99"  => "\x70",
1126
 
                "\xE2\x84\x9A"  => "\x71",
1127
 
                "\xE2\x84\x9B"  => "\x72",
1128
 
                "\xE2\x84\x9C"  => "\x72",
1129
 
                "\xE2\x84\x9D"  => "\x72",
1130
 
                "\xE2\x84\xA0"  => "\x73\x6D",
1131
 
                "\xE2\x84\xA1"  => "\x74\x65\x6C",
1132
 
                "\xE2\x84\xA2"  => "\x74\x6D",
1133
 
                "\xE2\x84\xA4"  => "\x7A",
1134
 
                "\xE2\x84\xA8"  => "\x7A",
1135
 
                "\xE2\x84\xAC"  => "\x62",
1136
 
                "\xE2\x84\xAD"  => "\x63",
1137
 
                "\xE2\x84\xB0"  => "\x65",
1138
 
                "\xE2\x84\xB1"  => "\x66",
1139
 
                "\xE2\x84\xB3"  => "\x6D",
1140
 
                "\xE2\x84\xBB"  => "\x66\x61\x78",
1141
 
                "\xE2\x84\xBE"  => "\xCE\xB3",
1142
 
                "\xE2\x84\xBF"  => "\xCF\x80",
1143
 
                "\xE2\x85\x85"  => "\x64",
1144
 
                "\xE3\x89\x90"  => "\x70\x74\x65",
1145
 
                "\xE3\x8B\x8C"  => "\x68\x67",
1146
 
                "\xE3\x8B\x8E"  => "\x65\x76",
1147
 
                "\xE3\x8B\x8F"  => "\x6C\x74\x64",
1148
 
                "\xE3\x8D\xB1"  => "\x68\x70\x61",
1149
 
                "\xE3\x8D\xB3"  => "\x61\x75",
1150
 
                "\xE3\x8D\xB5"  => "\x6F\x76",
1151
 
                "\xE3\x8D\xBA"  => "\x69\x75",
1152
 
                "\xE3\x8E\x80"  => "\x70\x61",
1153
 
                "\xE3\x8E\x81"  => "\x6E\x61",
1154
 
                "\xE3\x8E\x82"  => "\xCE\xBC\x61",
1155
 
                "\xE3\x8E\x83"  => "\x6D\x61",
1156
 
                "\xE3\x8E\x84"  => "\x6B\x61",
1157
 
                "\xE3\x8E\x85"  => "\x6B\x62",
1158
 
                "\xE3\x8E\x86"  => "\x6D\x62",
1159
 
                "\xE3\x8E\x87"  => "\x67\x62",
1160
 
                "\xE3\x8E\x8A"  => "\x70\x66",
1161
 
                "\xE3\x8E\x8B"  => "\x6E\x66",
1162
 
                "\xE3\x8E\x8C"  => "\xCE\xBC\x66",
1163
 
                "\xE3\x8E\x90"  => "\x68\x7A",
1164
 
                "\xE3\x8E\x91"  => "\x6B\x68\x7A",
1165
 
                "\xE3\x8E\x92"  => "\x6D\x68\x7A",
1166
 
                "\xE3\x8E\x93"  => "\x67\x68\x7A",
1167
 
                "\xE3\x8E\x94"  => "\x74\x68\x7A",
1168
 
                "\xE3\x8E\xA9"  => "\x70\x61",
1169
 
                "\xE3\x8E\xAA"  => "\x6B\x70\x61",
1170
 
                "\xE3\x8E\xAB"  => "\x6D\x70\x61",
1171
 
                "\xE3\x8E\xAC"  => "\x67\x70\x61",
1172
 
                "\xE3\x8E\xB4"  => "\x70\x76",
1173
 
                "\xE3\x8E\xB5"  => "\x6E\x76",
1174
 
                "\xE3\x8E\xB6"  => "\xCE\xBC\x76",
1175
 
                "\xE3\x8E\xB7"  => "\x6D\x76",
1176
 
                "\xE3\x8E\xB8"  => "\x6B\x76",
1177
 
                "\xE3\x8E\xB9"  => "\x6D\x76",
1178
 
                "\xE3\x8E\xBA"  => "\x70\x77",
1179
 
                "\xE3\x8E\xBB"  => "\x6E\x77",
1180
 
                "\xE3\x8E\xBC"  => "\xCE\xBC\x77",
1181
 
                "\xE3\x8E\xBD"  => "\x6D\x77",
1182
 
                "\xE3\x8E\xBE"  => "\x6B\x77",
1183
 
                "\xE3\x8E\xBF"  => "\x6D\x77",
1184
 
                "\xE3\x8F\x80"  => "\x6B\xCF\x89",
1185
 
                "\xE3\x8F\x81"  => "\x6D\xCF\x89",
1186
 
                "\xE3\x8F\x83"  => "\x62\x71",
1187
 
                "\xE3\x8F\x86"  => "\x63\xE2\x88\x95\x6B\x67",
1188
 
                "\xE3\x8F\x87"  => "\x63\x6F\x2E",
1189
 
                "\xE3\x8F\x88"  => "\x64\x62",
1190
 
                "\xE3\x8F\x89"  => "\x67\x79",
1191
 
                "\xE3\x8F\x8B"  => "\x68\x70",
1192
 
                "\xE3\x8F\x8D"  => "\x6B\x6B",
1193
 
                "\xE3\x8F\x8E"  => "\x6B\x6D",
1194
 
                "\xE3\x8F\x97"  => "\x70\x68",
1195
 
                "\xE3\x8F\x99"  => "\x70\x70\x6D",
1196
 
                "\xE3\x8F\x9A"  => "\x70\x72",
1197
 
                "\xE3\x8F\x9C"  => "\x73\x76",
1198
 
                "\xE3\x8F\x9D"  => "\x77\x62",
1199
 
                "\xE3\x8F\x9E"  => "\x76\xE2\x88\x95\x6D",
1200
 
                "\xE3\x8F\x9F"  => "\x61\xE2\x88\x95\x6D",
1201
 
                "\xF0\x9D\x90\x80"      => "\x61",
1202
 
                "\xF0\x9D\x90\x81"      => "\x62",
1203
 
                "\xF0\x9D\x90\x82"      => "\x63",
1204
 
                "\xF0\x9D\x90\x83"      => "\x64",
1205
 
                "\xF0\x9D\x90\x84"      => "\x65",
1206
 
                "\xF0\x9D\x90\x85"      => "\x66",
1207
 
                "\xF0\x9D\x90\x86"      => "\x67",
1208
 
                "\xF0\x9D\x90\x87"      => "\x68",
1209
 
                "\xF0\x9D\x90\x88"      => "\x69",
1210
 
                "\xF0\x9D\x90\x89"      => "\x6A",
1211
 
                "\xF0\x9D\x90\x8A"      => "\x6B",
1212
 
                "\xF0\x9D\x90\x8B"      => "\x6C",
1213
 
                "\xF0\x9D\x90\x8C"      => "\x6D",
1214
 
                "\xF0\x9D\x90\x8D"      => "\x6E",
1215
 
                "\xF0\x9D\x90\x8E"      => "\x6F",
1216
 
                "\xF0\x9D\x90\x8F"      => "\x70",
1217
 
                "\xF0\x9D\x90\x90"      => "\x71",
1218
 
                "\xF0\x9D\x90\x91"      => "\x72",
1219
 
                "\xF0\x9D\x90\x92"      => "\x73",
1220
 
                "\xF0\x9D\x90\x93"      => "\x74",
1221
 
                "\xF0\x9D\x90\x94"      => "\x75",
1222
 
                "\xF0\x9D\x90\x95"      => "\x76",
1223
 
                "\xF0\x9D\x90\x96"      => "\x77",
1224
 
                "\xF0\x9D\x90\x97"      => "\x78",
1225
 
                "\xF0\x9D\x90\x98"      => "\x79",
1226
 
                "\xF0\x9D\x90\x99"      => "\x7A",
1227
 
                "\xF0\x9D\x90\xB4"      => "\x61",
1228
 
                "\xF0\x9D\x90\xB5"      => "\x62",
1229
 
                "\xF0\x9D\x90\xB6"      => "\x63",
1230
 
                "\xF0\x9D\x90\xB7"      => "\x64",
1231
 
                "\xF0\x9D\x90\xB8"      => "\x65",
1232
 
                "\xF0\x9D\x90\xB9"      => "\x66",
1233
 
                "\xF0\x9D\x90\xBA"      => "\x67",
1234
 
                "\xF0\x9D\x90\xBB"      => "\x68",
1235
 
                "\xF0\x9D\x90\xBC"      => "\x69",
1236
 
                "\xF0\x9D\x90\xBD"      => "\x6A",
1237
 
                "\xF0\x9D\x90\xBE"      => "\x6B",
1238
 
                "\xF0\x9D\x90\xBF"      => "\x6C",
1239
 
                "\xF0\x9D\x91\x80"      => "\x6D",
1240
 
                "\xF0\x9D\x91\x81"      => "\x6E",
1241
 
                "\xF0\x9D\x91\x82"      => "\x6F",
1242
 
                "\xF0\x9D\x91\x83"      => "\x70",
1243
 
                "\xF0\x9D\x91\x84"      => "\x71",
1244
 
                "\xF0\x9D\x91\x85"      => "\x72",
1245
 
                "\xF0\x9D\x91\x86"      => "\x73",
1246
 
                "\xF0\x9D\x91\x87"      => "\x74",
1247
 
                "\xF0\x9D\x91\x88"      => "\x75",
1248
 
                "\xF0\x9D\x91\x89"      => "\x76",
1249
 
                "\xF0\x9D\x91\x8A"      => "\x77",
1250
 
                "\xF0\x9D\x91\x8B"      => "\x78",
1251
 
                "\xF0\x9D\x91\x8C"      => "\x79",
1252
 
                "\xF0\x9D\x91\x8D"      => "\x7A",
1253
 
                "\xF0\x9D\x91\xA8"      => "\x61",
1254
 
                "\xF0\x9D\x91\xA9"      => "\x62",
1255
 
                "\xF0\x9D\x91\xAA"      => "\x63",
1256
 
                "\xF0\x9D\x91\xAB"      => "\x64",
1257
 
                "\xF0\x9D\x91\xAC"      => "\x65",
1258
 
                "\xF0\x9D\x91\xAD"      => "\x66",
1259
 
                "\xF0\x9D\x91\xAE"      => "\x67",
1260
 
                "\xF0\x9D\x91\xAF"      => "\x68",
1261
 
                "\xF0\x9D\x91\xB0"      => "\x69",
1262
 
                "\xF0\x9D\x91\xB1"      => "\x6A",
1263
 
                "\xF0\x9D\x91\xB2"      => "\x6B",
1264
 
                "\xF0\x9D\x91\xB3"      => "\x6C",
1265
 
                "\xF0\x9D\x91\xB4"      => "\x6D",
1266
 
                "\xF0\x9D\x91\xB5"      => "\x6E",
1267
 
                "\xF0\x9D\x91\xB6"      => "\x6F",
1268
 
                "\xF0\x9D\x91\xB7"      => "\x70",
1269
 
                "\xF0\x9D\x91\xB8"      => "\x71",
1270
 
                "\xF0\x9D\x91\xB9"      => "\x72",
1271
 
                "\xF0\x9D\x91\xBA"      => "\x73",
1272
 
                "\xF0\x9D\x91\xBB"      => "\x74",
1273
 
                "\xF0\x9D\x91\xBC"      => "\x75",
1274
 
                "\xF0\x9D\x91\xBD"      => "\x76",
1275
 
                "\xF0\x9D\x91\xBE"      => "\x77",
1276
 
                "\xF0\x9D\x91\xBF"      => "\x78",
1277
 
                "\xF0\x9D\x92\x80"      => "\x79",
1278
 
                "\xF0\x9D\x92\x81"      => "\x7A",
1279
 
                "\xF0\x9D\x92\x9C"      => "\x61",
1280
 
                "\xF0\x9D\x92\x9E"      => "\x63",
1281
 
                "\xF0\x9D\x92\x9F"      => "\x64",
1282
 
                "\xF0\x9D\x92\xA2"      => "\x67",
1283
 
                "\xF0\x9D\x92\xA5"      => "\x6A",
1284
 
                "\xF0\x9D\x92\xA6"      => "\x6B",
1285
 
                "\xF0\x9D\x92\xA9"      => "\x6E",
1286
 
                "\xF0\x9D\x92\xAA"      => "\x6F",
1287
 
                "\xF0\x9D\x92\xAB"      => "\x70",
1288
 
                "\xF0\x9D\x92\xAC"      => "\x71",
1289
 
                "\xF0\x9D\x92\xAE"      => "\x73",
1290
 
                "\xF0\x9D\x92\xAF"      => "\x74",
1291
 
                "\xF0\x9D\x92\xB0"      => "\x75",
1292
 
                "\xF0\x9D\x92\xB1"      => "\x76",
1293
 
                "\xF0\x9D\x92\xB2"      => "\x77",
1294
 
                "\xF0\x9D\x92\xB3"      => "\x78",
1295
 
                "\xF0\x9D\x92\xB4"      => "\x79",
1296
 
                "\xF0\x9D\x92\xB5"      => "\x7A",
1297
 
                "\xF0\x9D\x93\x90"      => "\x61",
1298
 
                "\xF0\x9D\x93\x91"      => "\x62",
1299
 
                "\xF0\x9D\x93\x92"      => "\x63",
1300
 
                "\xF0\x9D\x93\x93"      => "\x64",
1301
 
                "\xF0\x9D\x93\x94"      => "\x65",
1302
 
                "\xF0\x9D\x93\x95"      => "\x66",
1303
 
                "\xF0\x9D\x93\x96"      => "\x67",
1304
 
                "\xF0\x9D\x93\x97"      => "\x68",
1305
 
                "\xF0\x9D\x93\x98"      => "\x69",
1306
 
                "\xF0\x9D\x93\x99"      => "\x6A",
1307
 
                "\xF0\x9D\x93\x9A"      => "\x6B",
1308
 
                "\xF0\x9D\x93\x9B"      => "\x6C",
1309
 
                "\xF0\x9D\x93\x9C"      => "\x6D",
1310
 
                "\xF0\x9D\x93\x9D"      => "\x6E",
1311
 
                "\xF0\x9D\x93\x9E"      => "\x6F",
1312
 
                "\xF0\x9D\x93\x9F"      => "\x70",
1313
 
                "\xF0\x9D\x93\xA0"      => "\x71",
1314
 
                "\xF0\x9D\x93\xA1"      => "\x72",
1315
 
                "\xF0\x9D\x93\xA2"      => "\x73",
1316
 
                "\xF0\x9D\x93\xA3"      => "\x74",
1317
 
                "\xF0\x9D\x93\xA4"      => "\x75",
1318
 
                "\xF0\x9D\x93\xA5"      => "\x76",
1319
 
                "\xF0\x9D\x93\xA6"      => "\x77",
1320
 
                "\xF0\x9D\x93\xA7"      => "\x78",
1321
 
                "\xF0\x9D\x93\xA8"      => "\x79",
1322
 
                "\xF0\x9D\x93\xA9"      => "\x7A",
1323
 
                "\xF0\x9D\x94\x84"      => "\x61",
1324
 
                "\xF0\x9D\x94\x85"      => "\x62",
1325
 
                "\xF0\x9D\x94\x87"      => "\x64",
1326
 
                "\xF0\x9D\x94\x88"      => "\x65",
1327
 
                "\xF0\x9D\x94\x89"      => "\x66",
1328
 
                "\xF0\x9D\x94\x8A"      => "\x67",
1329
 
                "\xF0\x9D\x94\x8D"      => "\x6A",
1330
 
                "\xF0\x9D\x94\x8E"      => "\x6B",
1331
 
                "\xF0\x9D\x94\x8F"      => "\x6C",
1332
 
                "\xF0\x9D\x94\x90"      => "\x6D",
1333
 
                "\xF0\x9D\x94\x91"      => "\x6E",
1334
 
                "\xF0\x9D\x94\x92"      => "\x6F",
1335
 
                "\xF0\x9D\x94\x93"      => "\x70",
1336
 
                "\xF0\x9D\x94\x94"      => "\x71",
1337
 
                "\xF0\x9D\x94\x96"      => "\x73",
1338
 
                "\xF0\x9D\x94\x97"      => "\x74",
1339
 
                "\xF0\x9D\x94\x98"      => "\x75",
1340
 
                "\xF0\x9D\x94\x99"      => "\x76",
1341
 
                "\xF0\x9D\x94\x9A"      => "\x77",
1342
 
                "\xF0\x9D\x94\x9B"      => "\x78",
1343
 
                "\xF0\x9D\x94\x9C"      => "\x79",
1344
 
                "\xF0\x9D\x94\xB8"      => "\x61",
1345
 
                "\xF0\x9D\x94\xB9"      => "\x62",
1346
 
                "\xF0\x9D\x94\xBB"      => "\x64",
1347
 
                "\xF0\x9D\x94\xBC"      => "\x65",
1348
 
                "\xF0\x9D\x94\xBD"      => "\x66",
1349
 
                "\xF0\x9D\x94\xBE"      => "\x67",
1350
 
                "\xF0\x9D\x95\x80"      => "\x69",
1351
 
                "\xF0\x9D\x95\x81"      => "\x6A",
1352
 
                "\xF0\x9D\x95\x82"      => "\x6B",
1353
 
                "\xF0\x9D\x95\x83"      => "\x6C",
1354
 
                "\xF0\x9D\x95\x84"      => "\x6D",
1355
 
                "\xF0\x9D\x95\x86"      => "\x6F",
1356
 
                "\xF0\x9D\x95\x8A"      => "\x73",
1357
 
                "\xF0\x9D\x95\x8B"      => "\x74",
1358
 
                "\xF0\x9D\x95\x8C"      => "\x75",
1359
 
                "\xF0\x9D\x95\x8D"      => "\x76",
1360
 
                "\xF0\x9D\x95\x8E"      => "\x77",
1361
 
                "\xF0\x9D\x95\x8F"      => "\x78",
1362
 
                "\xF0\x9D\x95\x90"      => "\x79",
1363
 
                "\xF0\x9D\x95\xAC"      => "\x61",
1364
 
                "\xF0\x9D\x95\xAD"      => "\x62",
1365
 
                "\xF0\x9D\x95\xAE"      => "\x63",
1366
 
                "\xF0\x9D\x95\xAF"      => "\x64",
1367
 
                "\xF0\x9D\x95\xB0"      => "\x65",
1368
 
                "\xF0\x9D\x95\xB1"      => "\x66",
1369
 
                "\xF0\x9D\x95\xB2"      => "\x67",
1370
 
                "\xF0\x9D\x95\xB3"      => "\x68",
1371
 
                "\xF0\x9D\x95\xB4"      => "\x69",
1372
 
                "\xF0\x9D\x95\xB5"      => "\x6A",
1373
 
                "\xF0\x9D\x95\xB6"      => "\x6B",
1374
 
                "\xF0\x9D\x95\xB7"      => "\x6C",
1375
 
                "\xF0\x9D\x95\xB8"      => "\x6D",
1376
 
                "\xF0\x9D\x95\xB9"      => "\x6E",
1377
 
                "\xF0\x9D\x95\xBA"      => "\x6F",
1378
 
                "\xF0\x9D\x95\xBB"      => "\x70",
1379
 
                "\xF0\x9D\x95\xBC"      => "\x71",
1380
 
                "\xF0\x9D\x95\xBD"      => "\x72",
1381
 
                "\xF0\x9D\x95\xBE"      => "\x73",
1382
 
                "\xF0\x9D\x95\xBF"      => "\x74",
1383
 
                "\xF0\x9D\x96\x80"      => "\x75",
1384
 
                "\xF0\x9D\x96\x81"      => "\x76",
1385
 
                "\xF0\x9D\x96\x82"      => "\x77",
1386
 
                "\xF0\x9D\x96\x83"      => "\x78",
1387
 
                "\xF0\x9D\x96\x84"      => "\x79",
1388
 
                "\xF0\x9D\x96\x85"      => "\x7A",
1389
 
                "\xF0\x9D\x96\xA0"      => "\x61",
1390
 
                "\xF0\x9D\x96\xA1"      => "\x62",
1391
 
                "\xF0\x9D\x96\xA2"      => "\x63",
1392
 
                "\xF0\x9D\x96\xA3"      => "\x64",
1393
 
                "\xF0\x9D\x96\xA4"      => "\x65",
1394
 
                "\xF0\x9D\x96\xA5"      => "\x66",
1395
 
                "\xF0\x9D\x96\xA6"      => "\x67",
1396
 
                "\xF0\x9D\x96\xA7"      => "\x68",
1397
 
                "\xF0\x9D\x96\xA8"      => "\x69",
1398
 
                "\xF0\x9D\x96\xA9"      => "\x6A",
1399
 
                "\xF0\x9D\x96\xAA"      => "\x6B",
1400
 
                "\xF0\x9D\x96\xAB"      => "\x6C",
1401
 
                "\xF0\x9D\x96\xAC"      => "\x6D",
1402
 
                "\xF0\x9D\x96\xAD"      => "\x6E",
1403
 
                "\xF0\x9D\x96\xAE"      => "\x6F",
1404
 
                "\xF0\x9D\x96\xAF"      => "\x70",
1405
 
                "\xF0\x9D\x96\xB0"      => "\x71",
1406
 
                "\xF0\x9D\x96\xB1"      => "\x72",
1407
 
                "\xF0\x9D\x96\xB2"      => "\x73",
1408
 
                "\xF0\x9D\x96\xB3"      => "\x74",
1409
 
                "\xF0\x9D\x96\xB4"      => "\x75",
1410
 
                "\xF0\x9D\x96\xB5"      => "\x76",
1411
 
                "\xF0\x9D\x96\xB6"      => "\x77",
1412
 
                "\xF0\x9D\x96\xB7"      => "\x78",
1413
 
                "\xF0\x9D\x96\xB8"      => "\x79",
1414
 
                "\xF0\x9D\x96\xB9"      => "\x7A",
1415
 
                "\xF0\x9D\x97\x94"      => "\x61",
1416
 
                "\xF0\x9D\x97\x95"      => "\x62",
1417
 
                "\xF0\x9D\x97\x96"      => "\x63",
1418
 
                "\xF0\x9D\x97\x97"      => "\x64",
1419
 
                "\xF0\x9D\x97\x98"      => "\x65",
1420
 
                "\xF0\x9D\x97\x99"      => "\x66",
1421
 
                "\xF0\x9D\x97\x9A"      => "\x67",
1422
 
                "\xF0\x9D\x97\x9B"      => "\x68",
1423
 
                "\xF0\x9D\x97\x9C"      => "\x69",
1424
 
                "\xF0\x9D\x97\x9D"      => "\x6A",
1425
 
                "\xF0\x9D\x97\x9E"      => "\x6B",
1426
 
                "\xF0\x9D\x97\x9F"      => "\x6C",
1427
 
                "\xF0\x9D\x97\xA0"      => "\x6D",
1428
 
                "\xF0\x9D\x97\xA1"      => "\x6E",
1429
 
                "\xF0\x9D\x97\xA2"      => "\x6F",
1430
 
                "\xF0\x9D\x97\xA3"      => "\x70",
1431
 
                "\xF0\x9D\x97\xA4"      => "\x71",
1432
 
                "\xF0\x9D\x97\xA5"      => "\x72",
1433
 
                "\xF0\x9D\x97\xA6"      => "\x73",
1434
 
                "\xF0\x9D\x97\xA7"      => "\x74",
1435
 
                "\xF0\x9D\x97\xA8"      => "\x75",
1436
 
                "\xF0\x9D\x97\xA9"      => "\x76",
1437
 
                "\xF0\x9D\x97\xAA"      => "\x77",
1438
 
                "\xF0\x9D\x97\xAB"      => "\x78",
1439
 
                "\xF0\x9D\x97\xAC"      => "\x79",
1440
 
                "\xF0\x9D\x97\xAD"      => "\x7A",
1441
 
                "\xF0\x9D\x98\x88"      => "\x61",
1442
 
                "\xF0\x9D\x98\x89"      => "\x62",
1443
 
                "\xF0\x9D\x98\x8A"      => "\x63",
1444
 
                "\xF0\x9D\x98\x8B"      => "\x64",
1445
 
                "\xF0\x9D\x98\x8C"      => "\x65",
1446
 
                "\xF0\x9D\x98\x8D"      => "\x66",
1447
 
                "\xF0\x9D\x98\x8E"      => "\x67",
1448
 
                "\xF0\x9D\x98\x8F"      => "\x68",
1449
 
                "\xF0\x9D\x98\x90"      => "\x69",
1450
 
                "\xF0\x9D\x98\x91"      => "\x6A",
1451
 
                "\xF0\x9D\x98\x92"      => "\x6B",
1452
 
                "\xF0\x9D\x98\x93"      => "\x6C",
1453
 
                "\xF0\x9D\x98\x94"      => "\x6D",
1454
 
                "\xF0\x9D\x98\x95"      => "\x6E",
1455
 
                "\xF0\x9D\x98\x96"      => "\x6F",
1456
 
                "\xF0\x9D\x98\x97"      => "\x70",
1457
 
                "\xF0\x9D\x98\x98"      => "\x71",
1458
 
                "\xF0\x9D\x98\x99"      => "\x72",
1459
 
                "\xF0\x9D\x98\x9A"      => "\x73",
1460
 
                "\xF0\x9D\x98\x9B"      => "\x74",
1461
 
                "\xF0\x9D\x98\x9C"      => "\x75",
1462
 
                "\xF0\x9D\x98\x9D"      => "\x76",
1463
 
                "\xF0\x9D\x98\x9E"      => "\x77",
1464
 
                "\xF0\x9D\x98\x9F"      => "\x78",
1465
 
                "\xF0\x9D\x98\xA0"      => "\x79",
1466
 
                "\xF0\x9D\x98\xA1"      => "\x7A",
1467
 
                "\xF0\x9D\x98\xBC"      => "\x61",
1468
 
                "\xF0\x9D\x98\xBD"      => "\x62",
1469
 
                "\xF0\x9D\x98\xBE"      => "\x63",
1470
 
                "\xF0\x9D\x98\xBF"      => "\x64",
1471
 
                "\xF0\x9D\x99\x80"      => "\x65",
1472
 
                "\xF0\x9D\x99\x81"      => "\x66",
1473
 
                "\xF0\x9D\x99\x82"      => "\x67",
1474
 
                "\xF0\x9D\x99\x83"      => "\x68",
1475
 
                "\xF0\x9D\x99\x84"      => "\x69",
1476
 
                "\xF0\x9D\x99\x85"      => "\x6A",
1477
 
                "\xF0\x9D\x99\x86"      => "\x6B",
1478
 
                "\xF0\x9D\x99\x87"      => "\x6C",
1479
 
                "\xF0\x9D\x99\x88"      => "\x6D",
1480
 
                "\xF0\x9D\x99\x89"      => "\x6E",
1481
 
                "\xF0\x9D\x99\x8A"      => "\x6F",
1482
 
                "\xF0\x9D\x99\x8B"      => "\x70",
1483
 
                "\xF0\x9D\x99\x8C"      => "\x71",
1484
 
                "\xF0\x9D\x99\x8D"      => "\x72",
1485
 
                "\xF0\x9D\x99\x8E"      => "\x73",
1486
 
                "\xF0\x9D\x99\x8F"      => "\x74",
1487
 
                "\xF0\x9D\x99\x90"      => "\x75",
1488
 
                "\xF0\x9D\x99\x91"      => "\x76",
1489
 
                "\xF0\x9D\x99\x92"      => "\x77",
1490
 
                "\xF0\x9D\x99\x93"      => "\x78",
1491
 
                "\xF0\x9D\x99\x94"      => "\x79",
1492
 
                "\xF0\x9D\x99\x95"      => "\x7A",
1493
 
                "\xF0\x9D\x99\xB0"      => "\x61",
1494
 
                "\xF0\x9D\x99\xB1"      => "\x62",
1495
 
                "\xF0\x9D\x99\xB2"      => "\x63",
1496
 
                "\xF0\x9D\x99\xB3"      => "\x64",
1497
 
                "\xF0\x9D\x99\xB4"      => "\x65",
1498
 
                "\xF0\x9D\x99\xB5"      => "\x66",
1499
 
                "\xF0\x9D\x99\xB6"      => "\x67",
1500
 
                "\xF0\x9D\x99\xB7"      => "\x68",
1501
 
                "\xF0\x9D\x99\xB8"      => "\x69",
1502
 
                "\xF0\x9D\x99\xB9"      => "\x6A",
1503
 
                "\xF0\x9D\x99\xBA"      => "\x6B",
1504
 
                "\xF0\x9D\x99\xBB"      => "\x6C",
1505
 
                "\xF0\x9D\x99\xBC"      => "\x6D",
1506
 
                "\xF0\x9D\x99\xBD"      => "\x6E",
1507
 
                "\xF0\x9D\x99\xBE"      => "\x6F",
1508
 
                "\xF0\x9D\x99\xBF"      => "\x70",
1509
 
                "\xF0\x9D\x9A\x80"      => "\x71",
1510
 
                "\xF0\x9D\x9A\x81"      => "\x72",
1511
 
                "\xF0\x9D\x9A\x82"      => "\x73",
1512
 
                "\xF0\x9D\x9A\x83"      => "\x74",
1513
 
                "\xF0\x9D\x9A\x84"      => "\x75",
1514
 
                "\xF0\x9D\x9A\x85"      => "\x76",
1515
 
                "\xF0\x9D\x9A\x86"      => "\x77",
1516
 
                "\xF0\x9D\x9A\x87"      => "\x78",
1517
 
                "\xF0\x9D\x9A\x88"      => "\x79",
1518
 
                "\xF0\x9D\x9A\x89"      => "\x7A",
1519
 
                "\xF0\x9D\x9A\xA8"      => "\xCE\xB1",
1520
 
                "\xF0\x9D\x9A\xA9"      => "\xCE\xB2",
1521
 
                "\xF0\x9D\x9A\xAA"      => "\xCE\xB3",
1522
 
                "\xF0\x9D\x9A\xAB"      => "\xCE\xB4",
1523
 
                "\xF0\x9D\x9A\xAC"      => "\xCE\xB5",
1524
 
                "\xF0\x9D\x9A\xAD"      => "\xCE\xB6",
1525
 
                "\xF0\x9D\x9A\xAE"      => "\xCE\xB7",
1526
 
                "\xF0\x9D\x9A\xAF"      => "\xCE\xB8",
1527
 
                "\xF0\x9D\x9A\xB0"      => "\xCE\xB9",
1528
 
                "\xF0\x9D\x9A\xB1"      => "\xCE\xBA",
1529
 
                "\xF0\x9D\x9A\xB2"      => "\xCE\xBB",
1530
 
                "\xF0\x9D\x9A\xB3"      => "\xCE\xBC",
1531
 
                "\xF0\x9D\x9A\xB4"      => "\xCE\xBD",
1532
 
                "\xF0\x9D\x9A\xB5"      => "\xCE\xBE",
1533
 
                "\xF0\x9D\x9A\xB6"      => "\xCE\xBF",
1534
 
                "\xF0\x9D\x9A\xB7"      => "\xCF\x80",
1535
 
                "\xF0\x9D\x9A\xB8"      => "\xCF\x81",
1536
 
                "\xF0\x9D\x9A\xB9"      => "\xCE\xB8",
1537
 
                "\xF0\x9D\x9A\xBA"      => "\xCF\x83",
1538
 
                "\xF0\x9D\x9A\xBB"      => "\xCF\x84",
1539
 
                "\xF0\x9D\x9A\xBC"      => "\xCF\x85",
1540
 
                "\xF0\x9D\x9A\xBD"      => "\xCF\x86",
1541
 
                "\xF0\x9D\x9A\xBE"      => "\xCF\x87",
1542
 
                "\xF0\x9D\x9A\xBF"      => "\xCF\x88",
1543
 
                "\xF0\x9D\x9B\x80"      => "\xCF\x89",
1544
 
                "\xF0\x9D\x9B\x93"      => "\xCF\x83",
1545
 
                "\xF0\x9D\x9B\xA2"      => "\xCE\xB1",
1546
 
                "\xF0\x9D\x9B\xA3"      => "\xCE\xB2",
1547
 
                "\xF0\x9D\x9B\xA4"      => "\xCE\xB3",
1548
 
                "\xF0\x9D\x9B\xA5"      => "\xCE\xB4",
1549
 
                "\xF0\x9D\x9B\xA6"      => "\xCE\xB5",
1550
 
                "\xF0\x9D\x9B\xA7"      => "\xCE\xB6",
1551
 
                "\xF0\x9D\x9B\xA8"      => "\xCE\xB7",
1552
 
                "\xF0\x9D\x9B\xA9"      => "\xCE\xB8",
1553
 
                "\xF0\x9D\x9B\xAA"      => "\xCE\xB9",
1554
 
                "\xF0\x9D\x9B\xAB"      => "\xCE\xBA",
1555
 
                "\xF0\x9D\x9B\xAC"      => "\xCE\xBB",
1556
 
                "\xF0\x9D\x9B\xAD"      => "\xCE\xBC",
1557
 
                "\xF0\x9D\x9B\xAE"      => "\xCE\xBD",
1558
 
                "\xF0\x9D\x9B\xAF"      => "\xCE\xBE",
1559
 
                "\xF0\x9D\x9B\xB0"      => "\xCE\xBF",
1560
 
                "\xF0\x9D\x9B\xB1"      => "\xCF\x80",
1561
 
                "\xF0\x9D\x9B\xB2"      => "\xCF\x81",
1562
 
                "\xF0\x9D\x9B\xB3"      => "\xCE\xB8",
1563
 
                "\xF0\x9D\x9B\xB4"      => "\xCF\x83",
1564
 
                "\xF0\x9D\x9B\xB5"      => "\xCF\x84",
1565
 
                "\xF0\x9D\x9B\xB6"      => "\xCF\x85",
1566
 
                "\xF0\x9D\x9B\xB7"      => "\xCF\x86",
1567
 
                "\xF0\x9D\x9B\xB8"      => "\xCF\x87",
1568
 
                "\xF0\x9D\x9B\xB9"      => "\xCF\x88",
1569
 
                "\xF0\x9D\x9B\xBA"      => "\xCF\x89",
1570
 
                "\xF0\x9D\x9C\x8D"      => "\xCF\x83",
1571
 
                "\xF0\x9D\x9C\x9C"      => "\xCE\xB1",
1572
 
                "\xF0\x9D\x9C\x9D"      => "\xCE\xB2",
1573
 
                "\xF0\x9D\x9C\x9E"      => "\xCE\xB3",
1574
 
                "\xF0\x9D\x9C\x9F"      => "\xCE\xB4",
1575
 
                "\xF0\x9D\x9C\xA0"      => "\xCE\xB5",
1576
 
                "\xF0\x9D\x9C\xA1"      => "\xCE\xB6",
1577
 
                "\xF0\x9D\x9C\xA2"      => "\xCE\xB7",
1578
 
                "\xF0\x9D\x9C\xA3"      => "\xCE\xB8",
1579
 
                "\xF0\x9D\x9C\xA4"      => "\xCE\xB9",
1580
 
                "\xF0\x9D\x9C\xA5"      => "\xCE\xBA",
1581
 
                "\xF0\x9D\x9C\xA6"      => "\xCE\xBB",
1582
 
                "\xF0\x9D\x9C\xA7"      => "\xCE\xBC",
1583
 
                "\xF0\x9D\x9C\xA8"      => "\xCE\xBD",
1584
 
                "\xF0\x9D\x9C\xA9"      => "\xCE\xBE",
1585
 
                "\xF0\x9D\x9C\xAA"      => "\xCE\xBF",
1586
 
                "\xF0\x9D\x9C\xAB"      => "\xCF\x80",
1587
 
                "\xF0\x9D\x9C\xAC"      => "\xCF\x81",
1588
 
                "\xF0\x9D\x9C\xAD"      => "\xCE\xB8",
1589
 
                "\xF0\x9D\x9C\xAE"      => "\xCF\x83",
1590
 
                "\xF0\x9D\x9C\xAF"      => "\xCF\x84",
1591
 
                "\xF0\x9D\x9C\xB0"      => "\xCF\x85",
1592
 
                "\xF0\x9D\x9C\xB1"      => "\xCF\x86",
1593
 
                "\xF0\x9D\x9C\xB2"      => "\xCF\x87",
1594
 
                "\xF0\x9D\x9C\xB3"      => "\xCF\x88",
1595
 
                "\xF0\x9D\x9C\xB4"      => "\xCF\x89",
1596
 
                "\xF0\x9D\x9D\x87"      => "\xCF\x83",
1597
 
                "\xF0\x9D\x9D\x96"      => "\xCE\xB1",
1598
 
                "\xF0\x9D\x9D\x97"      => "\xCE\xB2",
1599
 
                "\xF0\x9D\x9D\x98"      => "\xCE\xB3",
1600
 
                "\xF0\x9D\x9D\x99"      => "\xCE\xB4",
1601
 
                "\xF0\x9D\x9D\x9A"      => "\xCE\xB5",
1602
 
                "\xF0\x9D\x9D\x9B"      => "\xCE\xB6",
1603
 
                "\xF0\x9D\x9D\x9C"      => "\xCE\xB7",
1604
 
                "\xF0\x9D\x9D\x9D"      => "\xCE\xB8",
1605
 
                "\xF0\x9D\x9D\x9E"      => "\xCE\xB9",
1606
 
                "\xF0\x9D\x9D\x9F"      => "\xCE\xBA",
1607
 
                "\xF0\x9D\x9D\xA0"      => "\xCE\xBB",
1608
 
                "\xF0\x9D\x9D\xA1"      => "\xCE\xBC",
1609
 
                "\xF0\x9D\x9D\xA2"      => "\xCE\xBD",
1610
 
                "\xF0\x9D\x9D\xA3"      => "\xCE\xBE",
1611
 
                "\xF0\x9D\x9D\xA4"      => "\xCE\xBF",
1612
 
                "\xF0\x9D\x9D\xA5"      => "\xCF\x80",
1613
 
                "\xF0\x9D\x9D\xA6"      => "\xCF\x81",
1614
 
                "\xF0\x9D\x9D\xA7"      => "\xCE\xB8",
1615
 
                "\xF0\x9D\x9D\xA8"      => "\xCF\x83",
1616
 
                "\xF0\x9D\x9D\xA9"      => "\xCF\x84",
1617
 
                "\xF0\x9D\x9D\xAA"      => "\xCF\x85",
1618
 
                "\xF0\x9D\x9D\xAB"      => "\xCF\x86",
1619
 
                "\xF0\x9D\x9D\xAC"      => "\xCF\x87",
1620
 
                "\xF0\x9D\x9D\xAD"      => "\xCF\x88",
1621
 
                "\xF0\x9D\x9D\xAE"      => "\xCF\x89",
1622
 
                "\xF0\x9D\x9E\x81"      => "\xCF\x83",
1623
 
                "\xF0\x9D\x9E\x90"      => "\xCE\xB1",
1624
 
                "\xF0\x9D\x9E\x91"      => "\xCE\xB2",
1625
 
                "\xF0\x9D\x9E\x92"      => "\xCE\xB3",
1626
 
                "\xF0\x9D\x9E\x93"      => "\xCE\xB4",
1627
 
                "\xF0\x9D\x9E\x94"      => "\xCE\xB5",
1628
 
                "\xF0\x9D\x9E\x95"      => "\xCE\xB6",
1629
 
                "\xF0\x9D\x9E\x96"      => "\xCE\xB7",
1630
 
                "\xF0\x9D\x9E\x97"      => "\xCE\xB8",
1631
 
                "\xF0\x9D\x9E\x98"      => "\xCE\xB9",
1632
 
                "\xF0\x9D\x9E\x99"      => "\xCE\xBA",
1633
 
                "\xF0\x9D\x9E\x9A"      => "\xCE\xBB",
1634
 
                "\xF0\x9D\x9E\x9B"      => "\xCE\xBC",
1635
 
                "\xF0\x9D\x9E\x9C"      => "\xCE\xBD",
1636
 
                "\xF0\x9D\x9E\x9D"      => "\xCE\xBE",
1637
 
                "\xF0\x9D\x9E\x9E"      => "\xCE\xBF",
1638
 
                "\xF0\x9D\x9E\x9F"      => "\xCF\x80",
1639
 
                "\xF0\x9D\x9E\xA0"      => "\xCF\x81",
1640
 
                "\xF0\x9D\x9E\xA1"      => "\xCE\xB8",
1641
 
                "\xF0\x9D\x9E\xA2"      => "\xCF\x83",
1642
 
                "\xF0\x9D\x9E\xA3"      => "\xCF\x84",
1643
 
                "\xF0\x9D\x9E\xA4"      => "\xCF\x85",
1644
 
                "\xF0\x9D\x9E\xA5"      => "\xCF\x86",
1645
 
                "\xF0\x9D\x9E\xA6"      => "\xCF\x87",
1646
 
                "\xF0\x9D\x9E\xA7"      => "\xCF\x88",
1647
 
                "\xF0\x9D\x9E\xA8"      => "\xCF\x89",
1648
 
                "\xF0\x9D\x9E\xBB"      => "\xCF\x83",
1649
 
                "\xF0\x9D\x9F\x8A"      => "\xCF\x9D",
1650
 
        );
1651
 
        global $phpbb_root_path, $phpEx;
1652
 
 
1653
 
        // do the case fold
1654
 
        $text = utf8_case_fold($text, $option);
1655
 
 
1656
 
        if (!class_exists('utf_normalizer'))
1657
 
        {
1658
 
                global $phpbb_root_path, $phpEx;
1659
 
                include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
1660
 
        }
1661
 
 
1662
 
        // convert to NFKC
1663
 
        utf_normalizer::nfkc($text);
1664
 
 
1665
 
        // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
1666
 
        $text = strtr($text, $fc_nfkc_closure);
1667
 
 
1668
 
        return $text;
1669
 
}
1670
 
 
1671
 
/**
* Case fold a string that is assumed to already be in NFC.
*
* Before folding, every precomposed code point listed in the table below is
* replaced by its base character followed by U+0345 (COMBINING GREEK
* YPOGEGRAMMENI), so that no further normalization is needed for code points
* containing U+0345 in their decomposition. The result is then handed to
* utf8_case_fold().
*
* @param        string  $text   text to be case folded
* @param        string  $option determines how we will fold the cases, passed through to utf8_case_fold()
* @return       string                  case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
        // Precomposed code point => base character + U+0345 ("\xCD\x85")
        static $ypogegrammeni = array(
                "\xCD\xBA"      => "\x20\xCD\x85",
                "\xE1\xBE\x80"  => "\xE1\xBC\x80\xCD\x85",
                "\xE1\xBE\x81"  => "\xE1\xBC\x81\xCD\x85",
                "\xE1\xBE\x82"  => "\xE1\xBC\x82\xCD\x85",
                "\xE1\xBE\x83"  => "\xE1\xBC\x83\xCD\x85",
                "\xE1\xBE\x84"  => "\xE1\xBC\x84\xCD\x85",
                "\xE1\xBE\x85"  => "\xE1\xBC\x85\xCD\x85",
                "\xE1\xBE\x86"  => "\xE1\xBC\x86\xCD\x85",
                "\xE1\xBE\x87"  => "\xE1\xBC\x87\xCD\x85",
                "\xE1\xBE\x88"  => "\xE1\xBC\x88\xCD\x85",
                "\xE1\xBE\x89"  => "\xE1\xBC\x89\xCD\x85",
                "\xE1\xBE\x8A"  => "\xE1\xBC\x8A\xCD\x85",
                "\xE1\xBE\x8B"  => "\xE1\xBC\x8B\xCD\x85",
                "\xE1\xBE\x8C"  => "\xE1\xBC\x8C\xCD\x85",
                "\xE1\xBE\x8D"  => "\xE1\xBC\x8D\xCD\x85",
                "\xE1\xBE\x8E"  => "\xE1\xBC\x8E\xCD\x85",
                "\xE1\xBE\x8F"  => "\xE1\xBC\x8F\xCD\x85",
                "\xE1\xBE\x90"  => "\xE1\xBC\xA0\xCD\x85",
                "\xE1\xBE\x91"  => "\xE1\xBC\xA1\xCD\x85",
                "\xE1\xBE\x92"  => "\xE1\xBC\xA2\xCD\x85",
                "\xE1\xBE\x93"  => "\xE1\xBC\xA3\xCD\x85",
                "\xE1\xBE\x94"  => "\xE1\xBC\xA4\xCD\x85",
                "\xE1\xBE\x95"  => "\xE1\xBC\xA5\xCD\x85",
                "\xE1\xBE\x96"  => "\xE1\xBC\xA6\xCD\x85",
                "\xE1\xBE\x97"  => "\xE1\xBC\xA7\xCD\x85",
                "\xE1\xBE\x98"  => "\xE1\xBC\xA8\xCD\x85",
                "\xE1\xBE\x99"  => "\xE1\xBC\xA9\xCD\x85",
                "\xE1\xBE\x9A"  => "\xE1\xBC\xAA\xCD\x85",
                "\xE1\xBE\x9B"  => "\xE1\xBC\xAB\xCD\x85",
                "\xE1\xBE\x9C"  => "\xE1\xBC\xAC\xCD\x85",
                "\xE1\xBE\x9D"  => "\xE1\xBC\xAD\xCD\x85",
                "\xE1\xBE\x9E"  => "\xE1\xBC\xAE\xCD\x85",
                "\xE1\xBE\x9F"  => "\xE1\xBC\xAF\xCD\x85",
                "\xE1\xBE\xA0"  => "\xE1\xBD\xA0\xCD\x85",
                "\xE1\xBE\xA1"  => "\xE1\xBD\xA1\xCD\x85",
                "\xE1\xBE\xA2"  => "\xE1\xBD\xA2\xCD\x85",
                "\xE1\xBE\xA3"  => "\xE1\xBD\xA3\xCD\x85",
                "\xE1\xBE\xA4"  => "\xE1\xBD\xA4\xCD\x85",
                "\xE1\xBE\xA5"  => "\xE1\xBD\xA5\xCD\x85",
                "\xE1\xBE\xA6"  => "\xE1\xBD\xA6\xCD\x85",
                "\xE1\xBE\xA7"  => "\xE1\xBD\xA7\xCD\x85",
                "\xE1\xBE\xA8"  => "\xE1\xBD\xA8\xCD\x85",
                "\xE1\xBE\xA9"  => "\xE1\xBD\xA9\xCD\x85",
                "\xE1\xBE\xAA"  => "\xE1\xBD\xAA\xCD\x85",
                "\xE1\xBE\xAB"  => "\xE1\xBD\xAB\xCD\x85",
                "\xE1\xBE\xAC"  => "\xE1\xBD\xAC\xCD\x85",
                "\xE1\xBE\xAD"  => "\xE1\xBD\xAD\xCD\x85",
                "\xE1\xBE\xAE"  => "\xE1\xBD\xAE\xCD\x85",
                "\xE1\xBE\xAF"  => "\xE1\xBD\xAF\xCD\x85",
                "\xE1\xBE\xB2"  => "\xE1\xBD\xB0\xCD\x85",
                "\xE1\xBE\xB3"  => "\xCE\xB1\xCD\x85",
                "\xE1\xBE\xB4"  => "\xCE\xAC\xCD\x85",
                "\xE1\xBE\xB7"  => "\xE1\xBE\xB6\xCD\x85",
                "\xE1\xBE\xBC"  => "\xCE\x91\xCD\x85",
                "\xE1\xBF\x82"  => "\xE1\xBD\xB4\xCD\x85",
                "\xE1\xBF\x83"  => "\xCE\xB7\xCD\x85",
                "\xE1\xBF\x84"  => "\xCE\xAE\xCD\x85",
                "\xE1\xBF\x87"  => "\xE1\xBF\x86\xCD\x85",
                "\xE1\xBF\x8C"  => "\xCE\x97\xCD\x85",
                "\xE1\xBF\xB2"  => "\xE1\xBD\xBC\xCD\x85",
                "\xE1\xBF\xB3"  => "\xCF\x89\xCD\x85",
                "\xE1\xBF\xB4"  => "\xCF\x8E\xCD\x85",
                "\xE1\xBF\xB7"  => "\xE1\xBF\xB6\xCD\x85",
                "\xE1\xBF\xBC"  => "\xCE\xA9\xCD\x85",
        );

        // perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
        $text = strtr($text, $ypogegrammeni);

        // do the case fold
        return utf8_case_fold($text, $option);
}
1758
 
 
1759
 
/**
* A wrapper function for the normalizer which takes care of including the class if required and
* returns the passed strings in NFC (Normalization Form Composition).
*
* Arrays are walked recursively, so strings at any nesting depth are normalized.
* Empty input (as judged by empty(), e.g. '', '0', array(), null) is returned untouched
* without loading the normalizer.
*
* @param        mixed   $strings        a string or a (possibly nested) array of strings to normalize
* @return       mixed                           the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
        if (empty($strings))
        {
                return $strings;
        }

        // Load the normalizer class on first use
        if (!class_exists('utf_normalizer'))
        {
                global $phpbb_root_path, $phpEx;
                include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
        }

        if (!is_array($strings))
        {
                // utf_normalizer::nfc() works on its argument in place
                utf_normalizer::nfc($strings);
                return $strings;
        }

        foreach ($strings as $key => $string)
        {
                if (is_array($string))
                {
                        // Descend into sub-arrays instead of handing an array to the normalizer
                        $strings[$key] = utf8_normalize_nfc($string);
                }
                else
                {
                        utf_normalizer::nfc($strings[$key]);
                }
        }

        return $strings;
}
1803
 
 
1804
 
/**
* Generate the "clean" version of a string.
*
* Clean means the string is put into a case insensitive form (case folding)
* and normalized via utf8_case_fold_nfkc(). Additionally, homographs of a
* character are transformed into one specific character (preferably ASCII if
* it is an ASCII character), control characters are removed, runs of spaces
* are collapsed and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in
* the users table, and all other columns that store a clean string, otherwise
* you will break this functionality.
*
* @param        string  $text   An unclean string, maybe user input (has to be valid UTF-8!)
* @return       string                  Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
        static $homographs = array();

        // The confusables table is pulled in on first use and kept for later calls
        if (!$homographs)
        {
                global $phpbb_root_path, $phpEx;
                $homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
        }

        // Case fold and normalize, then map every homograph to its canonical character
        $cleaned = strtr(utf8_case_fold_nfkc($text), $homographs);

        // Other control characters
        $cleaned = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $cleaned);

        // we need to reduce multiple spaces to a single one
        $cleaned = preg_replace('# {2,}#', ' ', $cleaned);

        // we can use trim here as all the other space characters should have been turned
        // into normal ASCII spaces by now
        return trim($cleaned);
}
1839
 
 
1840
 
/**
* A wrapper for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* The argument is taken by value: it is never modified here, and a by-value
* parameter lets callers pass literals and expression results as well as
* variables.
*
* @param        string  $value  The UTF-8 string to escape
* @return       string                  $value with &, <, > and double quotes converted to HTML entities
*/
function utf8_htmlspecialchars($value)
{
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
}
1847
 
 
1848
 
/**
* Trying to convert returned system message to utf8
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param        string  $message        The raw message, treated as ISO-8859-1
* @return       string                  The message, recoded to UTF-8 if it contained non-ASCII bytes, passed through utf8_htmlspecialchars()
*/
function utf8_convert_message($message)
{
        // First of all check if conversion is needed at all, as there is no point
        // in converting ASCII messages from ISO-8859-1 to UTF-8
        if (preg_match('/[\x80-\xFF]/', $message))
        {
                // we need to convert some part of the message
                $message = utf8_recode($message, 'ISO-8859-1');
        }

        // Hand over a real variable rather than a call result; this is safe whether or
        // not utf8_htmlspecialchars() declares its parameter by reference
        return utf8_htmlspecialchars($message);
}
1866
 
 
1867
 
/**
* UTF8-compatible wordwrap replacement
*
* Words are separated by single ASCII spaces; existing occurrences of $break in
* the input are kept as line boundaries. Lengths are measured with utf8_strlen(),
* so $width counts characters rather than bytes.
*
* A word longer than $width that starts a line is kept on that line as is (unless
* $cut is set); no empty line is emitted in front of it.
*
* @param        string  $string The input string
* @param        int             $width  The column width. Defaults to 75.
* @param        string  $break  The line is broken using the optional break parameter. Defaults to "\n".
* @param        bool    $cut    If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart. Ignored when $width is not positive.
*
* @return       string                  the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
        // We first need to explode on $break, not destroying existing (intended) breaks
        $lines = explode($break, $string);
        $new_lines = array(0 => '');
        $index = 0;

        // Cutting only makes sense for a positive width: with a width of zero or less
        // the remainder of a word would never get shorter and the loop below would
        // never terminate
        $cut = ($cut && $width > 0);

        foreach ($lines as $line)
        {
                $words = explode(' ', $line);

                for ($i = 0, $size = sizeof($words); $i < $size; $i++)
                {
                        $word = $words[$i];

                        // If cut is true we need to cut the word if it is > width chars
                        if ($cut && utf8_strlen($word) > $width)
                        {
                                // Keep the remainder in the same slot and step back so the
                                // next iteration processes it as a word of its own
                                $words[$i] = utf8_substr($word, $width);
                                $word = utf8_substr($word, 0, $width);
                                $i--;
                        }

                        // Start a new line if the word does not fit any more - but only if
                        // the current line already holds something, otherwise an over-long
                        // word at the start of a line would leave an empty line behind
                        if ($new_lines[$index] !== '' && utf8_strlen($new_lines[$index] . $word) > $width)
                        {
                                // Remove the separator space appended after the previous word
                                $new_lines[$index] = substr($new_lines[$index], 0, -1);
                                $index++;
                                $new_lines[$index] = '';
                        }

                        $new_lines[$index] .= $word . ' ';
                }

                // End of an original line: drop the trailing separator space and open the next line
                $new_lines[$index] = substr($new_lines[$index], 0, -1);
                $index++;
                $new_lines[$index] = '';
        }

        // The loop always opens one line more than needed
        unset($new_lines[$index]);
        return implode($break, $new_lines);
}
1919
 
 
1920
 
?>
 
 
b'\\ No newline at end of file'