443
by dcoles
Added Forum application along with unmodifed version of phpBB3 "Olympus" 3.0.0 |
1 |
<?php
|
2 |
/**
|
|
3 |
*
|
|
4 |
* @package utf
|
|
5 |
* @version $Id: utf_tools.php,v 1.72 2007/12/04 16:20:38 naderman Exp $
|
|
6 |
* @copyright (c) 2006 phpBB Group
|
|
7 |
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
|
|
8 |
*
|
|
9 |
*/
|
|
10 |
||
11 |
/**
|
|
12 |
*/
|
|
13 |
// Stop immediately when this file is requested directly instead of being
// included by a script that defined IN_PHPBB.
if (defined('IN_PHPBB') === false)
{
	exit;
}

// Enforce ASCII only string handling: character classification is pinned to
// the "C" locale for the byte-oriented string functions used below.
setlocale(LC_CTYPE, 'C');
|
20 |
||
21 |
/**
|
|
22 |
* UTF-8 tools
|
|
23 |
*
|
|
24 |
* Whenever possible, these functions will try to use PHP's built-in functions or
|
|
25 |
* extensions, otherwise they will default to custom routines.
|
|
26 |
*
|
|
27 |
* @package utf
|
|
28 |
*/
|
|
29 |
||
30 |
// utf8_encode() and utf8_decode() used to be provided by the XML extension only,
// but newer PHP releases ship them in the core. Testing for the functions
// themselves (rather than for ext/xml) keeps this file from redeclaring a
// built-in function on builds that have the functions but not the extension.
if (!function_exists('utf8_encode'))
{
	/**
	* Implementation of PHP's native utf8_encode for people without XML support
	* This function exploits some nice things that ISO-8859-1 and UTF-8 have in common:
	* bytes below 0x80 are identical in both encodings, and every other byte maps
	* onto a two byte sequence starting with 0xC2 or 0xC3.
	*
	* @param string $str ISO-8859-1 encoded data
	* @return string UTF-8 encoded data
	*/
	function utf8_encode($str)
	{
		$out = '';
		for ($i = 0, $len = strlen($str); $i < $len; $i++)
		{
			$letter = $str[$i];
			$num = ord($letter);
			if ($num < 0x80)
			{
				// Plain ASCII, copied through unchanged
				$out .= $letter;
			}
			else if ($num < 0xC0)
			{
				// 0x80-0xBF: the byte itself is reused as the trail byte
				$out .= "\xC2" . $letter;
			}
			else
			{
				// 0xC0-0xFF: trail byte is the original value shifted down by 64
				$out .= "\xC3" . chr($num - 64);
			}
		}
		return $out;
	}
}

if (!function_exists('utf8_decode'))
{
	/**
	* Implementation of PHP's native utf8_decode for people without XML support
	*
	* Two byte sequences are decoded; results above 255 cannot be represented
	* and become "?". Every three or four byte sequence becomes a single "?".
	* All other bytes are copied through one at a time.
	*
	* @param string $str UTF-8 encoded data
	* @return string ISO-8859-1 encoded data
	*/
	function utf8_decode($str)
	{
		$pos = 0;
		$len = strlen($str);
		$ret = '';

		while ($pos < $len)
		{
			// Only the high nibble of the lead byte is needed to tell the sequence length
			$ord = ord($str[$pos]) & 0xF0;
			if ($ord === 0xC0 || $ord === 0xD0)
			{
				// A lead byte at the very end of the string has no trail byte;
				// treat the missing byte as 0 instead of reading past the end.
				$trail = ($pos + 1 < $len) ? ord($str[$pos + 1]) : 0;
				$charval = ((ord($str[$pos]) & 0x1F) << 6) | ($trail & 0x3F);
				$pos += 2;
				$ret .= (($charval < 256) ? chr($charval) : '?');
			}
			else if ($ord === 0xE0)
			{
				$ret .= '?';
				$pos += 3;
			}
			else if ($ord === 0xF0)
			{
				$ret .= '?';
				$pos += 4;
			}
			else
			{
				$ret .= $str[$pos];
				++$pos;
			}
		}
		return $ret;
	}
}
|
|
102 |
||
103 |
// mbstring is old and has its functions around for older versions of PHP.
|
|
104 |
// if mbstring is not loaded, we go into native mode.
|
|
105 |
if (extension_loaded('mbstring')) |
|
106 |
{
|
|
107 |
mb_internal_encoding('UTF-8'); |
|
108 |
||
109 |
/**
|
|
110 |
* UTF-8 aware alternative to strrpos
|
|
111 |
* Find position of last occurrence of a char in a string
|
|
112 |
*
|
|
113 |
* Notes:
|
|
114 |
* - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
|
|
115 |
*/
|
|
116 |
if (version_compare(PHP_VERSION, '5.2.0', '>=')) |
|
117 |
{
|
|
118 |
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a string, counted in characters.
*
* The encoding is passed to mbstring explicitly so the result does not depend
* on the current mb_internal_encoding() setting.
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset handed to mb_strrpos
* @return mixed integer position or FALSE if the haystack is empty or the needle is not found
* @ignore
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	// Emulate behaviour of strrpos rather than raising warning.
	// Compare with '' explicitly: empty() would also reject the haystack "0".
	if ((string) $str === '')
	{
		return false;
	}

	return mb_strrpos($str, $needle, is_null($offset) ? 0 : $offset, 'UTF-8');
}
|
|
139 |
}
|
|
140 |
else
|
|
141 |
{
|
|
142 |
/**
* UTF-8 aware alternative to strrpos
*
* Variant for PHP versions whose mb_strrpos() has no offset parameter
* (it was added in 5.2.0); the offset is emulated with mb_substr().
*
* A non-negative offset skips that many characters from the left. A negative
* offset follows the rule documented for strrpos(): the match has to start at
* or before that many characters counted from the end of the string.
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset in characters
* @return mixed integer position or FALSE on failure
* @ignore
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	if (!is_null($offset) && !is_int($offset))
	{
		trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
		return false;
	}

	// Emulate behaviour of strrpos rather than raising warning.
	// Compare with '' explicitly: empty() would also reject the haystack "0".
	if ((string) $str === '')
	{
		return false;
	}

	if (is_null($offset))
	{
		return mb_strrpos($str, $needle);
	}

	if ($offset >= 0)
	{
		// Search the part right of the offset, then shift the hit back
		$tail = mb_substr($str, $offset);

		if ((string) $tail === '')
		{
			return false;
		}

		$pos = mb_strrpos($tail, $needle);

		return ($pos === false) ? false : $pos + $offset;
	}

	$len = mb_strlen($str);

	if (-$offset > $len)
	{
		// Offset points before the start of the string
		return false;
	}

	// Cut the haystack so that the last possible match starts at $len + $offset.
	// Positions inside the cut string equal positions in the full string.
	$needle_len = mb_strlen($needle);
	$end = (-$offset < $needle_len) ? $len : $len + $offset + $needle_len;
	$head = mb_substr($str, 0, $end);

	if ((string) $head === '')
	{
		return false;
	}

	return mb_strrpos($head, $needle);
}
|
|
177 |
}
|
|
178 |
||
179 |
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string, counted in characters.
*
* The encoding is passed to mbstring explicitly so the result does not depend
* on the current mb_internal_encoding() setting.
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset handed to mb_strpos
* @return mixed integer position or FALSE if the needle is not found
* @ignore
*/
function utf8_strpos($str, $needle, $offset = null)
{
	// No offset given means searching from the first character
	return mb_strpos($str, $needle, is_null($offset) ? 0 : $offset, 'UTF-8');
}
|
|
194 |
||
195 |
/**
* UTF-8 aware alternative to strtolower
*
* The encoding is passed to mbstring explicitly so the result does not depend
* on the current mb_internal_encoding() setting.
*
* @param string $str UTF-8 string
* @return string string in lowercase
* @ignore
*/
function utf8_strtolower($str)
{
	return mb_strtolower($str, 'UTF-8');
}
|
|
203 |
||
204 |
/**
* UTF-8 aware alternative to strtoupper
*
* The encoding is passed to mbstring explicitly so the result does not depend
* on the current mb_internal_encoding() setting.
*
* @param string $str UTF-8 string
* @return string string in uppercase
* @ignore
*/
function utf8_strtoupper($str)
{
	return mb_strtoupper($str, 'UTF-8');
}
|
|
212 |
||
213 |
/**
* UTF-8 aware alternative to substr
* Return part of a string given a character offset (and optionally a length).
*
* The encoding is passed to mbstring explicitly so the result does not depend
* on the current mb_internal_encoding() setting.
*
* @param string $str UTF-8 string
* @param integer $offset offset in characters
* @param integer $length (optional) length in characters; null means "to the end"
* @return string the requested part of the string
* @ignore
*/
function utf8_substr($str, $offset, $length = null)
{
	if (is_null($length))
	{
		// The encoding argument comes after the length, so a length has to be
		// supplied. The full character count always reaches the end of the
		// string, whatever the offset is.
		$length = mb_strlen($str, 'UTF-8');
	}

	return mb_substr($str, $offset, $length, 'UTF-8');
}
|
|
228 |
||
229 |
/**
* Count the characters (not bytes) of a UTF-8 string using mbstring
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
* @ignore
*/
function utf8_strlen($text)
{
	$char_count = mb_strlen($text, 'utf-8');

	return $char_count;
}
|
|
237 |
}
|
|
238 |
else
|
|
239 |
{
|
|
240 |
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a string, counted in characters.
*
* The last match is located byte-wise by stepping through the string with
* strpos(), so overlapping occurrences of the needle are found as well.
* The byte position is then converted into a character position.
*
* A non-negative offset skips that many characters from the left. A negative
* offset follows the rule documented for strrpos(): the match has to start at
* or before that many characters counted from the end of the string.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset in characters
* @return mixed integer position or FALSE on failure (including an empty needle)
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	if (is_null($offset))
	{
		$haystack = (string) $str;
		$needle = (string) $needle;

		// An empty needle can not be searched for
		if ($needle === '')
		{
			return false;
		}

		// Step through every occurrence and remember the last one
		$last = false;
		$from = 0;
		while (($hit = strpos($haystack, $needle, $from)) !== false)
		{
			$last = $hit;
			$from = $hit + 1;
		}

		if ($last === false)
		{
			return false;
		}

		// Characters in front of the match == character position of the match
		return utf8_strlen(substr($haystack, 0, $last));
	}

	if (!is_int($offset))
	{
		trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
		return false;
	}

	if ($offset >= 0)
	{
		// Search the part right of the offset, then shift the hit back
		$pos = utf8_strrpos(utf8_substr($str, $offset), $needle);

		return ($pos === false) ? false : $pos + $offset;
	}

	$len = utf8_strlen($str);

	if (-$offset > $len)
	{
		// Offset points before the start of the string
		return false;
	}

	// Cut the haystack so that the last possible match starts at $len + $offset.
	// Positions inside the cut string equal positions in the full string.
	$needle_len = utf8_strlen($needle);
	$end = (-$offset < $needle_len) ? $len : $len + $offset + $needle_len;

	return utf8_strrpos(utf8_substr($str, 0, $end), $needle);
}
|
|
284 |
||
285 |
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string, counted in characters.
*
* The match is located byte-wise with strpos() and the byte position is then
* converted into a character position.
*
* A negative offset is counted from the end of the string; an offset that
* points before the start of the string yields FALSE.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset offset in characters (from left)
* @return mixed integer position or FALSE on failure (including an empty needle)
*/
function utf8_strpos($str, $needle, $offset = null)
{
	if (is_null($offset))
	{
		$haystack = (string) $str;
		$needle = (string) $needle;

		// An empty needle can not be searched for
		if ($needle === '')
		{
			return false;
		}

		$byte_pos = strpos($haystack, $needle);

		if ($byte_pos === false)
		{
			return false;
		}

		// Characters in front of the match == character position of the match
		return utf8_strlen(substr($haystack, 0, $byte_pos));
	}

	if (!is_int($offset))
	{
		trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
		return false;
	}

	if ($offset < 0)
	{
		// Turn "n characters from the end" into a position from the left so
		// that it can be added back onto the result below
		$offset += utf8_strlen($str);

		if ($offset < 0)
		{
			return false;
		}
	}

	$str = utf8_substr($str, $offset);

	if (false !== ($pos = utf8_strpos($str, $needle)))
	{
		return $pos + $offset;
	}

	return false;
}
|
|
324 |
||
325 |
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
*
* ASCII letters are handled by strtolower(); the uppercase letters listed in
* the table below are then replaced by their lowercase counterparts. Letters
* that are not in the table are left untouched.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 string
* @return string string in lowercase
*/
function utf8_strtolower($string)
{
	// UTF-8 byte sequence of the uppercase letter => sequence of the lowercase letter
	static $lower_map = array(
		"\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
		"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
		"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
		"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
		"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
		"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
		"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
		"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
		"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
		"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
		"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
		"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
		"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
		"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
		"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
		"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
		"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
		"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
		"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
		"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
		"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
		"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
		"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
		"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
		"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
		"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
		"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
		"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
		"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
		"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
		"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
		"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
		"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
		"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
		"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
		"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
		"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
		"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
		"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
		"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
		"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
		"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
		"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
		"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
		"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
		"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
	);

	// ASCII first, then the multibyte letters from the table
	$ascii_lowered = strtolower($string);

	return strtr($ascii_lowered, $lower_map);
}
|
|
389 |
||
390 |
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
*
* ASCII letters are handled by strtoupper(); the lowercase letters listed in
* the table below are then replaced by their uppercase counterparts. Letters
* that are not in the table are left untouched.
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 string
* @return string string in uppercase
*/
function utf8_strtoupper($string)
{
	// UTF-8 byte sequence of the lowercase letter => sequence of the uppercase letter
	static $upper_map = array(
		"\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
		"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
		"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
		"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
		"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
		"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
		"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
		"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
		"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
		"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
		"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
		"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
		"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
		"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
		"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
		"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
		"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
		"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
		"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
		"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
		"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
		"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
		"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
		"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
		"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
		"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
		"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
		"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
		"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
		"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
		"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
		"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
		"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
		"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
		"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
		"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
		"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
		"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
		"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
		"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
		"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
		"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
		"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
		"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
		"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
		"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
	);

	// ASCII first, then the multibyte letters from the table
	$ascii_raised = strtoupper($string);

	return strtr($ascii_raised, $upper_map);
}
|
|
454 |
||
455 |
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* Arguments: unlike substr, offset and length are silently cast to integers
* instead of being rejected when they are something else.
*
* Return value: like mb_substr, an empty string is returned where substr
* may return false (e.g. offset > string length).
*
* Implementation: the requested part is cut out with one regular expression,
* built from a non-capturing "skip offset characters" group and a capturing
* group for the wanted part. PCRE only supports repetitions of less than
* 65536, so larger counts are written as a repeated group of 65535
* characters plus a remainder.
*
* Counting the characters of the string is relatively expensive, so it is
* only done when needed: for a negative offset or when a length is given.
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the requested part, or an empty string if there is none
*/
function utf8_substr($str, $offset, $length = NULL)
{
	// generates E_NOTICE
	// for PHP4 objects, but not PHP5 objects
	$str = (string) $str;
	$offset = (int) $offset;
	$has_length = !is_null($length);

	if ($has_length)
	{
		$length = (int) $length;
	}

	// Nothing to return for a zero length ...
	if ($length === 0)
	{
		return '';
	}

	// ... or when a negative length lies further left than a negative offset
	if ($offset < 0 && $length < 0 && $length < $offset)
	{
		return '';
	}

	// Character count of $str, filled in lazily
	$char_count = null;

	// Turn a negative offset into one counted from the left
	// (a tail anchored pattern would work too, but is horribly slow)
	if ($offset < 0)
	{
		$char_count = utf8_strlen($str);
		$offset = max(0, $char_count + $offset);
	}

	// Pattern part that skips $offset characters without capturing them
	$offset_pattern = '^';

	if ($offset > 0)
	{
		$blocks = (int) ($offset / 65535);
		$rest = $offset % 65535;
		$skip = ($blocks) ? '(?:.{65535}){' . $blocks . '}' : '';

		$offset_pattern = '^(?:' . $skip . '.{' . $rest . '})';
	}

	// Pattern part that captures the wanted characters
	$length_pattern = '';

	if (!$has_length)
	{
		// the rest of the string
		$length_pattern = '(.*)$';
	}
	else
	{
		if ($char_count === null)
		{
			$char_count = utf8_strlen($str);
		}

		// Offset behind the end of the string
		if ($offset > $char_count)
		{
			return '';
		}

		if ($length > 0)
		{
			// Never ask for more characters than are left after the offset
			$length = min($char_count - $offset, $length);

			$blocks = (int) ($length / 65535);
			$rest = $length % 65535;
			$take = ($blocks) ? '(?:.{65535}){' . $blocks . '}' : '';

			// Capture exactly $length characters
			$length_pattern = '(' . $take . '.{'. $rest . '})';
		}
		else if ($length < 0)
		{
			// More characters to drop at the end than are left after the offset
			if ($length < ($offset - $char_count))
			{
				return '';
			}

			$blocks = (int)((-$length) / 65535);
			$rest = (-$length) % 65535;
			$drop = ($blocks) ? '(?:.{65535}){' . $blocks . '}' : '';

			// Capture everything except -$length characters
			// anchored at the tail-end of the string
			$length_pattern = '(.*)(?:' . $drop . '.{' . $rest . '})$';
		}
	}

	$matched = preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match);

	if (!$matched)
	{
		return '';
	}

	return $match[1];
}
|
|
600 |
||
601 |
/**
* Return the length (in characters) of a UTF-8 string
*
* utf8_decode() turns the string into one byte per character (characters it
* can not represent become "?"), so the byte length of its result is the
* character count of the original string.
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf8_strlen($text)
{
	$one_byte_per_char = utf8_decode($text);

	return strlen($one_byte_per_char);
}
|
|
612 |
}
|
|
613 |
||
614 |
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* The string is cut into chunks of $split_len characters; the last chunk
* holds whatever is left and may be shorter. A string that is not longer
* than $split_len is returned as the only element.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk
* @return mixed array of chunks in original order, or FALSE if $split_len is not an integer >= 1
*/
function utf8_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	$len = utf8_strlen($str);
	if ($len <= $split_len)
	{
		return array($str);
	}

	// The greedy quantifier takes $split_len characters wherever that many
	// are left and the shorter remainder at the end. With the "s" modifier
	// the dot matches every character, so no chunk is dropped because of
	// newlines or NUL characters.
	preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar);
	return $ar[0];
}
|
|
639 |
||
640 |
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* Every character of the mask is taken literally: the mask is escaped before
* it is placed into the character class, so "-", "^", "]", "\" and the
* pattern delimiter have no special meaning.
*
* @author Harry Fuecks
* @param string $str UTF-8 string to examine
* @param string $mask characters that are allowed in the initial segment
* @param integer $start (optional) character offset passed to utf8_substr
* @param integer $length (optional) character length passed to utf8_substr
* @return integer number of leading characters of $str that occur in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
	if ($start !== null || $length !== null)
	{
		$str = utf8_substr($str, $start, $length);
	}

	$mask = (string) $mask;

	// Without allowed characters nothing can match, and "[]" would not be
	// a valid character class anyway
	if ($mask === '')
	{
		return 0;
	}

	$allowed = preg_quote($mask, '/');

	if (!preg_match('/^[' . $allowed . ']+/u', $str, $matches))
	{
		return 0;
	}

	return utf8_strlen($matches[0]);
}
|
|
662 |
||
663 |
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string $str UTF-8 string
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
	$char_count = utf8_strlen($str);

	// Nothing to do for an empty string
	if ($char_count == 0)
	{
		return '';
	}

	// A single character is simply uppercased as a whole
	if ($char_count == 1)
	{
		return utf8_strtoupper($str);
	}

	// Split into the first character and everything after it
	preg_match('/^(.{1})(.*)$/us', $str, $matches);

	$first = utf8_strtoupper($matches[1]);

	return $first . $matches[2];
}
|
|
689 |
||
690 |
/**
|
|
691 |
* Recode a string to UTF-8
|
|
692 |
*
|
|
693 |
* If the encoding is not supported, the string is returned as-is
|
|
694 |
*
|
|
695 |
* @param string $string Original string
|
|
696 |
* @param string $encoding Original encoding (lowered)
|
|
697 |
* @return string The string, encoded in UTF-8
|
|
698 |
*/
|
|
699 |
function utf8_recode($string, $encoding) |
|
700 |
{
|
|
701 |
$encoding = strtolower($encoding); |
|
702 |
||
703 |
if ($encoding == 'utf-8' || !is_string($string) || empty($string)) |
|
704 |
{
|
|
705 |
return $string; |
|
706 |
}
|
|
707 |
||
708 |
// we force iso-8859-1 to be cp1252
|
|
709 |
if ($encoding == 'iso-8859-1') |
|
710 |
{
|
|
711 |
$encoding = 'cp1252'; |
|
712 |
}
|
|
713 |
// convert iso-8859-8-i to iso-8859-8
|
|
714 |
else if ($encoding == 'iso-8859-8-i') |
|
715 |
{
|
|
716 |
$encoding = 'iso-8859-8'; |
|
717 |
$string = hebrev($string); |
|
718 |
}
|
|
719 |
||
720 |
// First, try iconv()
|
|
721 |
if (function_exists('iconv')) |
|
722 |
{
|
|
723 |
$ret = @iconv($encoding, 'utf-8', $string); |
|
724 |
||
725 |
if (!empty($ret)) |
|
726 |
{
|
|
727 |
return $ret; |
|
728 |
}
|
|
729 |
}
|
|
730 |
||
731 |
// Try the mb_string extension
|
|
732 |
if (function_exists('mb_convert_encoding')) |
|
733 |
{
|
|
734 |
// mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
|
|
735 |
switch ($encoding) |
|
736 |
{
|
|
737 |
case 'iso-8859-1': |
|
738 |
case 'iso-8859-2': |
|
739 |
case 'iso-8859-4': |
|
740 |
case 'iso-8859-7': |
|
741 |
case 'iso-8859-9': |
|
742 |
case 'iso-8859-15': |
|
743 |
case 'windows-1251': |
|
744 |
case 'windows-1252': |
|
745 |
case 'cp1252': |
|
746 |
case 'shift_jis': |
|
747 |
case 'euc-kr': |
|
748 |
case 'big5': |
|
749 |
case 'gb2312': |
|
750 |
$ret = @mb_convert_encoding($string, 'utf-8', $encoding); |
|
751 |
||
752 |
if (!empty($ret)) |
|
753 |
{
|
|
754 |
return $ret; |
|
755 |
}
|
|
756 |
}
|
|
757 |
}
|
|
758 |
||
759 |
// Try the recode extension
|
|
760 |
if (function_exists('recode_string')) |
|
761 |
{
|
|
762 |
$ret = @recode_string($encoding . '..utf-8', $string); |
|
763 |
||
764 |
if (!empty($ret)) |
|
765 |
{
|
|
766 |
return $ret; |
|
767 |
}
|
|
768 |
}
|
|
769 |
||
770 |
// If nothing works, check if we have a custom transcoder available
|
|
771 |
if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding)) |
|
772 |
{
|
|
773 |
// Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
|
|
774 |
trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); |
|
775 |
}
|
|
776 |
||
777 |
global $phpbb_root_path, $phpEx; |
|
778 |
||
779 |
// iso-8859-* character encoding
|
|
780 |
if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array)) |
|
781 |
{
|
|
782 |
switch ($array[1]) |
|
783 |
{
|
|
784 |
case '1': |
|
785 |
case '2': |
|
786 |
case '4': |
|
787 |
case '7': |
|
788 |
case '8': |
|
789 |
case '9': |
|
790 |
case '15': |
|
791 |
if (!function_exists('iso_8859_' . $array[1])) |
|
792 |
{
|
|
793 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) |
|
794 |
{
|
|
795 |
trigger_error('Basic reencoder file is missing', E_USER_ERROR); |
|
796 |
}
|
|
797 |
include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); |
|
798 |
}
|
|
799 |
return call_user_func('iso_8859_' . $array[1], $string); |
|
800 |
break; |
|
801 |
||
802 |
default: |
|
803 |
trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); |
|
804 |
break; |
|
805 |
}
|
|
806 |
}
|
|
807 |
||
808 |
// CP/WIN character encoding
|
|
809 |
if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array)) |
|
810 |
{
|
|
811 |
switch ($array[1]) |
|
812 |
{
|
|
813 |
case '932': |
|
814 |
break; |
|
815 |
case '1250': |
|
816 |
case '1251': |
|
817 |
case '1252': |
|
818 |
case '1254': |
|
819 |
case '1255': |
|
820 |
case '1256': |
|
821 |
case '1257': |
|
822 |
case '874': |
|
823 |
if (!function_exists('cp' . $array[1])) |
|
824 |
{
|
|
825 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) |
|
826 |
{
|
|
827 |
trigger_error('Basic reencoder file is missing', E_USER_ERROR); |
|
828 |
}
|
|
829 |
include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); |
|
830 |
}
|
|
831 |
return call_user_func('cp' . $array[1], $string); |
|
832 |
break; |
|
833 |
||
834 |
default: |
|
835 |
trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); |
|
836 |
break; |
|
837 |
}
|
|
838 |
}
|
|
839 |
||
840 |
// TIS-620
|
|
841 |
if (preg_match('/tis[_ -]?620/', $encoding)) |
|
842 |
{
|
|
843 |
if (!function_exists('tis_620')) |
|
844 |
{
|
|
845 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) |
|
846 |
{
|
|
847 |
trigger_error('Basic reencoder file is missing', E_USER_ERROR); |
|
848 |
}
|
|
849 |
include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); |
|
850 |
}
|
|
851 |
return tis_620($string); |
|
852 |
}
|
|
853 |
||
854 |
// SJIS
|
|
855 |
if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding)) |
|
856 |
{
|
|
857 |
if (!function_exists('sjis')) |
|
858 |
{
|
|
859 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) |
|
860 |
{
|
|
861 |
trigger_error('CJK reencoder file is missing', E_USER_ERROR); |
|
862 |
}
|
|
863 |
include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); |
|
864 |
}
|
|
865 |
return sjis($string); |
|
866 |
}
|
|
867 |
||
868 |
// EUC_KR
|
|
869 |
if (preg_match('/euc[_ -]?kr/', $encoding)) |
|
870 |
{
|
|
871 |
if (!function_exists('euc_kr')) |
|
872 |
{
|
|
873 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) |
|
874 |
{
|
|
875 |
trigger_error('CJK reencoder file is missing', E_USER_ERROR); |
|
876 |
}
|
|
877 |
include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); |
|
878 |
}
|
|
879 |
return euc_kr($string); |
|
880 |
}
|
|
881 |
||
882 |
// BIG-5
|
|
883 |
if (preg_match('/big[_ -]?5/', $encoding)) |
|
884 |
{
|
|
885 |
if (!function_exists('big5')) |
|
886 |
{
|
|
887 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) |
|
888 |
{
|
|
889 |
trigger_error('CJK reencoder file is missing', E_USER_ERROR); |
|
890 |
}
|
|
891 |
include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); |
|
892 |
}
|
|
893 |
return big5($string); |
|
894 |
}
|
|
895 |
||
896 |
// GB2312
|
|
897 |
if (preg_match('/gb[_ -]?2312/', $encoding)) |
|
898 |
{
|
|
899 |
if (!function_exists('gb2312')) |
|
900 |
{
|
|
901 |
if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) |
|
902 |
{
|
|
903 |
trigger_error('CJK reencoder file is missing', E_USER_ERROR); |
|
904 |
}
|
|
905 |
include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); |
|
906 |
}
|
|
907 |
return gb2312($string); |
|
908 |
}
|
|
909 |
||
910 |
// No supported encoding matched: trigger an error instead of returning bad data
|
|
911 |
trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); |
|
912 |
//return $string; // use utf_normalizer::cleanup() ?
|
|
913 |
}
|
|
914 |
||
915 |
/**
* Replace multi-byte UTF-8 sequences with their Numeric Character Reference
*
* Every lead byte in the range 0xC2-0xF4 that is followed by one to three
* continuation bytes (0x80-0xBF) is passed to utf8_encode_ncr_callback(),
* which produces the decimal NCR. Bytes that do not match that pattern are
* left untouched.
*
* @param	string	$text	UTF-8 string in NFC
* @return	string			String in which the matched sequences are replaced by NCRs
*/
function utf8_encode_ncr($text)
{
	// lead byte followed by 1-3 continuation bytes
	$multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

	return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
|
|
925 |
||
926 |
/**
* Callback used in utf8_encode_ncr()
*
* Receives the match array for one UTF-8 sequence and builds the decimal
* NCR from whatever utf8_ord() returns for the matched bytes.
*
* @param	array	$m	0-based numerically indexed array passed by preg_replace_callback()
* @return	string		NCR of the form "&#<value>;"
*/
function utf8_encode_ncr_callback($m)
{
	$code_point = utf8_ord($m[0]);

	return '&#' . $code_point . ';';
}
|
|
938 |
||
939 |
/**
* Converts a single UTF-8 encoded character to its Unicode code point
*
* The byte length of the input selects the decoding formula. No validation
* of the individual bytes is performed.
*
* @param	string	$chr	UTF-8 char (1 to 4 bytes)
* @return	integer			Unicode code point; if the input is not 1-4 bytes long
*							the input itself is returned unchanged
*/
function utf8_ord($chr)
{
	$byte_count = strlen($chr);

	// single byte: plain ASCII
	if ($byte_count === 1)
	{
		return ord($chr);
	}

	// 110xxxxx 10xxxxxx
	if ($byte_count === 2)
	{
		$lead = (ord($chr[0]) & 0x1F) << 6;
		$tail = ord($chr[1]) & 0x3F;

		return $lead | $tail;
	}

	// 1110xxxx 10xxxxxx 10xxxxxx
	if ($byte_count === 3)
	{
		$lead = (ord($chr[0]) & 0x0F) << 12;
		$mid = (ord($chr[1]) & 0x3F) << 6;
		$tail = ord($chr[2]) & 0x3F;

		return $lead | $mid | $tail;
	}

	// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if ($byte_count === 4)
	{
		$lead = (ord($chr[0]) & 0x07) << 18;
		$upper = (ord($chr[1]) & 0x3F) << 12;
		$lower = (ord($chr[2]) & 0x3F) << 6;
		$tail = ord($chr[3]) & 0x3F;

		return $lead | $upper | $lower | $tail;
	}

	// any other length: hand the input back as is
	return $chr;
}
|
|
969 |
||
970 |
/**
* Converts a Unicode code point to its UTF-8 encoded character
*
* The size of the code point decides whether one, two, three or four bytes
* are produced. The value is not checked against the valid Unicode range.
*
* @param	int		$cp		Unicode code point
* @return	string			UTF-8 char
*/
function utf8_chr($cp)
{
	// above the BMP: four bytes
	if ($cp > 0xFFFF)
	{
		$bytes = chr(0xF0 | ($cp >> 18));
		$bytes .= chr(0x80 | (($cp >> 12) & 0x3F));
		$bytes .= chr(0x80 | (($cp >> 6) & 0x3F));
		$bytes .= chr(0x80 | ($cp & 0x3F));

		return $bytes;
	}

	// U+0800 and up: three bytes
	if ($cp > 0x7FF)
	{
		$bytes = chr(0xE0 | ($cp >> 12));
		$bytes .= chr(0x80 | (($cp >> 6) & 0x3F));
		$bytes .= chr(0x80 | ($cp & 0x3F));

		return $bytes;
	}

	// U+0080 and up: two bytes
	if ($cp > 0x7F)
	{
		$bytes = chr(0xC0 | ($cp >> 6));
		$bytes .= chr(0x80 | ($cp & 0x3F));

		return $bytes;
	}

	// everything else is emitted as a single byte
	return chr($cp);
}
|
|
995 |
||
996 |
/**
* Convert Numeric Character References to UTF-8 chars
*
* Notes:
*	- NCRs are not converted recursively: replaced text is not scanned again,
*	  so "&#38;#38;" becomes "&#38;" and not "&"
*	- the existence of the Unicode character is NOT checked, therefore an
*	  entity may be converted to an inexistent codepoint
*	- only decimal NCRs of 1-6 digits and hexadecimal NCRs of 1-5 digits
*	  are matched
*
* @param	string	$text		String to convert, encoded in UTF-8 (no normal form required)
* @return	string				UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
	// decimal (&#NNN;) or hexadecimal (&#xHHH;) reference, case-insensitive
	$ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

	return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
|
|
1010 |
||
1011 |
/**
* Callback used in utf8_decode_ncr()
*
* Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
* It will ignore most of invalid NCRs, but not all!
*
* @param	array	$m			0-based numerically indexed array passed by preg_replace_callback()
* @return	string				UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
	$reference = $m[1];

	if (strncasecmp($reference, 'x', 1) === 0)
	{
		// hexadecimal form: drop the leading "x" and convert the digits
		$cp = hexdec(substr($reference, 1));
	}
	else
	{
		// decimal form: the digit string is passed on as is
		$cp = $reference;
	}

	return utf8_chr($cp);
}
|
|
1026 |
||
1027 |
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The mapping tables are read from includes/utf/data/ on first use and kept
* in a static array for later calls. The "common" table is always applied;
* it is followed by the "full" table when $option is exactly 'full', and by
* the "simple" table for any other value.
*
* @param	string	$text	text to be case folded
* @param	string	$option	determines how we will fold the cases
* @return	string			case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
	static $uniarray = array();
	global $phpbb_root_path, $phpEx;

	$use_full = ($option === 'full');

	// the common table is needed whatever the option is
	if (!isset($uniarray['c']))
	{
		$uniarray['c'] = include($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
	}

	if ($use_full)
	{
		// full folding may replace one character with several
		if (!isset($uniarray['f']))
		{
			$uniarray['f'] = include($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
		}

		$extra_table = 'f';
	}
	else
	{
		// simple folding replaces one character with one character
		if (!isset($uniarray['s']))
		{
			$uniarray['s'] = include($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
		}

		$extra_table = 's';
	}

	// common replacements first, then the option specific ones
	$folded = strtr($text, $uniarray['c']);

	return strtr($folded, $uniarray[$extra_table]);
}
|
|
1073 |
||
1074 |
/**
* Takes the input and does a "special" case fold. It does minor normalization
* and returns NFKC compatible text
*
* Steps, in order: case fold via utf8_case_fold(), NFKC normalization via
* utf_normalizer::nfkc() (loaded on demand), then the FC_NFKC_Closure
* replacements from the static table below.
*
* @param	string	$text	text to be case folded
* @param	string	$option	determines how we will fold the cases
* @return	string			case folded text
*/
function utf8_case_fold_nfkc($text, $option = 'full')
{
	static $fc_nfkc_closure = array(
		"\xCD\xBA"	=> "\x20\xCE\xB9",
		"\xCF\x92"	=> "\xCF\x85",
		"\xCF\x93"	=> "\xCF\x8D",
		"\xCF\x94"	=> "\xCF\x8B",
		"\xCF\xB2"	=> "\xCF\x83",
		"\xCF\xB9"	=> "\xCF\x83",
		"\xE1\xB4\xAC"	=> "\x61",
		"\xE1\xB4\xAD"	=> "\xC3\xA6",
		"\xE1\xB4\xAE"	=> "\x62",
		"\xE1\xB4\xB0"	=> "\x64",
		"\xE1\xB4\xB1"	=> "\x65",
		"\xE1\xB4\xB2"	=> "\xC7\x9D",
		"\xE1\xB4\xB3"	=> "\x67",
		"\xE1\xB4\xB4"	=> "\x68",
		"\xE1\xB4\xB5"	=> "\x69",
		"\xE1\xB4\xB6"	=> "\x6A",
		"\xE1\xB4\xB7"	=> "\x6B",
		"\xE1\xB4\xB8"	=> "\x6C",
		"\xE1\xB4\xB9"	=> "\x6D",
		"\xE1\xB4\xBA"	=> "\x6E",
		"\xE1\xB4\xBC"	=> "\x6F",
		"\xE1\xB4\xBD"	=> "\xC8\xA3",
		"\xE1\xB4\xBE"	=> "\x70",
		"\xE1\xB4\xBF"	=> "\x72",
		"\xE1\xB5\x80"	=> "\x74",
		"\xE1\xB5\x81"	=> "\x75",
		"\xE1\xB5\x82"	=> "\x77",
		"\xE2\x82\xA8"	=> "\x72\x73",
		"\xE2\x84\x82"	=> "\x63",
		"\xE2\x84\x83"	=> "\xC2\xB0\x63",
		"\xE2\x84\x87"	=> "\xC9\x9B",
		"\xE2\x84\x89"	=> "\xC2\xB0\x66",
		"\xE2\x84\x8B"	=> "\x68",
		"\xE2\x84\x8C"	=> "\x68",
		"\xE2\x84\x8D"	=> "\x68",
		"\xE2\x84\x90"	=> "\x69",
		"\xE2\x84\x91"	=> "\x69",
		"\xE2\x84\x92"	=> "\x6C",
		"\xE2\x84\x95"	=> "\x6E",
		"\xE2\x84\x96"	=> "\x6E\x6F",
		"\xE2\x84\x99"	=> "\x70",
		"\xE2\x84\x9A"	=> "\x71",
		"\xE2\x84\x9B"	=> "\x72",
		"\xE2\x84\x9C"	=> "\x72",
		"\xE2\x84\x9D"	=> "\x72",
		"\xE2\x84\xA0"	=> "\x73\x6D",
		"\xE2\x84\xA1"	=> "\x74\x65\x6C",
		"\xE2\x84\xA2"	=> "\x74\x6D",
		"\xE2\x84\xA4"	=> "\x7A",
		"\xE2\x84\xA8"	=> "\x7A",
		"\xE2\x84\xAC"	=> "\x62",
		"\xE2\x84\xAD"	=> "\x63",
		"\xE2\x84\xB0"	=> "\x65",
		"\xE2\x84\xB1"	=> "\x66",
		"\xE2\x84\xB3"	=> "\x6D",
		"\xE2\x84\xBB"	=> "\x66\x61\x78",
		"\xE2\x84\xBE"	=> "\xCE\xB3",
		"\xE2\x84\xBF"	=> "\xCF\x80",
		"\xE2\x85\x85"	=> "\x64",
		"\xE3\x89\x90"	=> "\x70\x74\x65",
		"\xE3\x8B\x8C"	=> "\x68\x67",
		"\xE3\x8B\x8E"	=> "\x65\x76",
		"\xE3\x8B\x8F"	=> "\x6C\x74\x64",
		"\xE3\x8D\xB1"	=> "\x68\x70\x61",
		"\xE3\x8D\xB3"	=> "\x61\x75",
		"\xE3\x8D\xB5"	=> "\x6F\x76",
		"\xE3\x8D\xBA"	=> "\x69\x75",
		"\xE3\x8E\x80"	=> "\x70\x61",
		"\xE3\x8E\x81"	=> "\x6E\x61",
		"\xE3\x8E\x82"	=> "\xCE\xBC\x61",
		"\xE3\x8E\x83"	=> "\x6D\x61",
		"\xE3\x8E\x84"	=> "\x6B\x61",
		"\xE3\x8E\x85"	=> "\x6B\x62",
		"\xE3\x8E\x86"	=> "\x6D\x62",
		"\xE3\x8E\x87"	=> "\x67\x62",
		"\xE3\x8E\x8A"	=> "\x70\x66",
		"\xE3\x8E\x8B"	=> "\x6E\x66",
		"\xE3\x8E\x8C"	=> "\xCE\xBC\x66",
		"\xE3\x8E\x90"	=> "\x68\x7A",
		"\xE3\x8E\x91"	=> "\x6B\x68\x7A",
		"\xE3\x8E\x92"	=> "\x6D\x68\x7A",
		"\xE3\x8E\x93"	=> "\x67\x68\x7A",
		"\xE3\x8E\x94"	=> "\x74\x68\x7A",
		"\xE3\x8E\xA9"	=> "\x70\x61",
		"\xE3\x8E\xAA"	=> "\x6B\x70\x61",
		"\xE3\x8E\xAB"	=> "\x6D\x70\x61",
		"\xE3\x8E\xAC"	=> "\x67\x70\x61",
		"\xE3\x8E\xB4"	=> "\x70\x76",
		"\xE3\x8E\xB5"	=> "\x6E\x76",
		"\xE3\x8E\xB6"	=> "\xCE\xBC\x76",
		"\xE3\x8E\xB7"	=> "\x6D\x76",
		"\xE3\x8E\xB8"	=> "\x6B\x76",
		"\xE3\x8E\xB9"	=> "\x6D\x76",
		"\xE3\x8E\xBA"	=> "\x70\x77",
		"\xE3\x8E\xBB"	=> "\x6E\x77",
		"\xE3\x8E\xBC"	=> "\xCE\xBC\x77",
		"\xE3\x8E\xBD"	=> "\x6D\x77",
		"\xE3\x8E\xBE"	=> "\x6B\x77",
		"\xE3\x8E\xBF"	=> "\x6D\x77",
		"\xE3\x8F\x80"	=> "\x6B\xCF\x89",
		"\xE3\x8F\x81"	=> "\x6D\xCF\x89",
		"\xE3\x8F\x83"	=> "\x62\x71",
		"\xE3\x8F\x86"	=> "\x63\xE2\x88\x95\x6B\x67",
		"\xE3\x8F\x87"	=> "\x63\x6F\x2E",
		"\xE3\x8F\x88"	=> "\x64\x62",
		"\xE3\x8F\x89"	=> "\x67\x79",
		"\xE3\x8F\x8B"	=> "\x68\x70",
		"\xE3\x8F\x8D"	=> "\x6B\x6B",
		"\xE3\x8F\x8E"	=> "\x6B\x6D",
		"\xE3\x8F\x97"	=> "\x70\x68",
		"\xE3\x8F\x99"	=> "\x70\x70\x6D",
		"\xE3\x8F\x9A"	=> "\x70\x72",
		"\xE3\x8F\x9C"	=> "\x73\x76",
		"\xE3\x8F\x9D"	=> "\x77\x62",
		"\xE3\x8F\x9E"	=> "\x76\xE2\x88\x95\x6D",
		"\xE3\x8F\x9F"	=> "\x61\xE2\x88\x95\x6D",
		"\xF0\x9D\x90\x80"	=> "\x61",
		"\xF0\x9D\x90\x81"	=> "\x62",
		"\xF0\x9D\x90\x82"	=> "\x63",
		"\xF0\x9D\x90\x83"	=> "\x64",
		"\xF0\x9D\x90\x84"	=> "\x65",
		"\xF0\x9D\x90\x85"	=> "\x66",
		"\xF0\x9D\x90\x86"	=> "\x67",
		"\xF0\x9D\x90\x87"	=> "\x68",
		"\xF0\x9D\x90\x88"	=> "\x69",
		"\xF0\x9D\x90\x89"	=> "\x6A",
		"\xF0\x9D\x90\x8A"	=> "\x6B",
		"\xF0\x9D\x90\x8B"	=> "\x6C",
		"\xF0\x9D\x90\x8C"	=> "\x6D",
		"\xF0\x9D\x90\x8D"	=> "\x6E",
		"\xF0\x9D\x90\x8E"	=> "\x6F",
		"\xF0\x9D\x90\x8F"	=> "\x70",
		"\xF0\x9D\x90\x90"	=> "\x71",
		"\xF0\x9D\x90\x91"	=> "\x72",
		"\xF0\x9D\x90\x92"	=> "\x73",
		"\xF0\x9D\x90\x93"	=> "\x74",
		"\xF0\x9D\x90\x94"	=> "\x75",
		"\xF0\x9D\x90\x95"	=> "\x76",
		"\xF0\x9D\x90\x96"	=> "\x77",
		"\xF0\x9D\x90\x97"	=> "\x78",
		"\xF0\x9D\x90\x98"	=> "\x79",
		"\xF0\x9D\x90\x99"	=> "\x7A",
		"\xF0\x9D\x90\xB4"	=> "\x61",
		"\xF0\x9D\x90\xB5"	=> "\x62",
		"\xF0\x9D\x90\xB6"	=> "\x63",
		"\xF0\x9D\x90\xB7"	=> "\x64",
		"\xF0\x9D\x90\xB8"	=> "\x65",
		"\xF0\x9D\x90\xB9"	=> "\x66",
		"\xF0\x9D\x90\xBA"	=> "\x67",
		"\xF0\x9D\x90\xBB"	=> "\x68",
		"\xF0\x9D\x90\xBC"	=> "\x69",
		"\xF0\x9D\x90\xBD"	=> "\x6A",
		"\xF0\x9D\x90\xBE"	=> "\x6B",
		"\xF0\x9D\x90\xBF"	=> "\x6C",
		"\xF0\x9D\x91\x80"	=> "\x6D",
		"\xF0\x9D\x91\x81"	=> "\x6E",
		"\xF0\x9D\x91\x82"	=> "\x6F",
		"\xF0\x9D\x91\x83"	=> "\x70",
		"\xF0\x9D\x91\x84"	=> "\x71",
		"\xF0\x9D\x91\x85"	=> "\x72",
		"\xF0\x9D\x91\x86"	=> "\x73",
		"\xF0\x9D\x91\x87"	=> "\x74",
		"\xF0\x9D\x91\x88"	=> "\x75",
		"\xF0\x9D\x91\x89"	=> "\x76",
		"\xF0\x9D\x91\x8A"	=> "\x77",
		"\xF0\x9D\x91\x8B"	=> "\x78",
		"\xF0\x9D\x91\x8C"	=> "\x79",
		"\xF0\x9D\x91\x8D"	=> "\x7A",
		"\xF0\x9D\x91\xA8"	=> "\x61",
		"\xF0\x9D\x91\xA9"	=> "\x62",
		"\xF0\x9D\x91\xAA"	=> "\x63",
		"\xF0\x9D\x91\xAB"	=> "\x64",
		"\xF0\x9D\x91\xAC"	=> "\x65",
		"\xF0\x9D\x91\xAD"	=> "\x66",
		"\xF0\x9D\x91\xAE"	=> "\x67",
		"\xF0\x9D\x91\xAF"	=> "\x68",
		"\xF0\x9D\x91\xB0"	=> "\x69",
		"\xF0\x9D\x91\xB1"	=> "\x6A",
		"\xF0\x9D\x91\xB2"	=> "\x6B",
		"\xF0\x9D\x91\xB3"	=> "\x6C",
		"\xF0\x9D\x91\xB4"	=> "\x6D",
		"\xF0\x9D\x91\xB5"	=> "\x6E",
		"\xF0\x9D\x91\xB6"	=> "\x6F",
		"\xF0\x9D\x91\xB7"	=> "\x70",
		"\xF0\x9D\x91\xB8"	=> "\x71",
		"\xF0\x9D\x91\xB9"	=> "\x72",
		"\xF0\x9D\x91\xBA"	=> "\x73",
		"\xF0\x9D\x91\xBB"	=> "\x74",
		"\xF0\x9D\x91\xBC"	=> "\x75",
		"\xF0\x9D\x91\xBD"	=> "\x76",
		"\xF0\x9D\x91\xBE"	=> "\x77",
		"\xF0\x9D\x91\xBF"	=> "\x78",
		"\xF0\x9D\x92\x80"	=> "\x79",
		"\xF0\x9D\x92\x81"	=> "\x7A",
		"\xF0\x9D\x92\x9C"	=> "\x61",
		"\xF0\x9D\x92\x9E"	=> "\x63",
		"\xF0\x9D\x92\x9F"	=> "\x64",
		"\xF0\x9D\x92\xA2"	=> "\x67",
		"\xF0\x9D\x92\xA5"	=> "\x6A",
		"\xF0\x9D\x92\xA6"	=> "\x6B",
		"\xF0\x9D\x92\xA9"	=> "\x6E",
		"\xF0\x9D\x92\xAA"	=> "\x6F",
		"\xF0\x9D\x92\xAB"	=> "\x70",
		"\xF0\x9D\x92\xAC"	=> "\x71",
		"\xF0\x9D\x92\xAE"	=> "\x73",
		"\xF0\x9D\x92\xAF"	=> "\x74",
		"\xF0\x9D\x92\xB0"	=> "\x75",
		"\xF0\x9D\x92\xB1"	=> "\x76",
		"\xF0\x9D\x92\xB2"	=> "\x77",
		"\xF0\x9D\x92\xB3"	=> "\x78",
		"\xF0\x9D\x92\xB4"	=> "\x79",
		"\xF0\x9D\x92\xB5"	=> "\x7A",
		"\xF0\x9D\x93\x90"	=> "\x61",
		"\xF0\x9D\x93\x91"	=> "\x62",
		"\xF0\x9D\x93\x92"	=> "\x63",
		"\xF0\x9D\x93\x93"	=> "\x64",
		"\xF0\x9D\x93\x94"	=> "\x65",
		"\xF0\x9D\x93\x95"	=> "\x66",
		"\xF0\x9D\x93\x96"	=> "\x67",
		"\xF0\x9D\x93\x97"	=> "\x68",
		"\xF0\x9D\x93\x98"	=> "\x69",
		"\xF0\x9D\x93\x99"	=> "\x6A",
		"\xF0\x9D\x93\x9A"	=> "\x6B",
		"\xF0\x9D\x93\x9B"	=> "\x6C",
		"\xF0\x9D\x93\x9C"	=> "\x6D",
		"\xF0\x9D\x93\x9D"	=> "\x6E",
		"\xF0\x9D\x93\x9E"	=> "\x6F",
		"\xF0\x9D\x93\x9F"	=> "\x70",
		"\xF0\x9D\x93\xA0"	=> "\x71",
		"\xF0\x9D\x93\xA1"	=> "\x72",
		"\xF0\x9D\x93\xA2"	=> "\x73",
		"\xF0\x9D\x93\xA3"	=> "\x74",
		"\xF0\x9D\x93\xA4"	=> "\x75",
		"\xF0\x9D\x93\xA5"	=> "\x76",
		"\xF0\x9D\x93\xA6"	=> "\x77",
		"\xF0\x9D\x93\xA7"	=> "\x78",
		"\xF0\x9D\x93\xA8"	=> "\x79",
		"\xF0\x9D\x93\xA9"	=> "\x7A",
		"\xF0\x9D\x94\x84"	=> "\x61",
		"\xF0\x9D\x94\x85"	=> "\x62",
		"\xF0\x9D\x94\x87"	=> "\x64",
		"\xF0\x9D\x94\x88"	=> "\x65",
		"\xF0\x9D\x94\x89"	=> "\x66",
		"\xF0\x9D\x94\x8A"	=> "\x67",
		"\xF0\x9D\x94\x8D"	=> "\x6A",
		"\xF0\x9D\x94\x8E"	=> "\x6B",
		"\xF0\x9D\x94\x8F"	=> "\x6C",
		"\xF0\x9D\x94\x90"	=> "\x6D",
		"\xF0\x9D\x94\x91"	=> "\x6E",
		"\xF0\x9D\x94\x92"	=> "\x6F",
		"\xF0\x9D\x94\x93"	=> "\x70",
		"\xF0\x9D\x94\x94"	=> "\x71",
		"\xF0\x9D\x94\x96"	=> "\x73",
		"\xF0\x9D\x94\x97"	=> "\x74",
		"\xF0\x9D\x94\x98"	=> "\x75",
		"\xF0\x9D\x94\x99"	=> "\x76",
		"\xF0\x9D\x94\x9A"	=> "\x77",
		"\xF0\x9D\x94\x9B"	=> "\x78",
		"\xF0\x9D\x94\x9C"	=> "\x79",
		"\xF0\x9D\x94\xB8"	=> "\x61",
		"\xF0\x9D\x94\xB9"	=> "\x62",
		"\xF0\x9D\x94\xBB"	=> "\x64",
		"\xF0\x9D\x94\xBC"	=> "\x65",
		"\xF0\x9D\x94\xBD"	=> "\x66",
		"\xF0\x9D\x94\xBE"	=> "\x67",
		"\xF0\x9D\x95\x80"	=> "\x69",
		"\xF0\x9D\x95\x81"	=> "\x6A",
		"\xF0\x9D\x95\x82"	=> "\x6B",
		"\xF0\x9D\x95\x83"	=> "\x6C",
		"\xF0\x9D\x95\x84"	=> "\x6D",
		"\xF0\x9D\x95\x86"	=> "\x6F",
		"\xF0\x9D\x95\x8A"	=> "\x73",
		"\xF0\x9D\x95\x8B"	=> "\x74",
		"\xF0\x9D\x95\x8C"	=> "\x75",
		"\xF0\x9D\x95\x8D"	=> "\x76",
		"\xF0\x9D\x95\x8E"	=> "\x77",
		"\xF0\x9D\x95\x8F"	=> "\x78",
		"\xF0\x9D\x95\x90"	=> "\x79",
		"\xF0\x9D\x95\xAC"	=> "\x61",
		"\xF0\x9D\x95\xAD"	=> "\x62",
		"\xF0\x9D\x95\xAE"	=> "\x63",
		"\xF0\x9D\x95\xAF"	=> "\x64",
		"\xF0\x9D\x95\xB0"	=> "\x65",
		"\xF0\x9D\x95\xB1"	=> "\x66",
		"\xF0\x9D\x95\xB2"	=> "\x67",
		"\xF0\x9D\x95\xB3"	=> "\x68",
		"\xF0\x9D\x95\xB4"	=> "\x69",
		"\xF0\x9D\x95\xB5"	=> "\x6A",
		"\xF0\x9D\x95\xB6"	=> "\x6B",
		"\xF0\x9D\x95\xB7"	=> "\x6C",
		"\xF0\x9D\x95\xB8"	=> "\x6D",
		"\xF0\x9D\x95\xB9"	=> "\x6E",
		"\xF0\x9D\x95\xBA"	=> "\x6F",
		"\xF0\x9D\x95\xBB"	=> "\x70",
		"\xF0\x9D\x95\xBC"	=> "\x71",
		"\xF0\x9D\x95\xBD"	=> "\x72",
		"\xF0\x9D\x95\xBE"	=> "\x73",
		"\xF0\x9D\x95\xBF"	=> "\x74",
		"\xF0\x9D\x96\x80"	=> "\x75",
		"\xF0\x9D\x96\x81"	=> "\x76",
		"\xF0\x9D\x96\x82"	=> "\x77",
		"\xF0\x9D\x96\x83"	=> "\x78",
		"\xF0\x9D\x96\x84"	=> "\x79",
		"\xF0\x9D\x96\x85"	=> "\x7A",
		"\xF0\x9D\x96\xA0"	=> "\x61",
		"\xF0\x9D\x96\xA1"	=> "\x62",
		"\xF0\x9D\x96\xA2"	=> "\x63",
		"\xF0\x9D\x96\xA3"	=> "\x64",
		"\xF0\x9D\x96\xA4"	=> "\x65",
		"\xF0\x9D\x96\xA5"	=> "\x66",
		"\xF0\x9D\x96\xA6"	=> "\x67",
		"\xF0\x9D\x96\xA7"	=> "\x68",
		"\xF0\x9D\x96\xA8"	=> "\x69",
		"\xF0\x9D\x96\xA9"	=> "\x6A",
		"\xF0\x9D\x96\xAA"	=> "\x6B",
		"\xF0\x9D\x96\xAB"	=> "\x6C",
		"\xF0\x9D\x96\xAC"	=> "\x6D",
		"\xF0\x9D\x96\xAD"	=> "\x6E",
		"\xF0\x9D\x96\xAE"	=> "\x6F",
		"\xF0\x9D\x96\xAF"	=> "\x70",
		"\xF0\x9D\x96\xB0"	=> "\x71",
		"\xF0\x9D\x96\xB1"	=> "\x72",
		"\xF0\x9D\x96\xB2"	=> "\x73",
		"\xF0\x9D\x96\xB3"	=> "\x74",
		"\xF0\x9D\x96\xB4"	=> "\x75",
		"\xF0\x9D\x96\xB5"	=> "\x76",
		"\xF0\x9D\x96\xB6"	=> "\x77",
		"\xF0\x9D\x96\xB7"	=> "\x78",
		"\xF0\x9D\x96\xB8"	=> "\x79",
		"\xF0\x9D\x96\xB9"	=> "\x7A",
		"\xF0\x9D\x97\x94"	=> "\x61",
		"\xF0\x9D\x97\x95"	=> "\x62",
		"\xF0\x9D\x97\x96"	=> "\x63",
		"\xF0\x9D\x97\x97"	=> "\x64",
		"\xF0\x9D\x97\x98"	=> "\x65",
		"\xF0\x9D\x97\x99"	=> "\x66",
		"\xF0\x9D\x97\x9A"	=> "\x67",
		"\xF0\x9D\x97\x9B"	=> "\x68",
		"\xF0\x9D\x97\x9C"	=> "\x69",
		"\xF0\x9D\x97\x9D"	=> "\x6A",
		"\xF0\x9D\x97\x9E"	=> "\x6B",
		"\xF0\x9D\x97\x9F"	=> "\x6C",
		"\xF0\x9D\x97\xA0"	=> "\x6D",
		"\xF0\x9D\x97\xA1"	=> "\x6E",
		"\xF0\x9D\x97\xA2"	=> "\x6F",
		"\xF0\x9D\x97\xA3"	=> "\x70",
		"\xF0\x9D\x97\xA4"	=> "\x71",
		"\xF0\x9D\x97\xA5"	=> "\x72",
		"\xF0\x9D\x97\xA6"	=> "\x73",
		"\xF0\x9D\x97\xA7"	=> "\x74",
		"\xF0\x9D\x97\xA8"	=> "\x75",
		"\xF0\x9D\x97\xA9"	=> "\x76",
		"\xF0\x9D\x97\xAA"	=> "\x77",
		"\xF0\x9D\x97\xAB"	=> "\x78",
		"\xF0\x9D\x97\xAC"	=> "\x79",
		"\xF0\x9D\x97\xAD"	=> "\x7A",
		"\xF0\x9D\x98\x88"	=> "\x61",
		"\xF0\x9D\x98\x89"	=> "\x62",
		"\xF0\x9D\x98\x8A"	=> "\x63",
		"\xF0\x9D\x98\x8B"	=> "\x64",
		"\xF0\x9D\x98\x8C"	=> "\x65",
		"\xF0\x9D\x98\x8D"	=> "\x66",
		"\xF0\x9D\x98\x8E"	=> "\x67",
		"\xF0\x9D\x98\x8F"	=> "\x68",
		"\xF0\x9D\x98\x90"	=> "\x69",
		"\xF0\x9D\x98\x91"	=> "\x6A",
		"\xF0\x9D\x98\x92"	=> "\x6B",
		"\xF0\x9D\x98\x93"	=> "\x6C",
		"\xF0\x9D\x98\x94"	=> "\x6D",
		"\xF0\x9D\x98\x95"	=> "\x6E",
		"\xF0\x9D\x98\x96"	=> "\x6F",
		"\xF0\x9D\x98\x97"	=> "\x70",
		"\xF0\x9D\x98\x98"	=> "\x71",
		"\xF0\x9D\x98\x99"	=> "\x72",
		"\xF0\x9D\x98\x9A"	=> "\x73",
		"\xF0\x9D\x98\x9B"	=> "\x74",
		"\xF0\x9D\x98\x9C"	=> "\x75",
		"\xF0\x9D\x98\x9D"	=> "\x76",
		"\xF0\x9D\x98\x9E"	=> "\x77",
		"\xF0\x9D\x98\x9F"	=> "\x78",
		"\xF0\x9D\x98\xA0"	=> "\x79",
		"\xF0\x9D\x98\xA1"	=> "\x7A",
		"\xF0\x9D\x98\xBC"	=> "\x61",
		"\xF0\x9D\x98\xBD"	=> "\x62",
		"\xF0\x9D\x98\xBE"	=> "\x63",
		"\xF0\x9D\x98\xBF"	=> "\x64",
		"\xF0\x9D\x99\x80"	=> "\x65",
		"\xF0\x9D\x99\x81"	=> "\x66",
		"\xF0\x9D\x99\x82"	=> "\x67",
		"\xF0\x9D\x99\x83"	=> "\x68",
		"\xF0\x9D\x99\x84"	=> "\x69",
		"\xF0\x9D\x99\x85"	=> "\x6A",
		"\xF0\x9D\x99\x86"	=> "\x6B",
		"\xF0\x9D\x99\x87"	=> "\x6C",
		"\xF0\x9D\x99\x88"	=> "\x6D",
		"\xF0\x9D\x99\x89"	=> "\x6E",
		"\xF0\x9D\x99\x8A"	=> "\x6F",
		"\xF0\x9D\x99\x8B"	=> "\x70",
		"\xF0\x9D\x99\x8C"	=> "\x71",
		"\xF0\x9D\x99\x8D"	=> "\x72",
		"\xF0\x9D\x99\x8E"	=> "\x73",
		"\xF0\x9D\x99\x8F"	=> "\x74",
		"\xF0\x9D\x99\x90"	=> "\x75",
		"\xF0\x9D\x99\x91"	=> "\x76",
		"\xF0\x9D\x99\x92"	=> "\x77",
		"\xF0\x9D\x99\x93"	=> "\x78",
		"\xF0\x9D\x99\x94"	=> "\x79",
		"\xF0\x9D\x99\x95"	=> "\x7A",
		"\xF0\x9D\x99\xB0"	=> "\x61",
		"\xF0\x9D\x99\xB1"	=> "\x62",
		"\xF0\x9D\x99\xB2"	=> "\x63",
		"\xF0\x9D\x99\xB3"	=> "\x64",
		"\xF0\x9D\x99\xB4"	=> "\x65",
		"\xF0\x9D\x99\xB5"	=> "\x66",
		"\xF0\x9D\x99\xB6"	=> "\x67",
		"\xF0\x9D\x99\xB7"	=> "\x68",
		"\xF0\x9D\x99\xB8"	=> "\x69",
		"\xF0\x9D\x99\xB9"	=> "\x6A",
		"\xF0\x9D\x99\xBA"	=> "\x6B",
		"\xF0\x9D\x99\xBB"	=> "\x6C",
		"\xF0\x9D\x99\xBC"	=> "\x6D",
		"\xF0\x9D\x99\xBD"	=> "\x6E",
		"\xF0\x9D\x99\xBE"	=> "\x6F",
		"\xF0\x9D\x99\xBF"	=> "\x70",
		"\xF0\x9D\x9A\x80"	=> "\x71",
		"\xF0\x9D\x9A\x81"	=> "\x72",
		"\xF0\x9D\x9A\x82"	=> "\x73",
		"\xF0\x9D\x9A\x83"	=> "\x74",
		"\xF0\x9D\x9A\x84"	=> "\x75",
		"\xF0\x9D\x9A\x85"	=> "\x76",
		"\xF0\x9D\x9A\x86"	=> "\x77",
		"\xF0\x9D\x9A\x87"	=> "\x78",
		"\xF0\x9D\x9A\x88"	=> "\x79",
		"\xF0\x9D\x9A\x89"	=> "\x7A",
		"\xF0\x9D\x9A\xA8"	=> "\xCE\xB1",
		"\xF0\x9D\x9A\xA9"	=> "\xCE\xB2",
		"\xF0\x9D\x9A\xAA"	=> "\xCE\xB3",
		"\xF0\x9D\x9A\xAB"	=> "\xCE\xB4",
		"\xF0\x9D\x9A\xAC"	=> "\xCE\xB5",
		"\xF0\x9D\x9A\xAD"	=> "\xCE\xB6",
		"\xF0\x9D\x9A\xAE"	=> "\xCE\xB7",
		"\xF0\x9D\x9A\xAF"	=> "\xCE\xB8",
		"\xF0\x9D\x9A\xB0"	=> "\xCE\xB9",
		"\xF0\x9D\x9A\xB1"	=> "\xCE\xBA",
		"\xF0\x9D\x9A\xB2"	=> "\xCE\xBB",
		"\xF0\x9D\x9A\xB3"	=> "\xCE\xBC",
		"\xF0\x9D\x9A\xB4"	=> "\xCE\xBD",
		"\xF0\x9D\x9A\xB5"	=> "\xCE\xBE",
		"\xF0\x9D\x9A\xB6"	=> "\xCE\xBF",
		"\xF0\x9D\x9A\xB7"	=> "\xCF\x80",
		"\xF0\x9D\x9A\xB8"	=> "\xCF\x81",
		"\xF0\x9D\x9A\xB9"	=> "\xCE\xB8",
		"\xF0\x9D\x9A\xBA"	=> "\xCF\x83",
		"\xF0\x9D\x9A\xBB"	=> "\xCF\x84",
		"\xF0\x9D\x9A\xBC"	=> "\xCF\x85",
		"\xF0\x9D\x9A\xBD"	=> "\xCF\x86",
		"\xF0\x9D\x9A\xBE"	=> "\xCF\x87",
		"\xF0\x9D\x9A\xBF"	=> "\xCF\x88",
		"\xF0\x9D\x9B\x80"	=> "\xCF\x89",
		"\xF0\x9D\x9B\x93"	=> "\xCF\x83",
		"\xF0\x9D\x9B\xA2"	=> "\xCE\xB1",
		"\xF0\x9D\x9B\xA3"	=> "\xCE\xB2",
		"\xF0\x9D\x9B\xA4"	=> "\xCE\xB3",
		"\xF0\x9D\x9B\xA5"	=> "\xCE\xB4",
		"\xF0\x9D\x9B\xA6"	=> "\xCE\xB5",
		"\xF0\x9D\x9B\xA7"	=> "\xCE\xB6",
		"\xF0\x9D\x9B\xA8"	=> "\xCE\xB7",
		"\xF0\x9D\x9B\xA9"	=> "\xCE\xB8",
		"\xF0\x9D\x9B\xAA"	=> "\xCE\xB9",
		"\xF0\x9D\x9B\xAB"	=> "\xCE\xBA",
		"\xF0\x9D\x9B\xAC"	=> "\xCE\xBB",
		"\xF0\x9D\x9B\xAD"	=> "\xCE\xBC",
		"\xF0\x9D\x9B\xAE"	=> "\xCE\xBD",
		"\xF0\x9D\x9B\xAF"	=> "\xCE\xBE",
		"\xF0\x9D\x9B\xB0"	=> "\xCE\xBF",
		"\xF0\x9D\x9B\xB1"	=> "\xCF\x80",
		"\xF0\x9D\x9B\xB2"	=> "\xCF\x81",
		"\xF0\x9D\x9B\xB3"	=> "\xCE\xB8",
		"\xF0\x9D\x9B\xB4"	=> "\xCF\x83",
		"\xF0\x9D\x9B\xB5"	=> "\xCF\x84",
		"\xF0\x9D\x9B\xB6"	=> "\xCF\x85",
		"\xF0\x9D\x9B\xB7"	=> "\xCF\x86",
		"\xF0\x9D\x9B\xB8"	=> "\xCF\x87",
		"\xF0\x9D\x9B\xB9"	=> "\xCF\x88",
		"\xF0\x9D\x9B\xBA"	=> "\xCF\x89",
		"\xF0\x9D\x9C\x8D"	=> "\xCF\x83",
		"\xF0\x9D\x9C\x9C"	=> "\xCE\xB1",
		"\xF0\x9D\x9C\x9D"	=> "\xCE\xB2",
		"\xF0\x9D\x9C\x9E"	=> "\xCE\xB3",
		"\xF0\x9D\x9C\x9F"	=> "\xCE\xB4",
		"\xF0\x9D\x9C\xA0"	=> "\xCE\xB5",
		"\xF0\x9D\x9C\xA1"	=> "\xCE\xB6",
		"\xF0\x9D\x9C\xA2"	=> "\xCE\xB7",
		"\xF0\x9D\x9C\xA3"	=> "\xCE\xB8",
		"\xF0\x9D\x9C\xA4"	=> "\xCE\xB9",
		"\xF0\x9D\x9C\xA5"	=> "\xCE\xBA",
		"\xF0\x9D\x9C\xA6"	=> "\xCE\xBB",
		"\xF0\x9D\x9C\xA7"	=> "\xCE\xBC",
		"\xF0\x9D\x9C\xA8"	=> "\xCE\xBD",
		"\xF0\x9D\x9C\xA9"	=> "\xCE\xBE",
		"\xF0\x9D\x9C\xAA"	=> "\xCE\xBF",
		"\xF0\x9D\x9C\xAB"	=> "\xCF\x80",
		"\xF0\x9D\x9C\xAC"	=> "\xCF\x81",
		"\xF0\x9D\x9C\xAD"	=> "\xCE\xB8",
		"\xF0\x9D\x9C\xAE"	=> "\xCF\x83",
		"\xF0\x9D\x9C\xAF"	=> "\xCF\x84",
		"\xF0\x9D\x9C\xB0"	=> "\xCF\x85",
		"\xF0\x9D\x9C\xB1"	=> "\xCF\x86",
		"\xF0\x9D\x9C\xB2"	=> "\xCF\x87",
		"\xF0\x9D\x9C\xB3"	=> "\xCF\x88",
		"\xF0\x9D\x9C\xB4"	=> "\xCF\x89",
		"\xF0\x9D\x9D\x87"	=> "\xCF\x83",
		"\xF0\x9D\x9D\x96"	=> "\xCE\xB1",
		"\xF0\x9D\x9D\x97"	=> "\xCE\xB2",
		"\xF0\x9D\x9D\x98"	=> "\xCE\xB3",
		"\xF0\x9D\x9D\x99"	=> "\xCE\xB4",
		"\xF0\x9D\x9D\x9A"	=> "\xCE\xB5",
		"\xF0\x9D\x9D\x9B"	=> "\xCE\xB6",
		"\xF0\x9D\x9D\x9C"	=> "\xCE\xB7",
		"\xF0\x9D\x9D\x9D"	=> "\xCE\xB8",
		"\xF0\x9D\x9D\x9E"	=> "\xCE\xB9",
		"\xF0\x9D\x9D\x9F"	=> "\xCE\xBA",
		"\xF0\x9D\x9D\xA0"	=> "\xCE\xBB",
		"\xF0\x9D\x9D\xA1"	=> "\xCE\xBC",
		"\xF0\x9D\x9D\xA2"	=> "\xCE\xBD",
		"\xF0\x9D\x9D\xA3"	=> "\xCE\xBE",
		"\xF0\x9D\x9D\xA4"	=> "\xCE\xBF",
		"\xF0\x9D\x9D\xA5"	=> "\xCF\x80",
		"\xF0\x9D\x9D\xA6"	=> "\xCF\x81",
		"\xF0\x9D\x9D\xA7"	=> "\xCE\xB8",
		"\xF0\x9D\x9D\xA8"	=> "\xCF\x83",
		"\xF0\x9D\x9D\xA9"	=> "\xCF\x84",
		"\xF0\x9D\x9D\xAA"	=> "\xCF\x85",
		"\xF0\x9D\x9D\xAB"	=> "\xCF\x86",
		"\xF0\x9D\x9D\xAC"	=> "\xCF\x87",
		"\xF0\x9D\x9D\xAD"	=> "\xCF\x88",
		"\xF0\x9D\x9D\xAE"	=> "\xCF\x89",
		"\xF0\x9D\x9E\x81"	=> "\xCF\x83",
		"\xF0\x9D\x9E\x90"	=> "\xCE\xB1",
		"\xF0\x9D\x9E\x91"	=> "\xCE\xB2",
		"\xF0\x9D\x9E\x92"	=> "\xCE\xB3",
		"\xF0\x9D\x9E\x93"	=> "\xCE\xB4",
		"\xF0\x9D\x9E\x94"	=> "\xCE\xB5",
		"\xF0\x9D\x9E\x95"	=> "\xCE\xB6",
		"\xF0\x9D\x9E\x96"	=> "\xCE\xB7",
		"\xF0\x9D\x9E\x97"	=> "\xCE\xB8",
		"\xF0\x9D\x9E\x98"	=> "\xCE\xB9",
		"\xF0\x9D\x9E\x99"	=> "\xCE\xBA",
		"\xF0\x9D\x9E\x9A"	=> "\xCE\xBB",
		"\xF0\x9D\x9E\x9B"	=> "\xCE\xBC",
		"\xF0\x9D\x9E\x9C"	=> "\xCE\xBD",
		"\xF0\x9D\x9E\x9D"	=> "\xCE\xBE",
		"\xF0\x9D\x9E\x9E"	=> "\xCE\xBF",
		"\xF0\x9D\x9E\x9F"	=> "\xCF\x80",
		"\xF0\x9D\x9E\xA0"	=> "\xCF\x81",
		"\xF0\x9D\x9E\xA1"	=> "\xCE\xB8",
		"\xF0\x9D\x9E\xA2"	=> "\xCF\x83",
		"\xF0\x9D\x9E\xA3"	=> "\xCF\x84",
		"\xF0\x9D\x9E\xA4"	=> "\xCF\x85",
		"\xF0\x9D\x9E\xA5"	=> "\xCF\x86",
		"\xF0\x9D\x9E\xA6"	=> "\xCF\x87",
		"\xF0\x9D\x9E\xA7"	=> "\xCF\x88",
		"\xF0\x9D\x9E\xA8"	=> "\xCF\x89",
		"\xF0\x9D\x9E\xBB"	=> "\xCF\x83",
		"\xF0\x9D\x9F\x8A"	=> "\xCF\x9D",
	);
	global $phpbb_root_path, $phpEx;

	// do the case fold
	$text = utf8_case_fold($text, $option);

	// load the normalizer only if it has not been defined yet
	if (!class_exists('utf_normalizer'))
	{
		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
	}

	// convert to NFKC
	// NOTE(review): the return value is not used, so nfkc() presumably
	// modifies $text by reference - confirm against utf_normalizer
	utf_normalizer::nfkc($text);

	// FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
	$text = strtr($text, $fc_nfkc_closure);

	return $text;
}
|
|
1670 |
||
1671 |
/**
* Case fold text that is assumed to already be in NFC.
*
* Before folding, every precomposed character listed in the table below is
* replaced by its base character followed by U+0345 (COMBINING GREEK
* YPOGEGRAMMENI). This avoids further normalization on composed code points
* that contain U+0345 in their decomposition. The result is then handed to
* utf8_case_fold().
*
* @param	string	$text	text to be case folded
* @param	string	$option	determines how the cases are folded; passed through to utf8_case_fold()
* @return	string			case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
	// Precomposed character => base character + U+0345 ("\xCD\x85")
	static $ypogegrammeni = array(
		"\xCD\xBA"		=> "\x20\xCD\x85",
		"\xE1\xBE\x80"	=> "\xE1\xBC\x80\xCD\x85",
		"\xE1\xBE\x81"	=> "\xE1\xBC\x81\xCD\x85",
		"\xE1\xBE\x82"	=> "\xE1\xBC\x82\xCD\x85",
		"\xE1\xBE\x83"	=> "\xE1\xBC\x83\xCD\x85",
		"\xE1\xBE\x84"	=> "\xE1\xBC\x84\xCD\x85",
		"\xE1\xBE\x85"	=> "\xE1\xBC\x85\xCD\x85",
		"\xE1\xBE\x86"	=> "\xE1\xBC\x86\xCD\x85",
		"\xE1\xBE\x87"	=> "\xE1\xBC\x87\xCD\x85",
		"\xE1\xBE\x88"	=> "\xE1\xBC\x88\xCD\x85",
		"\xE1\xBE\x89"	=> "\xE1\xBC\x89\xCD\x85",
		"\xE1\xBE\x8A"	=> "\xE1\xBC\x8A\xCD\x85",
		"\xE1\xBE\x8B"	=> "\xE1\xBC\x8B\xCD\x85",
		"\xE1\xBE\x8C"	=> "\xE1\xBC\x8C\xCD\x85",
		"\xE1\xBE\x8D"	=> "\xE1\xBC\x8D\xCD\x85",
		"\xE1\xBE\x8E"	=> "\xE1\xBC\x8E\xCD\x85",
		"\xE1\xBE\x8F"	=> "\xE1\xBC\x8F\xCD\x85",
		"\xE1\xBE\x90"	=> "\xE1\xBC\xA0\xCD\x85",
		"\xE1\xBE\x91"	=> "\xE1\xBC\xA1\xCD\x85",
		"\xE1\xBE\x92"	=> "\xE1\xBC\xA2\xCD\x85",
		"\xE1\xBE\x93"	=> "\xE1\xBC\xA3\xCD\x85",
		"\xE1\xBE\x94"	=> "\xE1\xBC\xA4\xCD\x85",
		"\xE1\xBE\x95"	=> "\xE1\xBC\xA5\xCD\x85",
		"\xE1\xBE\x96"	=> "\xE1\xBC\xA6\xCD\x85",
		"\xE1\xBE\x97"	=> "\xE1\xBC\xA7\xCD\x85",
		"\xE1\xBE\x98"	=> "\xE1\xBC\xA8\xCD\x85",
		"\xE1\xBE\x99"	=> "\xE1\xBC\xA9\xCD\x85",
		"\xE1\xBE\x9A"	=> "\xE1\xBC\xAA\xCD\x85",
		"\xE1\xBE\x9B"	=> "\xE1\xBC\xAB\xCD\x85",
		"\xE1\xBE\x9C"	=> "\xE1\xBC\xAC\xCD\x85",
		"\xE1\xBE\x9D"	=> "\xE1\xBC\xAD\xCD\x85",
		"\xE1\xBE\x9E"	=> "\xE1\xBC\xAE\xCD\x85",
		"\xE1\xBE\x9F"	=> "\xE1\xBC\xAF\xCD\x85",
		"\xE1\xBE\xA0"	=> "\xE1\xBD\xA0\xCD\x85",
		"\xE1\xBE\xA1"	=> "\xE1\xBD\xA1\xCD\x85",
		"\xE1\xBE\xA2"	=> "\xE1\xBD\xA2\xCD\x85",
		"\xE1\xBE\xA3"	=> "\xE1\xBD\xA3\xCD\x85",
		"\xE1\xBE\xA4"	=> "\xE1\xBD\xA4\xCD\x85",
		"\xE1\xBE\xA5"	=> "\xE1\xBD\xA5\xCD\x85",
		"\xE1\xBE\xA6"	=> "\xE1\xBD\xA6\xCD\x85",
		"\xE1\xBE\xA7"	=> "\xE1\xBD\xA7\xCD\x85",
		"\xE1\xBE\xA8"	=> "\xE1\xBD\xA8\xCD\x85",
		"\xE1\xBE\xA9"	=> "\xE1\xBD\xA9\xCD\x85",
		"\xE1\xBE\xAA"	=> "\xE1\xBD\xAA\xCD\x85",
		"\xE1\xBE\xAB"	=> "\xE1\xBD\xAB\xCD\x85",
		"\xE1\xBE\xAC"	=> "\xE1\xBD\xAC\xCD\x85",
		"\xE1\xBE\xAD"	=> "\xE1\xBD\xAD\xCD\x85",
		"\xE1\xBE\xAE"	=> "\xE1\xBD\xAE\xCD\x85",
		"\xE1\xBE\xAF"	=> "\xE1\xBD\xAF\xCD\x85",
		"\xE1\xBE\xB2"	=> "\xE1\xBD\xB0\xCD\x85",
		"\xE1\xBE\xB3"	=> "\xCE\xB1\xCD\x85",
		"\xE1\xBE\xB4"	=> "\xCE\xAC\xCD\x85",
		"\xE1\xBE\xB7"	=> "\xE1\xBE\xB6\xCD\x85",
		"\xE1\xBE\xBC"	=> "\xCE\x91\xCD\x85",
		"\xE1\xBF\x82"	=> "\xE1\xBD\xB4\xCD\x85",
		"\xE1\xBF\x83"	=> "\xCE\xB7\xCD\x85",
		"\xE1\xBF\x84"	=> "\xCE\xAE\xCD\x85",
		"\xE1\xBF\x87"	=> "\xE1\xBF\x86\xCD\x85",
		"\xE1\xBF\x8C"	=> "\xCE\x97\xCD\x85",
		"\xE1\xBF\xB2"	=> "\xE1\xBD\xBC\xCD\x85",
		"\xE1\xBF\xB3"	=> "\xCF\x89\xCD\x85",
		"\xE1\xBF\xB4"	=> "\xCF\x8E\xCD\x85",
		"\xE1\xBF\xB7"	=> "\xE1\xBF\xB6\xCD\x85",
		"\xE1\xBF\xBC"	=> "\xCE\xA9\xCD\x85",
	);

	// perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
	$text = strtr($text, $ypogegrammeni);

	// do the case fold
	return utf8_case_fold($text, $option);
}
|
|
1758 |
||
1759 |
/**
* Wrapper around the normalizer: includes the utf_normalizer class when it is
* not available yet and brings the given string(s) into NFC
* (Normalization Form Composition).
*
* Arrays are handled up to two levels deep: every element of the given array,
* and every element of an array nested directly inside it, is normalized.
*
* @param	mixed	$strings	a string or an array of strings to normalize
* @return	mixed				the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
	// Empty input is handed back untouched, the normalizer is not even loaded
	if (empty($strings))
	{
		return $strings;
	}

	// Load the normalizer on demand
	if (!class_exists('utf_normalizer'))
	{
		global $phpbb_root_path, $phpEx;
		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
	}

	// Single value: normalize it and we are done
	if (!is_array($strings))
	{
		utf_normalizer::nfc($strings);

		return $strings;
	}

	foreach ($strings as $key => $entry)
	{
		if (!is_array($entry))
		{
			utf_normalizer::nfc($strings[$key]);
			continue;
		}

		// Second level: address each element through $strings so the result is kept
		foreach (array_keys($entry) as $sub_key)
		{
			utf_normalizer::nfc($strings[$key][$sub_key]);
		}
	}

	return $strings;
}
|
|
1803 |
||
1804 |
/**
* Generates the "clean" version of a string.
*
* Clean means the string is in a case insensitive form (case folding) and
* normalized. Additionally, homographs of a character are transformed into one
* specific character (preferably ASCII if it is an ASCII character), control
* characters are removed, runs of spaces are collapsed and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in
* the users table, and all other columns that store a clean string; otherwise
* you will break this functionality.
*
* @param	string	$text	An unclean string, maybe user input (has to be valid UTF-8!)
* @return	string			Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
	global $phpbb_root_path, $phpEx;

	static $homographs = array();

	// The confusables table is loaded once and kept for later calls
	if (!$homographs)
	{
		$homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
	}

	// Case fold + normalize first, then map homographs onto their replacement
	$text = strtr(utf8_case_fold_nfkc($text), $homographs);

	// The patterns are applied in order:
	//  1. strip other control characters
	//  2. reduce multiple spaces to a single one
	$text = preg_replace(
		array('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '# {2,}#'),
		array('', ' '),
		$text
	);

	// we can use trim here as all the other space characters should have been turned
	// into normal ASCII spaces by now
	return trim($text);
}
|
|
1839 |
||
1840 |
/**
* A wrapper for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* The argument is taken by reference but is not modified; the escaped copy is
* returned.
*
* @param	string	$value	text to escape
* @return	string			the escaped text
*/
function utf8_htmlspecialchars(&$value)
{
	$escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

	return $escaped;
}
|
|
1847 |
||
1848 |
/**
* Tries to convert a returned system message to UTF-8 and escapes it with
* utf8_htmlspecialchars().
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param	string	$message	the system message
* @return	string				the message, recoded if it contained non-ASCII bytes, and HTML-escaped
*/
function utf8_convert_message($message)
{
	// First of all check if conversion is needed at all, as there is no point
	// in converting ASCII messages from ISO-8859-1 to UTF-8
	if (preg_match('/[\x80-\xFF]/', $message))
	{
		$message = utf8_recode($message, 'ISO-8859-1');
	}

	// utf8_htmlspecialchars() takes its argument by reference, so it must be
	// given a variable rather than the result of a function call
	return utf8_htmlspecialchars($message);
}
|
|
1866 |
||
1867 |
/**
* UTF8-compatible wordwrap replacement
*
* Existing occurrences of $break in the input are kept as line boundaries.
* Within each such line, words (separated by a single space) are collected
* until adding the next one would exceed $width characters, at which point a
* new output line is started.
*
* @param	string	$string	The input string
* @param	int		$width	The column width. Defaults to 75.
* @param	string	$break	The line is broken using the optional break parameter. Defaults to "\n".
* @param	bool	$cut	If the cut is set to TRUE, the string is always wrapped at the specified width.
*							So if you have a word that is larger than the given width, it is broken apart.
*							Cutting is ignored when $width is not positive.
*
* @return	string			the given string wrapped at the specified column.
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
	// Cutting to a non-positive width can never shorten the word, the loop
	// below would never terminate - so do not cut at all in that case
	$cut = $cut && $width > 0;

	// We first need to explode on $break, not destroying existing (intended) breaks
	$lines = explode($break, $string);
	$new_lines = array(0 => '');
	$index = 0;

	foreach ($lines as $line)
	{
		$words = explode(' ', $line);

		for ($i = 0, $size = sizeof($words); $i < $size; $i++)
		{
			$word = $words[$i];

			// If cut is true we need to cut the word if it is > width chars.
			// The remainder stays in $words[$i] and is handled by the next iteration.
			if ($cut && utf8_strlen($word) > $width)
			{
				$words[$i] = utf8_substr($word, $width);
				$word = utf8_substr($word, 0, $width);
				$i--;
			}

			// Start a new output line if the word does not fit anymore. An output line
			// that is still empty is never closed, otherwise an over-long word at the
			// beginning of a line would produce a spurious empty line in front of it.
			if ($new_lines[$index] !== '' && utf8_strlen($new_lines[$index] . $word) > $width)
			{
				// drop the trailing space added after the previous word
				$new_lines[$index] = substr($new_lines[$index], 0, -1);
				$index++;
				$new_lines[$index] = '';
			}

			$new_lines[$index] .= $word . ' ';
		}

		// End of an original line: drop the trailing space and open the next output line
		$new_lines[$index] = substr($new_lines[$index], 0, -1);
		$index++;
		$new_lines[$index] = '';
	}

	// The last opened output line is always an unused placeholder
	unset($new_lines[$index]);

	return implode($break, $new_lines);
}
|
|
1919 |
||
1920 |
?>
|