1
/* Copyright (C) 2008 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
* Original author: Paul McCullagh (H&G2JCtL)
20
* Continued development: Barry Leslie
25
* Unicode / UTF-8 conversion
38
// Decode the UTF-8 sequence at the start of in_string into a single value.
// The lead byte's high bits select a sequence length of 1 to 6 bytes (the
// original, pre-RFC 3629 UTF-8 range); the payload bits of each byte are
// masked and shifted together into val.
// NOTE(review): presumably val is stored through ret_value and the return
// value is the number of bytes consumed (callers add it to in_string) --
// the lines that do this are not visible in this listing; confirm.
size_t cs_utf_to_uni_char(const u_char *in_string, uint32_t *ret_value)
40
const u_char *i_string = in_string;
41
// Number of bytes before the NUL terminator.
// NOTE(review): presumably used to guard the i_string[1..5] reads below;
// the checks themselves are not visible here.
size_t s_len = strlen((char*)in_string);
47
// NOTE(review): ch is presumably the lead byte (*i_string); its declaration
// and assignment are not visible here.
// 0xxxxxxx: single byte, 7 payload bits.
if ((ch & 0x80) == 0x00) {
48
val = (uint32_t) ch & 0x0000007F;
51
// 110xxxxx: 2-byte sequence, 5 + 6 payload bits.
else if ((ch & 0xE0) == 0xC0) {
53
val = ((i_string[0] & 0x0000001F) << 6) |
54
(i_string[1] & 0x0000003F);
64
// 1110xxxx: 3-byte sequence, 4 + 6 + 6 payload bits.
else if ((ch & 0xF0) == 0xE0) {
66
val = ((i_string[0] & 0x0000000F) << 12) |
67
((i_string[1] & 0x0000003F) << 6) |
68
(i_string[2] & 0x0000003F);
69
// Values below 0x800 fit in a shorter sequence, i.e. an overlong encoding.
// NOTE(review): the action taken for this case is not visible here.
if (val < 0x000000800)
78
// 11110xxx: 4-byte sequence, 3 + 6 + 6 + 6 payload bits.
else if ((ch & 0xF8) == 0xF0) {
80
val = ((i_string[0] & 0x00000007) << 18) |
81
((i_string[1] & 0x0000003F) << 12) |
82
((i_string[2] & 0x0000003F) << 6) |
83
(i_string[3] & 0x0000003F);
93
// 111110xx: 5-byte sequence, 2 + 4 * 6 payload bits.
else if ((ch & 0xFC) == 0xF8) {
95
val = ((i_string[0] & 0x00000003) << 24) |
96
((i_string[1] & 0x0000003F) << 18) |
97
((i_string[2] & 0x0000003F) << 12) |
98
((i_string[3] & 0x0000003F) << 6) |
99
(i_string[4] & 0x0000003F);
100
// Values below 0x200000 fit in a shorter sequence (overlong encoding).
if (val < 0x00200000)
109
// 1111110x: 6-byte sequence, 1 + 5 * 6 payload bits.
else if ((ch & 0xFE) == 0xFC) {
111
val = ((i_string[0] & 0x00000001) << 30) |
112
((i_string[1] & 0x0000003F) << 24) |
113
((i_string[2] & 0x0000003F) << 18) |
114
((i_string[3] & 0x0000003F) << 12) |
115
((i_string[4] & 0x0000003F) << 6) |
116
(i_string[5] & 0x0000003F);
117
// Values below 0x4000000 fit in a shorter sequence (overlong encoding).
if (val < 0x04000000)
127
// Remaining lead-byte patterns match none of the tests above.
// Should not happen!
135
// Convert the UTF-8 text in in_string to unichar values in out_string.
// out_len is the capacity of out_string; one element is held back for the
// zero terminator.
// NOTE(review): the loop, the declaration of utf_value and the write of the
// terminator are not visible in this listing.
void cs_utf8_to_uni(size_t out_len, unichar *out_string, const u_char *in_string)
139
out_len--; // Space for zero terminator
141
// Decode one sequence and step the input past it by the returned amount.
in_string += cs_utf_to_uni_char(in_string, &utf_value);
144
// Values above 0xFFFF are replaced with '?' rather than stored.
if (utf_value > 0x0000FFFF)
145
*out_string = (unichar) '?';
147
*out_string = (unichar) utf_value;
154
// Convert the UTF-8 text in in_string to unichar values in out_string.
// Unlike cs_utf8_to_uni, no out_len-- reservation for a terminator appears
// in the visible lines.
// NOTE(review): the loop and the declaration of utf_value are not visible
// in this listing.
void cs_utf8_to_uni_no_term(size_t out_len, unichar *out_string, const u_char *in_string)
159
// Decode one sequence and step the input past it by the returned amount.
in_string += cs_utf_to_uni_char(in_string, &utf_value);
162
// Values above 0xFFFF are replaced with '?' rather than stored.
if (utf_value > 0x0000FFFF)
163
*out_string = (unichar) '?';
165
*out_string = (unichar) utf_value;
171
// Encode unichar values from in_string as UTF-8 bytes in out_string.
// out_len is the capacity of out_string; one byte is held back for the
// zero terminator.
// NOTE(review): the loop, the per-branch capacity checks and the terminator
// write are not visible in this listing; presumably the input is
// zero-terminated since this overload takes no length -- confirm.
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string)
173
out_len--; // Space for zero terminator
175
// Up to 0x7F: stored as a single byte.
if (*in_string <= 0x007F) {
178
*out_string++ = (char) (u_char) *in_string;
181
// Up to 0x7FF: two bytes, 110xxxxx 10xxxxxx.
else if (*in_string <= 0x07FF) {
184
*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F));
185
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
188
// Everything else: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
else /* <= 0xFFFF */ {
191
*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F));
192
*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F));
193
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
201
// Encode unichar values from in_string as UTF-8 bytes in out_string.
// This overload additionally takes in_len.
// NOTE(review): presumably in_len is the number of input elements to
// convert; the loop that uses it, the per-branch capacity checks and the
// terminator write are not visible in this listing -- confirm.
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string, s_int in_len)
203
out_len--; // Space for zero terminator
205
// Up to 0x7F: stored as a single byte.
if (*in_string <= 0x007F) {
208
*out_string++ = (char) (u_char) *in_string;
211
// Up to 0x7FF: two bytes, 110xxxxx 10xxxxxx.
else if (*in_string <= 0x07FF) {
214
*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F));
215
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
218
// Everything else: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
else /* <= 0xFFFF */ {
221
*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F));
222
*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F));
223
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
231
// Classifies each byte of in_string as a continuation byte or as the start
// of a character.
// NOTE(review): presumably only the start bytes are counted and that count
// is returned; the loop, counter and return are not visible in this listing.
size_t cs_utf8_to_uni_len(const char *in_string)
236
// Cast to u_char so the mask is applied to an unsigned byte value.
if ((*((u_char *) in_string) & 0xC0) == 0x80)
237
// These are char data bytes (10xxxxxx)
240
// These are single chars (00xxxxxx, 01xxxxxx), or char start bytes (11xxxxxx)
247
// Tests each unichar value against the 1-byte (up to 0x7F) and 2-byte
// (up to 0x7FF) UTF-8 ranges.
// NOTE(review): presumably this sums the encoded byte length of the input
// (1, 2 or 3 bytes per element, matching cs_uni_to_utf8); the loop, the
// use of in_len, the counter updates and the return are not visible in
// this listing -- confirm.
size_t cs_uni_to_utf8_len(const unichar *in_string, s_int in_len)
252
if (*in_string <= 0x000007F) {
255
else if (*in_string <= 0x00007FF)
265
// Counts the unichar elements in in_string that precede the first zero
// element.
// NOTE(review): the declaration of len and the return are not visible in
// this listing.
size_t cs_uni_len(const unichar *in_string)
269
// The post-increment also steps past the terminating zero; len does not
// count it.
while (*in_string++) len++;