1999.6.1
by kalebral at gmail
update Copyright strings to a more common format to help with creating the master debian copyright file |
1 |
/* Copyright (C) 2008 PrimeBase Technologies GmbH, Germany
|
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
2 |
*
|
3 |
* PrimeBase Media Stream for MySQL
|
|
4 |
*
|
|
5 |
* This program is free software; you can redistribute it and/or modify
|
|
6 |
* it under the terms of the GNU General Public License as published by
|
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
8 |
* (at your option) any later version.
|
|
9 |
*
|
|
10 |
* This program is distributed in the hope that it will be useful,
|
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 |
* GNU General Public License for more details.
|
|
14 |
*
|
|
15 |
* You should have received a copy of the GNU General Public License
|
|
16 |
* along with this program; if not, write to the Free Software
|
|
1802.10.2
by Monty Taylor
Update all of the copyright headers to include the correct address. |
17 |
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
18 |
*
|
19 |
* Original author: Paul McCullagh (H&G2JCtL)
|
|
20 |
* Continued development: Barry Leslie
|
|
21 |
*
|
|
22 |
* 2007-06-14
|
|
23 |
*
|
|
24 |
* CORE SYSTEM:
|
|
25 |
* Unicode / UTF-8 conversion
|
|
26 |
*
|
|
27 |
*/
|
|
28 |
||
29 |
#include "CSConfig.h" |
|
30 |
||
31 |
#include <assert.h> |
|
32 |
#include <string.h> |
|
33 |
||
34 |
#include "CSUTF8.h" |
|
35 |
#include "CSMemory.h" |
|
36 |
#include "CSGlobal.h" |
|
37 |
||
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
38 |
size_t cs_utf_to_uni_char(const u_char *in_string, uint32_t *ret_value) |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
39 |
{
|
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
40 |
const u_char *i_string = in_string; |
41 |
size_t s_len = strlen((char*)in_string); |
|
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
42 |
u_char ch; |
1548.2.2
by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle. |
43 |
uint32_t val; |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
44 |
size_t clen; |
45 |
||
46 |
ch = *i_string; |
|
47 |
if ((ch & 0x80) == 0x00) { |
|
1548.2.2
by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle. |
48 |
val = (uint32_t) ch & 0x0000007F; |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
49 |
clen = 1; |
50 |
}
|
|
51 |
else if ((ch & 0xE0) == 0xC0) { |
|
52 |
if (s_len > 1) { |
|
53 |
val = ((i_string[0] & 0x0000001F) << 6) | |
|
54 |
(i_string[1] & 0x0000003F); |
|
55 |
if (val < 0x00000080) |
|
56 |
val = '?'; |
|
57 |
clen = 2; |
|
58 |
}
|
|
59 |
else { |
|
60 |
val = '?'; |
|
61 |
clen = s_len; |
|
62 |
}
|
|
63 |
}
|
|
64 |
else if ((ch & 0xF0) == 0xE0) { |
|
65 |
if (s_len > 2) { |
|
66 |
val = ((i_string[0] & 0x0000000F) << 12) | |
|
67 |
((i_string[1] & 0x0000003F) << 6) | |
|
68 |
(i_string[2] & 0x0000003F); |
|
69 |
if (val < 0x000000800) |
|
70 |
val = '?'; |
|
71 |
clen = 3; |
|
72 |
}
|
|
73 |
else { |
|
74 |
val = '?'; |
|
75 |
clen = s_len; |
|
76 |
}
|
|
77 |
}
|
|
78 |
else if ((ch & 0xF8) == 0xF0) { |
|
79 |
if (s_len > 3) { |
|
80 |
val = ((i_string[0] & 0x00000007) << 18) | |
|
81 |
((i_string[1] & 0x0000003F) << 12) | |
|
82 |
((i_string[2] & 0x0000003F) << 6) | |
|
83 |
(i_string[3] & 0x0000003F); |
|
84 |
if (val < 0x00010000) |
|
85 |
val = '?'; |
|
86 |
clen = 4; |
|
87 |
}
|
|
88 |
else { |
|
89 |
val = '?'; |
|
90 |
clen = s_len; |
|
91 |
}
|
|
92 |
}
|
|
93 |
else if ((ch & 0xFC) == 0xF8) { |
|
94 |
if (s_len > 4) { |
|
95 |
val = ((i_string[0] & 0x00000003) << 24) | |
|
96 |
((i_string[1] & 0x0000003F) << 18) | |
|
97 |
((i_string[2] & 0x0000003F) << 12) | |
|
98 |
((i_string[3] & 0x0000003F) << 6) | |
|
99 |
(i_string[4] & 0x0000003F); |
|
100 |
if (val < 0x00200000) |
|
101 |
val = '?'; |
|
102 |
clen = 5; |
|
103 |
}
|
|
104 |
else { |
|
105 |
val = '?'; |
|
106 |
clen = s_len; |
|
107 |
}
|
|
108 |
}
|
|
109 |
else if ((ch & 0xFE) == 0xFC) { |
|
110 |
if (s_len > 5) { |
|
111 |
val = ((i_string[0] & 0x00000001) << 30) | |
|
112 |
((i_string[1] & 0x0000003F) << 24) | |
|
113 |
((i_string[2] & 0x0000003F) << 18) | |
|
114 |
((i_string[3] & 0x0000003F) << 12) | |
|
115 |
((i_string[4] & 0x0000003F) << 6) | |
|
116 |
(i_string[5] & 0x0000003F); |
|
117 |
if (val < 0x04000000) |
|
118 |
val = '?'; |
|
119 |
clen = 6; |
|
120 |
}
|
|
121 |
else { |
|
122 |
val = '?'; |
|
123 |
clen = s_len; |
|
124 |
}
|
|
125 |
}
|
|
126 |
else { |
|
127 |
// Should not happen!
|
|
128 |
val = '?'; |
|
129 |
clen = 1; |
|
130 |
}
|
|
131 |
*ret_value = val; |
|
132 |
return(clen); |
|
133 |
}
|
|
134 |
||
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
135 |
void cs_utf8_to_uni(size_t out_len, unichar *out_string, const u_char *in_string) |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
136 |
{
|
1548.2.2
by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle. |
137 |
uint32_t utf_value; |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
138 |
|
139 |
out_len--; // Space for zero terminator |
|
140 |
while (*in_string) { |
|
141 |
in_string += cs_utf_to_uni_char(in_string, &utf_value); |
|
142 |
if (out_len == 0) |
|
143 |
break; |
|
144 |
if (utf_value > 0x0000FFFF) |
|
145 |
*out_string = (unichar) '?'; |
|
146 |
else
|
|
147 |
*out_string = (unichar) utf_value; |
|
148 |
out_string++; |
|
149 |
out_len--; |
|
150 |
}
|
|
151 |
*out_string = 0; |
|
152 |
}
|
|
153 |
||
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
154 |
void cs_utf8_to_uni_no_term(size_t out_len, unichar *out_string, const u_char *in_string) |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
155 |
{
|
1548.2.2
by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle. |
156 |
uint32_t utf_value; |
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
157 |
|
158 |
while (*in_string) { |
|
159 |
in_string += cs_utf_to_uni_char(in_string, &utf_value); |
|
160 |
if (out_len == 0) |
|
161 |
break; |
|
162 |
if (utf_value > 0x0000FFFF) |
|
163 |
*out_string = (unichar) '?'; |
|
164 |
else
|
|
165 |
*out_string = (unichar) utf_value; |
|
166 |
out_string++; |
|
167 |
out_len--; |
|
168 |
}
|
|
169 |
}
|
|
170 |
||
171 |
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string) |
|
172 |
{
|
|
173 |
out_len--; // Space for zero terminator |
|
174 |
while (*in_string) { |
|
175 |
if (*in_string <= 0x007F) { |
|
176 |
if (out_len < 1) |
|
177 |
break; |
|
178 |
*out_string++ = (char) (u_char) *in_string; |
|
179 |
out_len--; |
|
180 |
}
|
|
181 |
else if (*in_string <= 0x07FF) { |
|
182 |
if (out_len < 3) |
|
183 |
break; |
|
184 |
*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F)); |
|
185 |
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F)); |
|
186 |
out_len -= 2; |
|
187 |
}
|
|
188 |
else /* <= 0xFFFF */ { |
|
189 |
if (out_len < 3) |
|
190 |
break; |
|
191 |
*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F)); |
|
192 |
*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F)); |
|
193 |
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F)); |
|
194 |
out_len -= 3; |
|
195 |
}
|
|
196 |
in_string++; |
|
197 |
}
|
|
198 |
*out_string = 0; |
|
199 |
}
|
|
200 |
||
201 |
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string, s_int in_len) |
|
202 |
{
|
|
203 |
out_len--; // Space for zero terminator |
|
204 |
while (in_len--) { |
|
205 |
if (*in_string <= 0x007F) { |
|
206 |
if (out_len < 1) |
|
207 |
break; |
|
208 |
*out_string++ = (char) (u_char) *in_string; |
|
209 |
out_len--; |
|
210 |
}
|
|
211 |
else if (*in_string <= 0x07FF) { |
|
212 |
if (out_len < 3) |
|
213 |
break; |
|
214 |
*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F)); |
|
215 |
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F)); |
|
216 |
out_len -= 2; |
|
217 |
}
|
|
218 |
else /* <= 0xFFFF */ { |
|
219 |
if (out_len < 3) |
|
220 |
break; |
|
221 |
*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F)); |
|
222 |
*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F)); |
|
223 |
*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F)); |
|
224 |
out_len -= 3; |
|
225 |
}
|
|
226 |
in_string++; |
|
227 |
}
|
|
228 |
*out_string = 0; |
|
229 |
}
|
|
230 |
||
231 |
/*
 * Count the characters in a zero-terminated UTF-8 string.
 *
 * Every byte that is not a continuation byte (10xxxxxx) is counted as the
 * start of one character: single byte characters (0xxxxxxx) and lead bytes
 * (11xxxxxx). The sequences themselves are not validated.
 *
 * Returns the number of characters, excluding the terminator.
 */
size_t cs_utf8_to_uni_len(const char *in_string)
{
	const unsigned char	*byte;
	size_t				count = 0;

	for (byte = (const unsigned char *) in_string; *byte != 0; byte++) {
		/* Continuation bytes belong to a character that was already counted. */
		if ((*byte & 0xC0) != 0x80)
			count++;
	}
	return count;
}
|
|
246 |
||
247 |
/*
 * Compute how many bytes a counted unichar string occupies in UTF-8.
 *
 * in_string  Input characters; zero elements are counted like any other
 *            character.
 * in_len     Number of elements to examine.
 *
 * Values up to 0x7F count as one byte, values up to 0x7FF as two bytes and
 * everything else as three bytes. The result does not include room for a
 * zero terminator.
 */
size_t cs_uni_to_utf8_len(const unichar *in_string, s_int in_len)
{
	size_t total = 0;

	for (; in_len--; in_string++) {
		if (*in_string > 0x00007FF)
			total += 3;
		else if (*in_string > 0x000007F)
			total += 2;
		else
			total += 1;
	}
	return total;
}
|
|
263 |
||
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
264 |
/*
|
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
265 |
size_t cs_uni_len(const unichar *in_string)
|
266 |
{
|
|
267 |
size_t len = 0;
|
|
268 |
|
|
269 |
while (*in_string++) len++;
|
|
270 |
return len;
|
|
271 |
}
|
|
1548.2.23
by Barry.Leslie at PrimeBase
And more cleanup. |
272 |
*/
|
1548.2.1
by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin. |
273 |