~drizzle-trunk/drizzle/development

1999.6.1 by kalebral at gmail
update Copyright strings to a more common format to help with creating the master debian copyright file
1
/* Copyright (C) 2008 PrimeBase Technologies GmbH, Germany
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
2
 *
3
 * PrimeBase Media Stream for MySQL
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
17
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
18
 *
19
 * Original author: Paul McCullagh (H&G2JCtL)
20
 * Continued development: Barry Leslie
21
 *
22
 * 2007-06-14
23
 *
24
 * CORE SYSTEM:
25
 * Unicode / UTF-8 conversion
26
 *
27
 */
28
29
#include "CSConfig.h"
30
31
#include <assert.h>
32
#include <string.h>
33
34
#include "CSUTF8.h"
35
#include "CSMemory.h"
36
#include "CSGlobal.h"
37
1548.2.23 by Barry.Leslie at PrimeBase
And more cleanup.
38
size_t cs_utf_to_uni_char(const u_char *in_string, uint32_t *ret_value)
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
39
{
1548.2.23 by Barry.Leslie at PrimeBase
And more cleanup.
40
	const u_char *i_string =  in_string;
41
	size_t	s_len = strlen((char*)in_string);
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
42
	u_char	ch;
1548.2.2 by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle.
43
	uint32_t	val;
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
44
	size_t	clen;
45
46
	ch = *i_string;
47
	if ((ch & 0x80) == 0x00) {
1548.2.2 by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle.
48
		val = (uint32_t) ch & 0x0000007F;
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
49
		clen = 1;
50
	}
51
	else if ((ch & 0xE0) == 0xC0) {
52
		if (s_len > 1) {
53
			val = ((i_string[0] & 0x0000001F) << 6) |
54
						(i_string[1] & 0x0000003F);
55
			if (val < 0x00000080)
56
				val = '?';
57
			clen = 2;
58
		}
59
		else {
60
			val = '?';
61
			clen = s_len;
62
		}
63
	}
64
	else if ((ch & 0xF0) == 0xE0) {
65
		if (s_len > 2) {
66
			val = ((i_string[0] & 0x0000000F) << 12) |
67
						((i_string[1] & 0x0000003F) << 6) |
68
						(i_string[2] & 0x0000003F);
69
			if (val < 0x000000800)
70
				val = '?';
71
			clen = 3;
72
		}
73
		else {
74
			val = '?';
75
			clen = s_len;
76
		}
77
	}
78
	else if ((ch & 0xF8) == 0xF0) {
79
		if (s_len > 3) {
80
			val = ((i_string[0] & 0x00000007) << 18) |
81
						((i_string[1] & 0x0000003F) << 12) |
82
						((i_string[2] & 0x0000003F) << 6) |
83
						(i_string[3] & 0x0000003F);
84
			if (val < 0x00010000)
85
				val = '?';
86
			clen = 4;
87
		}
88
		else {
89
			val = '?';
90
			clen = s_len;
91
		}
92
	}
93
	else if ((ch & 0xFC) == 0xF8) {
94
		if (s_len > 4) {
95
			val = ((i_string[0] & 0x00000003) << 24) |
96
						((i_string[1] & 0x0000003F) << 18) |
97
						((i_string[2] & 0x0000003F) << 12) |
98
						((i_string[3] & 0x0000003F) << 6) |
99
						(i_string[4] & 0x0000003F);
100
			if (val < 0x00200000)
101
				val = '?';
102
			clen = 5;
103
		}
104
		else {
105
			val = '?';
106
			clen = s_len;
107
		}
108
	}
109
	else if ((ch & 0xFE) == 0xFC) {
110
		if (s_len > 5) {
111
			val = ((i_string[0] & 0x00000001) << 30) |
112
						((i_string[1] & 0x0000003F) << 24) |
113
						((i_string[2] & 0x0000003F) << 18) |
114
						((i_string[3] & 0x0000003F) << 12) |
115
						((i_string[4] & 0x0000003F) << 6) |
116
						(i_string[5] & 0x0000003F);
117
			if (val < 0x04000000)
118
				val = '?';
119
			clen = 6;
120
		}
121
		else {
122
			val = '?';
123
			clen = s_len;
124
		}
125
	}
126
	else {
127
		// Should not happen!
128
		val = '?';
129
		clen = 1;
130
	}
131
	*ret_value = val;
132
	return(clen);
133
}
134
1548.2.23 by Barry.Leslie at PrimeBase
And more cleanup.
135
void cs_utf8_to_uni(size_t out_len, unichar *out_string, const u_char *in_string)
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
136
{
1548.2.2 by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle.
137
	uint32_t	utf_value;
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
138
139
	out_len--;  // Space for zero terminator
140
	while (*in_string) {
141
		in_string += cs_utf_to_uni_char(in_string, &utf_value);
142
		if (out_len == 0)
143
			break;
144
		if (utf_value > 0x0000FFFF)
145
			*out_string = (unichar) '?';
146
		else
147
			*out_string = (unichar) utf_value;
148
		out_string++;
149
		out_len--;
150
	}
151
	*out_string = 0;
152
}
153
1548.2.23 by Barry.Leslie at PrimeBase
And more cleanup.
154
void cs_utf8_to_uni_no_term(size_t out_len, unichar *out_string, const u_char *in_string)
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
155
{
1548.2.2 by Barry.Leslie at PrimeBase
A lot of minor changes to clean up the code and to get it to build with Drizzle.
156
	uint32_t	utf_value;
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
157
158
	while (*in_string) {
159
		in_string += cs_utf_to_uni_char(in_string, &utf_value);
160
		if (out_len == 0)
161
			break;
162
		if (utf_value > 0x0000FFFF)
163
			*out_string = (unichar) '?';
164
		else
165
			*out_string = (unichar) utf_value;
166
		out_string++;
167
		out_len--;
168
	}
169
}
170
171
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string)
172
{
173
	out_len--;  // Space for zero terminator
174
	while (*in_string) {
175
		if (*in_string <= 0x007F) {
176
			if (out_len < 1)
177
				break;
178
			*out_string++ = (char) (u_char) *in_string;
179
			out_len--;
180
		}
181
		else if (*in_string <= 0x07FF) {
182
			if (out_len < 3)
183
				break;
184
			*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F));
185
			*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
186
			out_len -= 2;
187
		}
188
		else /* <= 0xFFFF */ {
189
			if (out_len < 3)
190
				break;
191
			*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F));
192
			*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F));
193
			*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
194
			out_len -= 3;
195
		}
196
		in_string++;
197
	}
198
	*out_string = 0;
199
}
200
201
void cs_uni_to_utf8(size_t out_len, char *out_string, const unichar *in_string, s_int in_len)
202
{
203
	out_len--;  // Space for zero terminator
204
	while (in_len--) {
205
		if (*in_string <= 0x007F) {
206
			if (out_len < 1)
207
				break;
208
			*out_string++ = (char) (u_char) *in_string;
209
			out_len--;
210
		}
211
		else if (*in_string <= 0x07FF) {
212
			if (out_len < 3)
213
				break;
214
			*out_string++ = (char) (u_char) ((0x00C0) | ((*in_string >> 6) & 0x001F));
215
			*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
216
			out_len -= 2;
217
		}
218
		else /* <= 0xFFFF */ {
219
			if (out_len < 3)
220
				break;
221
			*out_string++ = (char) (u_char) ((0x00E0) | ((*in_string >> 12) & 0x000F));
222
			*out_string++ = (char) (u_char) ((0x0080) | ((*in_string >> 6) & 0x003F));
223
			*out_string++ = (char) (u_char) ((0x0080) | (*in_string & 0x003F));
224
			out_len -= 3;
225
		}
226
		in_string++;
227
	}
228
	*out_string = 0;
229
}
230
231
/*
 * Return the number of characters encoded in the zero-terminated UTF-8
 * string in_string.
 *
 * Every byte that is not a continuation byte (10xxxxxx) is counted as
 * the start of one character: single byte characters (0xxxxxxx) and
 * lead bytes (11xxxxxx). The string is not validated.
 */
size_t cs_utf8_to_uni_len(const char *in_string)
{
	size_t slen = 0;

	for (; *in_string; in_string++) {
		// Convert the value, not the pointer, so that const is kept
		// and the mask is applied to an unsigned byte.
		if (((unsigned char) *in_string & 0xC0) != 0x80)
			slen++;
	}
	return slen;
}
246
247
/*
 * Return the number of bytes needed to encode the first in_len
 * characters of in_string as UTF-8, not counting a zero terminator.
 *
 * Characters up to 0x7F take 1 byte, up to 0x7FF take 2 bytes, and all
 * others take 3 bytes. An in_len of zero or less gives 0.
 */
size_t cs_uni_to_utf8_len(const unichar *in_string, s_int in_len)
{
	size_t slen = 0;

	// "> 0" rather than "!= 0": presumably s_int is a signed type, in
	// which case a negative length must not start a runaway loop.
	for (; in_len > 0; in_len--, in_string++) {
		if (*in_string <= 0x000007F)
			slen += 1;
		else if (*in_string <= 0x00007FF)
			slen += 2;
		else /* <= 0xFFFF */
			slen += 3;
	}
	return slen;
}
263
1548.2.23 by Barry.Leslie at PrimeBase
And more cleanup.
264
/*
size_t cs_uni_len(const unichar *in_string)
{
	size_t len = 0;
	
	while (*in_string++) len++;
	return len;
}
*/
1548.2.1 by Barry.Leslie at PrimeBase
Added the PBMS daemon plugin.
273