1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
1802.10.2
by Monty Taylor
Update all of the copyright headers to include the correct address. |
14 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
1
by brian
clean slate |
15 |
|
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
16 |
#include <config.h> |
1241.9.57
by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined. |
17 |
|
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
18 |
#include <drizzled/charset.h> |
19 |
#include <drizzled/error.h> |
|
20 |
#include <drizzled/charset_info.h> |
|
21 |
#include <drizzled/internal/m_string.h> |
|
722.1.4
by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values |
22 |
#include <drizzled/configmake.h> |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
23 |
#include <vector> |
24 |
||
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
25 |
#include <drizzled/visibility.h> |
2119.4.1
by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by |
26 |
|
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
27 |
using namespace std; |
28 |
||
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
29 |
namespace drizzled |
30 |
{
|
|
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
31 |
|
32 |
/*
|
|
33 |
Every buffer handed out by cs_alloc() is recorded in this vector so that
free_charsets() can release it.
|
|
34 |
*/
|
|
2160.1.2
by Olaf van der Spek
casts |
35 |
static vector<unsigned char*> memory_vector; |
1
by brian
clean slate |
36 |
|
37 |
/*
|
|
38 |
The code below implements this functionality:
|
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
39 |
|
1
by brian
clean slate |
40 |
- Initializing charset related structures
|
41 |
- Loading dynamic charsets
|
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
42 |
- Searching for a proper CHARSET_INFO
|
1
by brian
clean slate |
43 |
using charset name, collation name or collation ID
|
44 |
- Setting server default character set
|
|
45 |
*/
|
|
46 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
47 |
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) |
1
by brian
clean slate |
48 |
{
|
49 |
return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname)); |
|
50 |
}
|
|
51 |
||
52 |
||
53 |
/*
  Look up a collation id by its name.

  Walks all_charsets (every slot except the last one) and compares the
  name of each registered entry against `name` with my_strcasecmp(),
  using my_charset_utf8_general_ci for the comparison.

  RETURN
    number of the first matching entry, or 0 when nothing matches
*/
static uint
get_collation_number_internal(const char *name)
{
  CHARSET_INFO **const last= all_charsets + array_elements(all_charsets) - 1;

  for (CHARSET_INFO **slot= all_charsets; slot < last; ++slot)
  {
    const CHARSET_INFO *candidate= *slot;

    /* Skip empty slots and entries that carry no name. */
    if (candidate == NULL || candidate->name == NULL)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, candidate->name, name) == 0)
      return candidate->number;
  }

  return 0;
}
|
|
67 |
||
2160.1.2
by Olaf van der Spek
casts |
68 |
static unsigned char *cs_alloc(size_t size) |
69 |
{
|
|
70 |
memory_vector.push_back(new unsigned char[size]); |
|
71 |
return memory_vector.back(); |
|
72 |
}
|
|
1
by brian
clean slate |
73 |
|
146
by Brian Aker
my_bool cleanup. |
74 |
/*
  Build the two 256-entry lexer lookup tables of a character set.

  state_map[c] holds the MY_LEX_* state assigned to byte value c.
  ident_map[c] is non-zero when that state is MY_LEX_IDENT or
  MY_LEX_NUMBER_IDENT.

  Both tables are obtained from cs_alloc().

  RETURN
    false  tables were built
    true   one of the allocations returned NULL
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  cs->state_map= cs_alloc(256);
  if (cs->state_map == NULL)
    return true;

  cs->ident_map= cs_alloc(256);
  if (cs->ident_map == NULL)
    return true;

  unsigned char *lex_state= cs->state_map;
  unsigned char *is_ident= cs->ident_map;

  /* Classify every byte value by its ctype properties. */
  for (int ch= 0; ch < 256; ch++)
  {
    if (my_isalpha(cs, ch))
    {
      lex_state[ch]= MY_LEX_IDENT;
    }
    else if (my_isdigit(cs, ch))
    {
      lex_state[ch]= MY_LEX_NUMBER_IDENT;
    }
    else if (my_mbcharlen(cs, ch) > 1)
    {
      /* Lead byte of a multi-byte character. */
      lex_state[ch]= MY_LEX_IDENT;
    }
    else if (my_isspace(cs, ch))
    {
      lex_state[ch]= MY_LEX_SKIP;
    }
    else
    {
      lex_state[ch]= MY_LEX_CHAR;
    }
  }

  /* Punctuation with a dedicated lexer state overrides the defaults. */
  lex_state['$']= MY_LEX_IDENT;
  lex_state['_']= MY_LEX_IDENT;
  lex_state['\'']= MY_LEX_STRING;
  lex_state['.']= MY_LEX_REAL_OR_POINT;
  lex_state['!']= MY_LEX_CMP_OP;
  lex_state['=']= MY_LEX_CMP_OP;
  lex_state['>']= MY_LEX_CMP_OP;
  lex_state['<']= MY_LEX_LONG_CMP_OP;
  lex_state['|']= MY_LEX_BOOL;
  lex_state['&']= MY_LEX_BOOL;
  lex_state['#']= MY_LEX_COMMENT;
  lex_state[';']= MY_LEX_SEMICOLON;
  lex_state[':']= MY_LEX_SET_VAR;
  lex_state[0]= MY_LEX_EOL;
  lex_state['\\']= MY_LEX_ESCAPE;
  lex_state['/']= MY_LEX_LONG_COMMENT;
  lex_state['*']= MY_LEX_END_LONG_COMMENT;
  lex_state['@']= MY_LEX_USER_END;
  lex_state['`']= MY_LEX_USER_VARIABLE_DELIMITER;
  lex_state['"']= MY_LEX_STRING_OR_DELIMITER;

  /*
    Second table: quick test for identifier bytes. It has to be filled
    before the hex/binary overrides below, so that x, X, b and B are
    still recorded as identifier bytes.
  */
  for (int ch= 0; ch < 256; ch++)
  {
    const unsigned char state= lex_state[ch];
    is_ident[ch]= (state == MY_LEX_IDENT) || (state == MY_LEX_NUMBER_IDENT);
  }

  /* Special handling of hex and binary strings */
  lex_state['X']= MY_LEX_IDENT_OR_HEX;
  lex_state['x']= MY_LEX_IDENT_OR_HEX;
  lex_state['B']= MY_LEX_IDENT_OR_BIN;
  lex_state['b']= MY_LEX_IDENT_OR_BIN;

  return false;
}
|
|
129 |
||
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
130 |
/* Set by init_available_charsets() once all_charsets is filled; cleared by free_charsets(). */
static bool charset_initialized= false; |
1
by brian
clean slate |
131 |
|
2119.4.1
by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by |
132 |
/* Collation table; add_compiled_collation() stores each descriptor at index cs->number. */
DRIZZLED_API CHARSET_INFO *all_charsets[256]; |
133 |
/* Default collation; get_charset() returns it directly when asked for its number. */
const DRIZZLED_API CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci; |
|
1
by brian
clean slate |
134 |
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
135 |
/*
  Register a compiled-in collation.

  The descriptor is stored in all_charsets at index cs->number and is
  flagged MY_CS_AVAILABLE.

  all_charsets has a fixed number of slots, so an id outside the table
  would write past its end; that is treated as a programming error and
  caught by the assert in debug builds.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  assert(cs->number < array_elements(all_charsets));

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}
|
|
140 |
||
146
by Brian Aker
my_bool cleanup. |
141 |
/*
  Populate all_charsets on first use.

  The first call clears the table, lets init_compiled_charsets() register
  the compiled-in collations and then builds the lexer state maps for
  every registered entry that has a ctype table. An entry whose state
  maps cannot be built is removed from the table. Later calls return
  immediately because charset_initialized is already set.

  NOTE(review): charset_initialized is a plain bool and no locking is
  visible here - confirm that the first call happens before any
  concurrent use.

  RETURN
    always false; no error is reported by this function
*/
static bool init_available_charsets(myf myflags)
{
  const bool error= false;

  if (charset_initialized)
    return error;

  memset(&all_charsets, 0, sizeof(all_charsets));
  init_compiled_charsets(myflags);

  /* Prepare the compiled charsets (the last slot is not visited). */
  const size_t scanned= array_elements(all_charsets) - 1;
  for (size_t idx= 0; idx < scanned; ++idx)
  {
    CHARSET_INFO *&entry= all_charsets[idx];

    if (entry == NULL || entry->ctype == NULL)
      continue;

    if (init_state_maps(entry))
      entry= NULL;
  }

  charset_initialized= true;
  assert(charset_initialized);

  return error;
}
|
|
173 |
||
174 |
||
2160.1.2
by Olaf van der Spek
casts |
175 |
void free_charsets() |
1
by brian
clean slate |
176 |
{
|
2160.1.7
by Olaf van der Spek
fix |
177 |
charset_initialized= false; |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
178 |
|
2160.1.6
by Olaf van der Spek
USe "not" instead of "!" |
179 |
while (not memory_vector.empty()) |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
180 |
{
|
2160.1.2
by Olaf van der Spek
casts |
181 |
delete[] memory_vector.back(); |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
182 |
memory_vector.pop_back(); |
183 |
}
|
|
1
by brian
clean slate |
184 |
}
|
185 |
||
186 |
||
482
by Brian Aker
Remove uint. |
187 |
uint32_t get_collation_number(const char *name) |
1
by brian
clean slate |
188 |
{
|
189 |
init_available_charsets(MYF(0)); |
|
190 |
return get_collation_number_internal(name); |
|
191 |
}
|
|
192 |
||
193 |
||
482
by Brian Aker
Remove uint. |
194 |
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags) |
1
by brian
clean slate |
195 |
{
|
196 |
CHARSET_INFO **cs; |
|
197 |
init_available_charsets(MYF(0)); |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
198 |
|
1
by brian
clean slate |
199 |
for (cs= all_charsets; |
200 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
201 |
cs++) |
|
202 |
{
|
|
2085.2.3
by Brian Aker
Fix strcasecmp issues (ie, check UTF-8). |
203 |
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name)) |
1
by brian
clean slate |
204 |
return cs[0]->number; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
205 |
}
|
1
by brian
clean slate |
206 |
return 0; |
207 |
}
|
|
208 |
||
209 |
||
482
by Brian Aker
Remove uint. |
210 |
const char *get_charset_name(uint32_t charset_number) |
1
by brian
clean slate |
211 |
{
|
212 |
init_available_charsets(MYF(0)); |
|
213 |
||
2160.1.2
by Olaf van der Spek
casts |
214 |
const CHARSET_INFO *cs= all_charsets[charset_number]; |
1
by brian
clean slate |
215 |
if (cs && (cs->number == charset_number) && cs->name ) |
2160.1.2
by Olaf van der Spek
casts |
216 |
return cs->name; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
217 |
|
2160.1.2
by Olaf van der Spek
casts |
218 |
return "?"; /* this mimics find_type() */ |
1
by brian
clean slate |
219 |
}
|
220 |
||
221 |
||
482
by Brian Aker
Remove uint. |
222 |
/*
  Fetch the descriptor stored in all_charsets[cs_number] and make it
  ready for use.

  An entry that is flagged neither MY_CS_COMPILED nor MY_CS_LOADED trips
  an assert. An entry without MY_CS_AVAILABLE is reported as missing.
  For an entry not yet flagged MY_CS_READY, the optional cset and coll
  init hooks are run with cs_alloc as allocator; MY_CS_READY is set when
  both succeed.

  The caller is responsible for passing an index inside the table.

  NOTE(review): no locking is visible in this function even though it
  updates cs->state - confirm how concurrent callers are serialized.

  RETURN
    the descriptor, or NULL when the slot is empty, the entry is not
    available, or one of the init hooks reported failure
*/
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
{
  CHARSET_INFO *entry= all_charsets[cs_number];
  if (entry == NULL)
    return NULL;

  if (!(entry->state & (MY_CS_COMPILED | MY_CS_LOADED)))
  {
    assert(0);
  }

  if (!(entry->state & MY_CS_AVAILABLE))
    return NULL;

  if (entry->state & MY_CS_READY)
    return entry;

  /* A hook returning non-zero means initialization failed. */
  if (entry->cset->init && entry->cset->init(entry, cs_alloc))
    return NULL;

  if (entry->coll->init && entry->coll->init(entry, cs_alloc))
    return NULL;

  entry->state|= MY_CS_READY;
  return entry;
}
|
|
248 |
||
249 |
||
862
by Brian Aker
Remove charset directory code. |
250 |
/*
  Return the collation descriptor for a collation id.

  The default collation is answered directly, without touching the
  table. Any other id triggers table initialization (if still needed)
  and is then range-checked: 0 and ids at or beyond the last slot are
  rejected.

  RETURN
    the descriptor, or NULL when the id is rejected or
    get_internal_charset() cannot provide the entry
*/
const CHARSET_INFO *get_charset(uint32_t cs_number)
{
  if (default_charset_info->number == cs_number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));     /* If it isn't initialized */

  const bool in_range= cs_number != 0 &&
                       cs_number < array_elements(all_charsets) - 1;
  if (!in_range)
    return NULL;

  return get_internal_charset(cs_number);
}
|
|
265 |
||
862
by Brian Aker
Remove charset directory code. |
266 |
/*
  Return the collation descriptor for a collation name.

  The name is resolved to an id with get_collation_number(); the id is
  then turned into a descriptor by get_internal_charset().

  RETURN
    the descriptor, or NULL when the name is unknown or the entry cannot
    be provided
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
{
  (void) init_available_charsets(MYF(0));     /* If it isn't initialized */

  const uint32_t collation_id= get_collation_number(cs_name);
  if (collation_id == 0)
    return NULL;

  return get_internal_charset(collation_id);
}
|
|
277 |
||
278 |
||
862
by Brian Aker
Remove charset directory code. |
279 |
/*
  Return a collation descriptor for a character set name.

  get_charset_number() picks the first entry of that character set whose
  state has at least one bit of cs_flags set; get_internal_charset() then
  turns the id into a descriptor.

  RETURN
    the descriptor, or NULL when no entry matches or the entry cannot be
    provided
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  (void) init_available_charsets(MYF(0));     /* If it isn't initialized */

  const uint32_t charset_id= get_charset_number(cs_name, cs_flags);
  if (charset_id == 0)
    return NULL;

  return get_internal_charset(charset_id);
}
|
291 |
||
292 |
||
293 |
/*
|
|
294 |
Escape apostrophes by doubling them up
|
|
295 |
||
296 |
SYNOPSIS
|
|
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
297 |
escape_quotes_for_drizzle()
|
1
by brian
clean slate |
298 |
charset_info Charset of the strings
|
299 |
to Buffer for escaped string
|
|
300 |
to_length Length of destination buffer, or 0
|
|
301 |
from The string to escape
|
|
302 |
length The length of the string to escape
|
|
303 |
||
304 |
DESCRIPTION
|
|
305 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
306 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
307 |
effect on the server.
|
|
308 |
||
309 |
NOTE
|
|
310 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
311 |
mean "big enough"
|
|
312 |
||
313 |
RETURN VALUES
|
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
314 |
UINT32_MAX The escaped string did not fit in the to buffer
|
1
by brian
clean slate |
315 |
>=0 The length of the escaped string
|
316 |
*/
|
|
317 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
318 |
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info, |
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
319 |
char *to, size_t to_length, |
320 |
const char *from, size_t length) |
|
1
by brian
clean slate |
321 |
{
|
322 |
const char *to_start= to; |
|
323 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
163
by Brian Aker
Merge Monty's code. |
324 |
bool overflow= false; |
146
by Brian Aker
my_bool cleanup. |
325 |
bool use_mb_flag= use_mb(charset_info); |
1
by brian
clean slate |
326 |
for (end= from + length; from < end; from++) |
327 |
{
|
|
328 |
int tmp_length; |
|
329 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
330 |
{
|
|
331 |
if (to + tmp_length > to_end) |
|
332 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
333 |
overflow= true; |
1
by brian
clean slate |
334 |
break; |
335 |
}
|
|
336 |
while (tmp_length--) |
|
337 |
*to++= *from++; |
|
338 |
from--; |
|
339 |
continue; |
|
340 |
}
|
|
341 |
/*
|
|
342 |
We don't have the same issue here with a non-multi-byte character being
|
|
343 |
turned into a multi-byte character by the addition of an escaping
|
|
344 |
character, because we are only escaping the ' character with itself.
|
|
345 |
*/
|
|
346 |
if (*from == '\'') |
|
347 |
{
|
|
348 |
if (to + 2 > to_end) |
|
349 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
350 |
overflow= true; |
1
by brian
clean slate |
351 |
break; |
352 |
}
|
|
353 |
*to++= '\''; |
|
354 |
*to++= '\''; |
|
355 |
}
|
|
356 |
else
|
|
357 |
{
|
|
358 |
if (to + 1 > to_end) |
|
359 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
360 |
overflow= true; |
1
by brian
clean slate |
361 |
break; |
362 |
}
|
|
363 |
*to++= *from; |
|
364 |
}
|
|
365 |
}
|
|
366 |
*to= 0; |
|
2160.1.2
by Olaf van der Spek
casts |
367 |
return overflow ? UINT32_MAX : to - to_start; |
1
by brian
clean slate |
368 |
}
|
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
369 |
|
370 |
} /* namespace drizzled */ |