1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
15 |
||
994.2.4
by Monty Taylor
Blast. Fixed some make distcheck issues. |
16 |
#include "mysys/mysys_priv.h" |
17 |
#include "mysys/mysys_err.h" |
|
212.5.18
by Monty Taylor
Moved m_ctype, m_string and my_bitmap. Removed t_ctype. |
18 |
#include <mystrings/m_ctype.h> |
19 |
#include <mystrings/m_string.h> |
|
722.1.4
by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values |
20 |
#include <drizzled/configmake.h> |
1
by brian
clean slate |
21 |
|
22 |
||
23 |
/*
  The code below implements this functionality:

    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using charset name, collation name or collation ID
    - Setting server default character set
*/
|
|
32 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
33 |
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) |
1
by brian
clean slate |
34 |
{
|
35 |
return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname)); |
|
36 |
}
|
|
37 |
||
38 |
||
39 |
static uint |
|
40 |
get_collation_number_internal(const char *name) |
|
41 |
{
|
|
42 |
CHARSET_INFO **cs; |
|
43 |
for (cs= all_charsets; |
|
44 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
45 |
cs++) |
|
46 |
{
|
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
47 |
if ( cs[0] && cs[0]->name && |
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
48 |
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name)) |
1
by brian
clean slate |
49 |
return cs[0]->number; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
50 |
}
|
1
by brian
clean slate |
51 |
return 0; |
52 |
}
|
|
53 |
||
54 |
||
146
by Brian Aker
my_bool cleanup. |
55 |
/*
  Allocate and fill the two 256-entry lookup tables of a character set.

  cs->state_map gets one MY_LEX_* state per byte value ("to get a faster
  parser"); cs->ident_map gets 1 for every byte whose state is MY_LEX_IDENT
  or MY_LEX_NUMBER_IDENT and 0 otherwise.

  Both tables are obtained with malloc().  If the second allocation fails the
  first one is released again and cs->state_map is reset to NULL, so a failed
  call leaves no allocation behind.

  RETURN
    false  both tables were allocated and filled
    true   out of memory
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  uint32_t i;
  unsigned char *state_map;
  unsigned char *ident_map;

  if (!(cs->state_map= (unsigned char*) malloc(256)))
    return true;

  if (!(cs->ident_map= (unsigned char*) malloc(256)))
  {
    /* Do not leak the first table when the second allocation fails. */
    free(cs->state_map);
    cs->state_map= NULL;
    return true;
  }

  state_map= cs->state_map;
  ident_map= cs->ident_map;

  /* Fill state_map with states to get a faster parser */
  for (i=0; i < 256 ; i++)
  {
    if (my_isalpha(cs,i))
      state_map[i]=(unsigned char) MY_LEX_IDENT;
    else if (my_isdigit(cs,i))
      state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, i)>1)
      state_map[i]=(unsigned char) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs,i))
      state_map[i]=(unsigned char) MY_LEX_SKIP;
    else
      state_map[i]=(unsigned char) MY_LEX_CHAR;
  }

  /* Fixed overrides for punctuation; these win over the ctype-based class. */
  state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
  state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
  state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
  state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
  state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
  state_map[(unsigned char)'&']=state_map[(unsigned char)'|']=(unsigned char) MY_LEX_BOOL;
  state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
  state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
  state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
  state_map[0]=(unsigned char) MY_LEX_EOL;
  state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
  state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
  state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
  state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
  state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
  state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;

  /*
    Create a second map to make it faster to find identifiers
  */
  for (i=0; i < 256 ; i++)
  {
    ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
                                   state_map[i] == MY_LEX_NUMBER_IDENT);
  }

  /*
    Special handling of hex and binary strings.  This is done after
    ident_map was derived, so 'x', 'X', 'b' and 'B' keep the ident_map value
    they got from their earlier state.
  */
  state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  return false;
}
|
|
117 |
||
118 |
||
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
119 |
static bool charset_initialized= false; |
1
by brian
clean slate |
120 |
|
121 |
CHARSET_INFO *all_charsets[256]; |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
122 |
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci; |
1
by brian
clean slate |
123 |
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
124 |
/*
  Register a compiled-in collation.

  Sets the MY_CS_AVAILABLE bit in cs->state and stores cs in all_charsets at
  the index given by cs->number.  The number is used as an index without a
  range check.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  const uint32_t slot= cs->number;

  cs->state|= MY_CS_AVAILABLE;
  all_charsets[slot]= cs;
}
|
|
129 |
||
632.1.11
by Monty Taylor
Fixed Sun Studio warnings in mysys. |
130 |
/*
  Allocation callback handed to the cset/coll init hooks in
  get_internal_charset().

  Returns whatever malloc(size) returns; the result is NULL when the
  allocation fails.
*/
void *cs_alloc(size_t size)
{
  void *block= malloc(size);

  return block;
}
|
134 |
||
135 |
||
146
by Brian Aker
my_bool cleanup. |
136 |
/*
  Populate all_charsets on first use.

  Does nothing when charset_initialized is already true.  Otherwise the table
  is zeroed, init_compiled_charsets(myflags) is called, and init_state_maps()
  is run for every registered entry (except the final slot) that has a ctype
  table.  An entry whose state maps cannot be built is removed from the
  table.

  RETURN
    Always false; no failure is reported to the caller.
*/
static bool init_available_charsets(myf myflags)
{
  uint32_t slot;

  if (charset_initialized)
    return false;

  memset(&all_charsets, 0, sizeof(all_charsets));
  init_compiled_charsets(myflags);

  /* Build the lexer maps for the compiled charsets */
  for (slot= 0; slot < array_elements(all_charsets) - 1; slot++)
  {
    CHARSET_INFO *entry= all_charsets[slot];

    if (entry && entry->ctype && init_state_maps(entry))
      all_charsets[slot]= NULL;
  }

  charset_initialized= true;
  assert(charset_initialized);

  return false;
}
|
|
168 |
||
169 |
||
170 |
void free_charsets(void) |
|
171 |
{
|
|
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
172 |
charset_initialized= true; |
1
by brian
clean slate |
173 |
}
|
174 |
||
175 |
||
482
by Brian Aker
Remove uint. |
176 |
/*
  Return the number of the collation called name, or 0 if there is none.

  Makes sure the charset table is initialized, then delegates to
  get_collation_number_internal().
*/
uint32_t get_collation_number(const char *name)
{
  uint32_t number;

  (void) init_available_charsets(MYF(0));
  number= get_collation_number_internal(name);

  return number;
}
|
|
181 |
||
182 |
||
482
by Brian Aker
Remove uint. |
183 |
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags) |
1
by brian
clean slate |
184 |
{
|
185 |
CHARSET_INFO **cs; |
|
186 |
init_available_charsets(MYF(0)); |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
187 |
|
1
by brian
clean slate |
188 |
for (cs= all_charsets; |
189 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
190 |
cs++) |
|
191 |
{
|
|
192 |
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
193 |
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name)) |
1
by brian
clean slate |
194 |
return cs[0]->number; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
195 |
}
|
1
by brian
clean slate |
196 |
return 0; |
197 |
}
|
|
198 |
||
199 |
||
482
by Brian Aker
Remove uint. |
200 |
const char *get_charset_name(uint32_t charset_number) |
1
by brian
clean slate |
201 |
{
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
202 |
const CHARSET_INFO *cs; |
1
by brian
clean slate |
203 |
init_available_charsets(MYF(0)); |
204 |
||
205 |
cs=all_charsets[charset_number]; |
|
206 |
if (cs && (cs->number == charset_number) && cs->name ) |
|
207 |
return (char*) cs->name; |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
208 |
|
1
by brian
clean slate |
209 |
return (char*) "?"; /* this mimics find_type() */ |
210 |
}
|
|
211 |
||
212 |
||
482
by Brian Aker
Remove uint. |
213 |
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number) |
1
by brian
clean slate |
214 |
{
|
215 |
CHARSET_INFO *cs; |
|
216 |
/*
|
|
217 |
To make things thread safe we are not allowing other threads to interfere
|
|
218 |
while we may changing the cs_info_table
|
|
219 |
*/
|
|
220 |
if ((cs= all_charsets[cs_number])) |
|
221 |
{
|
|
222 |
if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED)) |
|
223 |
{
|
|
383.1.7
by Brian Aker
Remove homebrew xml parser. |
224 |
assert(0); |
1
by brian
clean slate |
225 |
}
|
226 |
cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL; |
|
227 |
}
|
|
228 |
if (cs && !(cs->state & MY_CS_READY)) |
|
229 |
{
|
|
230 |
if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) || |
|
231 |
(cs->coll->init && cs->coll->init(cs, cs_alloc))) |
|
232 |
cs= NULL; |
|
233 |
else
|
|
234 |
cs->state|= MY_CS_READY; |
|
235 |
}
|
|
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
236 |
|
1
by brian
clean slate |
237 |
return cs; |
238 |
}
|
|
239 |
||
240 |
||
862
by Brian Aker
Remove charset directory code. |
241 |
const CHARSET_INFO *get_charset(uint32_t cs_number) |
1
by brian
clean slate |
242 |
{
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
243 |
const CHARSET_INFO *cs; |
1
by brian
clean slate |
244 |
if (cs_number == default_charset_info->number) |
245 |
return default_charset_info; |
|
246 |
||
247 |
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */ |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
248 |
|
1
by brian
clean slate |
249 |
if (!cs_number || cs_number >= array_elements(all_charsets)-1) |
250 |
return NULL; |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
251 |
|
383.1.7
by Brian Aker
Remove homebrew xml parser. |
252 |
cs= get_internal_charset(cs_number); |
1
by brian
clean slate |
253 |
|
254 |
return cs; |
|
255 |
}
|
|
256 |
||
862
by Brian Aker
Remove charset directory code. |
257 |
/*
  Look up a collation by its collation name.

  The name is resolved to a number with get_collation_number(); NULL is
  returned when that yields 0, otherwise the result of
  get_internal_charset() for that number.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
{
  uint32_t collation_id;

  (void) init_available_charsets(MYF(0));       /* If it isn't initialized */

  collation_id= get_collation_number(cs_name);
  if (collation_id == 0)
    return NULL;

  return get_internal_charset(collation_id);
}
|
|
268 |
||
269 |
||
862
by Brian Aker
Remove charset directory code. |
270 |
/*
  Look up a collation by character set name and state flags.

  The name/flags pair is resolved to a number with get_charset_number();
  NULL is returned when that yields 0, otherwise the result of
  get_internal_charset() for that number.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  uint32_t charset_id;

  (void) init_available_charsets(MYF(0));       /* If it isn't initialized */

  charset_id= get_charset_number(cs_name, cs_flags);
  if (charset_id == 0)
    return NULL;

  return get_internal_charset(charset_id);
}
|
282 |
||
283 |
||
284 |
/*
|
|
285 |
Escape apostrophes by doubling them up
|
|
286 |
||
287 |
SYNOPSIS
|
|
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
288 |
escape_quotes_for_drizzle()
|
1
by brian
clean slate |
289 |
charset_info Charset of the strings
|
290 |
to Buffer for escaped string
|
|
291 |
to_length Length of destination buffer, or 0
|
|
292 |
from The string to escape
|
|
293 |
length The length of the string to escape
|
|
294 |
||
295 |
DESCRIPTION
|
|
296 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
297 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
298 |
effect on the server.
|
|
299 |
||
300 |
NOTE
|
|
301 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
302 |
mean "big enough"
|
|
303 |
||
304 |
RETURN VALUES
|
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
305 |
UINT32_MAX The escaped string did not fit in the to buffer
|
1
by brian
clean slate |
306 |
>=0 The length of the escaped string
|
307 |
*/
|
|
308 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
309 |
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info, |
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
310 |
char *to, size_t to_length, |
311 |
const char *from, size_t length) |
|
1
by brian
clean slate |
312 |
{
|
313 |
const char *to_start= to; |
|
314 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
163
by Brian Aker
Merge Monty's code. |
315 |
bool overflow= false; |
1
by brian
clean slate |
316 |
#ifdef USE_MB
|
146
by Brian Aker
my_bool cleanup. |
317 |
bool use_mb_flag= use_mb(charset_info); |
1
by brian
clean slate |
318 |
#endif
|
319 |
for (end= from + length; from < end; from++) |
|
320 |
{
|
|
321 |
#ifdef USE_MB
|
|
322 |
int tmp_length; |
|
323 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
324 |
{
|
|
325 |
if (to + tmp_length > to_end) |
|
326 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
327 |
overflow= true; |
1
by brian
clean slate |
328 |
break; |
329 |
}
|
|
330 |
while (tmp_length--) |
|
331 |
*to++= *from++; |
|
332 |
from--; |
|
333 |
continue; |
|
334 |
}
|
|
335 |
/*
|
|
336 |
We don't have the same issue here with a non-multi-byte character being
|
|
337 |
turned into a multi-byte character by the addition of an escaping
|
|
338 |
character, because we are only escaping the ' character with itself.
|
|
339 |
*/
|
|
340 |
#endif
|
|
341 |
if (*from == '\'') |
|
342 |
{
|
|
343 |
if (to + 2 > to_end) |
|
344 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
345 |
overflow= true; |
1
by brian
clean slate |
346 |
break; |
347 |
}
|
|
348 |
*to++= '\''; |
|
349 |
*to++= '\''; |
|
350 |
}
|
|
351 |
else
|
|
352 |
{
|
|
353 |
if (to + 1 > to_end) |
|
354 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
355 |
overflow= true; |
1
by brian
clean slate |
356 |
break; |
357 |
}
|
|
358 |
*to++= *from; |
|
359 |
}
|
|
360 |
}
|
|
361 |
*to= 0; |
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
362 |
return overflow ? UINT32_MAX : (uint32_t) (to - to_start); |
1
by brian
clean slate |
363 |
}
|