1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
14 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
15 |
||
994.2.4
by Monty Taylor
Blast. Fixed some make distcheck issues. |
16 |
#include "mysys/mysys_priv.h" |
17 |
#include "mysys/mysys_err.h" |
|
212.5.18
by Monty Taylor
Moved m_ctype, m_string and my_bitmap. Removed t_ctype. |
18 |
#include <mystrings/m_ctype.h> |
19 |
#include <mystrings/m_string.h> |
|
722.1.4
by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values |
20 |
#include <drizzled/configmake.h> |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
21 |
#include <vector> |
22 |
||
23 |
using namespace std; |
|
24 |
||
25 |
||
26 |
/*
|
|
27 |
We collect memory in this vector that we free on delete.
|
|
28 |
*/
|
|
29 |
static vector<void *>memory_vector; |
|
1
by brian
clean slate |
30 |
|
31 |
/*
  The code below implements this functionality:

    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using charset name, collation name or collation ID
    - Setting server default character set
*/
|
|
40 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
41 |
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) |
1
by brian
clean slate |
42 |
{
|
43 |
return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname)); |
|
44 |
}
|
|
45 |
||
46 |
||
47 |
/*
  Find the number of the collation called 'name'.

  Walks all_charsets (its final slot is not examined) and compares each
  entry's name against 'name' with my_strcasecmp() under
  my_charset_utf8_general_ci. Empty slots and entries without a name are
  skipped.

  Returns the number of the first matching entry, or 0 if none matches.
*/
static uint
get_collation_number_internal(const char *name)
{
  CHARSET_INFO **const last= all_charsets + array_elements(all_charsets) - 1;

  for (CHARSET_INFO **slot= all_charsets; slot < last; slot++)
  {
    if (slot[0] == NULL || slot[0]->name == NULL)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, slot[0]->name, name) == 0)
      return slot[0]->number;
  }

  return 0;
}
|
|
61 |
||
62 |
||
146
by Brian Aker
my_bool cleanup. |
63 |
/*
  Build the two 256-entry lexer lookup tables of a character set.

  cs->state_map maps every byte value to a MY_LEX_* state; cs->ident_map
  holds 1 for bytes whose state is MY_LEX_IDENT or MY_LEX_NUMBER_IDENT and 0
  otherwise. Both tables are obtained from cs_alloc().

  Returns true if either allocation fails, false on success.
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  /* Bytes whose state is fixed, whatever the ctype data says about them. */
  static const struct
  {
    unsigned char byte;
    unsigned char state;
  } fixed_states[]=
  {
    { (unsigned char) '_',  (unsigned char) MY_LEX_IDENT },
    { (unsigned char) '$',  (unsigned char) MY_LEX_IDENT },
    { (unsigned char) '\'', (unsigned char) MY_LEX_STRING },
    { (unsigned char) '.',  (unsigned char) MY_LEX_REAL_OR_POINT },
    { (unsigned char) '>',  (unsigned char) MY_LEX_CMP_OP },
    { (unsigned char) '=',  (unsigned char) MY_LEX_CMP_OP },
    { (unsigned char) '!',  (unsigned char) MY_LEX_CMP_OP },
    { (unsigned char) '<',  (unsigned char) MY_LEX_LONG_CMP_OP },
    { (unsigned char) '&',  (unsigned char) MY_LEX_BOOL },
    { (unsigned char) '|',  (unsigned char) MY_LEX_BOOL },
    { (unsigned char) '#',  (unsigned char) MY_LEX_COMMENT },
    { (unsigned char) ';',  (unsigned char) MY_LEX_SEMICOLON },
    { (unsigned char) ':',  (unsigned char) MY_LEX_SET_VAR },
    { 0,                    (unsigned char) MY_LEX_EOL },
    { (unsigned char) '\\', (unsigned char) MY_LEX_ESCAPE },
    { (unsigned char) '/',  (unsigned char) MY_LEX_LONG_COMMENT },
    { (unsigned char) '*',  (unsigned char) MY_LEX_END_LONG_COMMENT },
    { (unsigned char) '@',  (unsigned char) MY_LEX_USER_END },
    { (unsigned char) '`',  (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER },
    { (unsigned char) '"',  (unsigned char) MY_LEX_STRING_OR_DELIMITER }
  };

  unsigned char *states;
  unsigned char *idents;
  uint32_t c;
  size_t n;

  cs->state_map= (unsigned char*) cs_alloc(256);
  if (cs->state_map == NULL)
    return true;

  cs->ident_map= (unsigned char*) cs_alloc(256);
  if (cs->ident_map == NULL)
    return true;

  states= cs->state_map;
  idents= cs->ident_map;

  /* First pass: classify every byte from the ctype data (faster parser) */
  for (c= 0; c < 256; c++)
  {
    unsigned char state;

    if (my_isalpha(cs, c))
      state= (unsigned char) MY_LEX_IDENT;
    else if (my_isdigit(cs, c))
      state= (unsigned char) MY_LEX_NUMBER_IDENT;
    else if (my_mbcharlen(cs, c) > 1)
      state= (unsigned char) MY_LEX_IDENT;
    else if (my_isspace(cs, c))
      state= (unsigned char) MY_LEX_SKIP;
    else
      state= (unsigned char) MY_LEX_CHAR;

    states[c]= state;
  }

  /* Second pass: apply the fixed per-byte overrides */
  for (n= 0; n < array_elements(fixed_states); n++)
    states[fixed_states[n].byte]= fixed_states[n].state;

  /*
    Snapshot which bytes can be part of an identifier. This must happen
    before the hex/binary overrides below, so that x, X, b and B are still
    recorded with whatever state the passes above gave them.
  */
  for (c= 0; c < 256; c++)
  {
    idents[c]= (unsigned char) (states[c] == MY_LEX_IDENT ||
                                states[c] == MY_LEX_NUMBER_IDENT);
  }

  /* Special handling of hex and binary strings */
  states[(unsigned char) 'x']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char) 'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char) 'b']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  states[(unsigned char) 'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;

  return false;
}
|
|
123 |
||
124 |
||
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
125 |
static bool charset_initialized= false; |
1
by brian
clean slate |
126 |
|
127 |
CHARSET_INFO *all_charsets[256]; |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
128 |
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci; |
1
by brian
clean slate |
129 |
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
130 |
/*
  Register a compiled-in collation.

  Flags 'cs' as MY_CS_AVAILABLE and stores it in all_charsets at index
  cs->number. No range check is performed on cs->number.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
|
|
135 |
||
632.1.11
by Monty Taylor
Fixed Sun Studio warnings in mysys. |
136 |
void *cs_alloc(size_t size) |
1
by brian
clean slate |
137 |
{
|
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
138 |
void *ptr= malloc(size); |
139 |
||
140 |
memory_vector.push_back(ptr); |
|
141 |
||
142 |
return ptr; |
|
1
by brian
clean slate |
143 |
}
|
144 |
||
145 |
||
146
by Brian Aker
my_bool cleanup. |
146 |
/*
  Populate all_charsets on first use.

  On the first call this zeroes all_charsets, runs
  init_compiled_charsets(myflags) and builds the lexer state maps for every
  registered entry that has ctype data (the final slot of all_charsets is
  not examined). An entry whose state maps cannot be built is removed from
  the table. Subsequent calls do nothing.

  Always returns false.
*/
static bool init_available_charsets(myf myflags)
{
  if (!charset_initialized)
  {
    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Prepare the compiled charsets that were just registered */
    CHARSET_INFO **const last= all_charsets + array_elements(all_charsets) - 1;
    for (CHARSET_INFO **slot= all_charsets; slot < last; slot++)
    {
      if (*slot != NULL && slot[0]->ctype && init_state_maps(*slot))
        *slot= NULL;
    }

    charset_initialized= true;
  }
  assert(charset_initialized);

  return false;
}
|
|
178 |
||
179 |
||
180 |
void free_charsets(void) |
|
181 |
{
|
|
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
182 |
charset_initialized= true; |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
183 |
|
184 |
while (memory_vector.empty() == false) |
|
185 |
{
|
|
186 |
void *ptr= memory_vector.back(); |
|
187 |
memory_vector.pop_back(); |
|
188 |
free(ptr); |
|
189 |
}
|
|
190 |
memory_vector.clear(); |
|
191 |
||
1
by brian
clean slate |
192 |
}
|
193 |
||
194 |
||
482
by Brian Aker
Remove uint. |
195 |
uint32_t get_collation_number(const char *name) |
1
by brian
clean slate |
196 |
{
|
197 |
init_available_charsets(MYF(0)); |
|
198 |
return get_collation_number_internal(name); |
|
199 |
}
|
|
200 |
||
201 |
||
482
by Brian Aker
Remove uint. |
202 |
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags) |
1
by brian
clean slate |
203 |
{
|
204 |
CHARSET_INFO **cs; |
|
205 |
init_available_charsets(MYF(0)); |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
206 |
|
1
by brian
clean slate |
207 |
for (cs= all_charsets; |
208 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
209 |
cs++) |
|
210 |
{
|
|
211 |
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
212 |
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name)) |
1
by brian
clean slate |
213 |
return cs[0]->number; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
214 |
}
|
1
by brian
clean slate |
215 |
return 0; |
216 |
}
|
|
217 |
||
218 |
||
482
by Brian Aker
Remove uint. |
219 |
const char *get_charset_name(uint32_t charset_number) |
1
by brian
clean slate |
220 |
{
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
221 |
const CHARSET_INFO *cs; |
1
by brian
clean slate |
222 |
init_available_charsets(MYF(0)); |
223 |
||
224 |
cs=all_charsets[charset_number]; |
|
225 |
if (cs && (cs->number == charset_number) && cs->name ) |
|
226 |
return (char*) cs->name; |
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
227 |
|
1
by brian
clean slate |
228 |
return (char*) "?"; /* this mimics find_type() */ |
229 |
}
|
|
230 |
||
231 |
||
482
by Brian Aker
Remove uint. |
232 |
/*
  Fetch the entry registered under 'cs_number' and make sure it is ready
  for use.

  'cs_number' is used as an index into all_charsets without a range check;
  callers must pass a valid index.

  An entry is only handed out if it is flagged MY_CS_AVAILABLE. The first
  time an entry is requested, its cset->init and coll->init callbacks (when
  present) are run with cs_alloc as allocator; if both succeed the entry is
  flagged MY_CS_READY so they are not run again.

  Returns the entry, or NULL if the slot is empty, the entry is not
  available, or one of the init callbacks reports failure.
*/
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
{
  CHARSET_INFO *entry= all_charsets[cs_number];

  if (entry == NULL)
    return NULL;

  /* Every registered entry is expected to be compiled in or loaded. */
  if (!(entry->state & (MY_CS_COMPILED | MY_CS_LOADED)))
  {
    assert(0);
  }

  if (!(entry->state & MY_CS_AVAILABLE))
    return NULL;

  if (entry->state & MY_CS_READY)
    return entry;

  if (entry->cset->init && entry->cset->init(entry, cs_alloc))
    return NULL;

  if (entry->coll->init && entry->coll->init(entry, cs_alloc))
    return NULL;

  entry->state|= MY_CS_READY;
  return entry;
}
|
|
258 |
||
259 |
||
862
by Brian Aker
Remove charset directory code. |
260 |
/*
  Look up a collation by number.

  The default collation is returned directly, without touching the tables.
  Otherwise the tables are initialized if necessary and the lookup is
  delegated to get_internal_charset().

  Returns NULL for number 0, for numbers at or beyond the last slot of
  all_charsets, and whenever get_internal_charset() returns NULL.
*/
const CHARSET_INFO *get_charset(uint32_t cs_number)
{
  if (cs_number == default_charset_info->number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const bool in_range= cs_number != 0 &&
                       cs_number < array_elements(all_charsets) - 1;

  return in_range ? get_internal_charset(cs_number) : NULL;
}
|
|
275 |
||
862
by Brian Aker
Remove charset directory code. |
276 |
/*
  Look up a collation by its collation name.

  Returns the ready-to-use entry, or NULL if get_collation_number() does
  not know 'cs_name' or get_internal_charset() cannot provide the entry.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
{
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t cs_number= get_collation_number(cs_name);
  if (cs_number == 0)
    return NULL;

  return get_internal_charset(cs_number);
}
|
|
287 |
||
288 |
||
862
by Brian Aker
Remove charset directory code. |
289 |
/*
  Look up an entry by character set name and state flags.

  'cs_name' and 'cs_flags' are resolved to a number with
  get_charset_number(). Returns the ready-to-use entry, or NULL if no
  number was found or get_internal_charset() cannot provide the entry.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t cs_number= get_charset_number(cs_name, cs_flags);
  if (cs_number == 0)
    return NULL;

  return get_internal_charset(cs_number);
}
|
301 |
||
302 |
||
303 |
/*
|
|
304 |
Escape apostrophes by doubling them up
|
|
305 |
||
306 |
SYNOPSIS
|
|
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
307 |
escape_quotes_for_drizzle()
|
1
by brian
clean slate |
308 |
charset_info Charset of the strings
|
309 |
to Buffer for escaped string
|
|
310 |
to_length Length of destination buffer, or 0
|
|
311 |
from The string to escape
|
|
312 |
length The length of the string to escape
|
|
313 |
||
314 |
DESCRIPTION
|
|
315 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
316 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
317 |
effect on the server.
|
|
318 |
||
319 |
NOTE
|
|
320 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
321 |
mean "big enough"
|
|
322 |
||
323 |
RETURN VALUES
|
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
324 |
UINT32_MAX The escaped string did not fit in the to buffer
|
1
by brian
clean slate |
325 |
>=0 The length of the escaped string
|
326 |
*/
|
|
327 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
328 |
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info, |
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
329 |
char *to, size_t to_length, |
330 |
const char *from, size_t length) |
|
1
by brian
clean slate |
331 |
{
|
332 |
const char *to_start= to; |
|
333 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
163
by Brian Aker
Merge Monty's code. |
334 |
bool overflow= false; |
146
by Brian Aker
my_bool cleanup. |
335 |
bool use_mb_flag= use_mb(charset_info); |
1
by brian
clean slate |
336 |
for (end= from + length; from < end; from++) |
337 |
{
|
|
338 |
int tmp_length; |
|
339 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
340 |
{
|
|
341 |
if (to + tmp_length > to_end) |
|
342 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
343 |
overflow= true; |
1
by brian
clean slate |
344 |
break; |
345 |
}
|
|
346 |
while (tmp_length--) |
|
347 |
*to++= *from++; |
|
348 |
from--; |
|
349 |
continue; |
|
350 |
}
|
|
351 |
/*
|
|
352 |
We don't have the same issue here with a non-multi-byte character being
|
|
353 |
turned into a multi-byte character by the addition of an escaping
|
|
354 |
character, because we are only escaping the ' character with itself.
|
|
355 |
*/
|
|
356 |
if (*from == '\'') |
|
357 |
{
|
|
358 |
if (to + 2 > to_end) |
|
359 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
360 |
overflow= true; |
1
by brian
clean slate |
361 |
break; |
362 |
}
|
|
363 |
*to++= '\''; |
|
364 |
*to++= '\''; |
|
365 |
}
|
|
366 |
else
|
|
367 |
{
|
|
368 |
if (to + 1 > to_end) |
|
369 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
370 |
overflow= true; |
1
by brian
clean slate |
371 |
break; |
372 |
}
|
|
373 |
*to++= *from; |
|
374 |
}
|
|
375 |
}
|
|
376 |
*to= 0; |
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
377 |
return overflow ? UINT32_MAX : (uint32_t) (to - to_start); |
1
by brian
clean slate |
378 |
}
|