1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
14 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
15 |
||
16 |
#include "mysys_priv.h" |
|
17 |
#include "mysys_err.h" |
|
212.5.18
by Monty Taylor
Moved m_ctype, m_string and my_bitmap. Removed t_ctype. |
18 |
#include <mystrings/m_ctype.h> |
19 |
#include <mystrings/m_string.h> |
|
1
by brian
clean slate |
20 |
#include <my_dir.h> |
21 |
||
22 |
||
23 |
/*
|
|
24 |
The code below implements this functionality:
|
|
25 |
|
|
26 |
- Initializing charset related structures
|
|
27 |
- Loading dynamic charsets
|
|
28 |
- Searching for a proper CHARSET_INFO
|
|
29 |
using charset name, collation name or collation ID
|
|
30 |
- Setting server default character set
|
|
31 |
*/
|
|
32 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
33 |
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2) |
1
by brian
clean slate |
34 |
{
|
35 |
return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname)); |
|
36 |
}
|
|
37 |
||
38 |
||
39 |
static uint |
|
40 |
get_collation_number_internal(const char *name) |
|
41 |
{
|
|
42 |
CHARSET_INFO **cs; |
|
43 |
for (cs= all_charsets; |
|
44 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
45 |
cs++) |
|
46 |
{
|
|
47 |
if ( cs[0] && cs[0]->name && |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
48 |
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name)) |
1
by brian
clean slate |
49 |
return cs[0]->number; |
50 |
}
|
|
51 |
return 0; |
|
52 |
}
|
|
53 |
||
54 |
||
146
by Brian Aker
my_bool cleanup. |
55 |
/*
  Build the two 256-entry lexer lookup tables of a character set.

  cs->state_map maps every byte value to a MY_LEX_* state and
  cs->ident_map flags the bytes that may appear in an identifier.
  Both tables are obtained from my_once_alloc().

  Returns 0 (false) on success and 1 (true) if an allocation failed.
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  uint32_t ch;
  unsigned char *states;
  unsigned char *idents;

  cs->state_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME));
  if (cs->state_map == NULL)
    return 1;

  cs->ident_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME));
  if (cs->ident_map == NULL)
    return 1;

  states= cs->state_map;
  idents= cs->ident_map;

  /* First pass: classify every byte value by its ctype properties. */
  for (ch= 0; ch < 256; ch++)
  {
    if (my_isalpha(cs,ch))
      states[ch]= (unsigned char) MY_LEX_IDENT;
    else if (my_isdigit(cs,ch))
      states[ch]= (unsigned char) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, ch)>1)
      states[ch]= (unsigned char) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs,ch))
      states[ch]= (unsigned char) MY_LEX_SKIP;
    else
      states[ch]= (unsigned char) MY_LEX_CHAR;
  }

  /* Second pass: characters with a dedicated lexer state override the above. */
  states[(unsigned char)'$']= (unsigned char) MY_LEX_IDENT;
  states[(unsigned char)'_']= (unsigned char) MY_LEX_IDENT;
  states[(unsigned char)'\'']= (unsigned char) MY_LEX_STRING;
  states[(unsigned char)'.']= (unsigned char) MY_LEX_REAL_OR_POINT;
  states[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char)'=']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char)'>']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
  states[(unsigned char)'|']= (unsigned char) MY_LEX_BOOL;
  states[(unsigned char)'&']= (unsigned char) MY_LEX_BOOL;
  states[(unsigned char)'#']= (unsigned char) MY_LEX_COMMENT;
  states[(unsigned char)';']= (unsigned char) MY_LEX_SEMICOLON;
  states[(unsigned char)':']= (unsigned char) MY_LEX_SET_VAR;
  states[0]= (unsigned char) MY_LEX_EOL;
  states[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
  states[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
  states[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
  states[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
  states[(unsigned char)'`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
  states[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;

  /*
    Derive the identifier map. This must happen before the hex/binary
    overrides below so that 'x', 'X', 'b' and 'B' keep their identifier flag.
  */
  for (ch= 0; ch < 256; ch++)
  {
    const unsigned char st= states[ch];
    idents[ch]= (unsigned char) (st == MY_LEX_IDENT ||
                                 st == MY_LEX_NUMBER_IDENT);
  }

  /* Special handling of hex and binary strings */
  states[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char)'x']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  states[(unsigned char)'b']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  return 0;
}
|
|
117 |
||
118 |
||
119 |
/*
  Parenthesized so the product stays a single operand when the macro is
  used inside a larger expression (e.g. x / MY_MAX_ALLOWED_BUF).
  Not referenced in the visible part of this file.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* File name appended to the charsets directory when building error messages. */
#define MY_CHARSET_INDEX "Index.xml"

/* Optional override for the charsets directory; NULL selects the built-in path. */
const char *charsets_dir= NULL;
/* Non-zero once init_available_charsets() has filled all_charsets. */
static int charset_initialized=0;
|
124 |
||
125 |
||
126 |
char *get_charsets_dir(char *buf) |
|
127 |
{
|
|
128 |
const char *sharedir= SHAREDIR; |
|
129 |
char *res; |
|
130 |
||
131 |
if (charsets_dir != NULL) |
|
132 |
strmake(buf, charsets_dir, FN_REFLEN-1); |
|
133 |
else
|
|
134 |
{
|
|
135 |
if (test_if_hard_path(sharedir) || |
|
136 |
is_prefix(sharedir, DEFAULT_CHARSET_HOME)) |
|
461
by Monty Taylor
Removed NullS. bu-bye. |
137 |
strxmov(buf, sharedir, "/", CHARSET_DIR, NULL); |
1
by brian
clean slate |
138 |
else
|
139 |
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, |
|
461
by Monty Taylor
Removed NullS. bu-bye. |
140 |
NULL); |
1
by brian
clean slate |
141 |
}
|
461
by Monty Taylor
Removed NullS. bu-bye. |
142 |
res= convert_dirname(buf,buf,NULL); |
51.3.22
by Jay Pipes
Final round of removal of DBUG in mysys/, including Makefile |
143 |
return(res); |
1
by brian
clean slate |
144 |
}
|
145 |
||
146 |
CHARSET_INFO *all_charsets[256]; |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
147 |
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci; |
1
by brian
clean slate |
148 |
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
149 |
/*
  Register a compiled-in collation: flag it as available and store it in
  all_charsets at the slot given by its own number.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
|
|
154 |
||
155 |
static void *cs_alloc(size_t size) |
|
156 |
{
|
|
157 |
return my_once_alloc(size, MYF(MY_WME)); |
|
158 |
}
|
|
159 |
||
160 |
||
146
by Brian Aker
my_bool cleanup. |
161 |
/*
  Fill all_charsets with the compiled-in character sets, once.

  myflags is passed through to init_compiled_charsets().

  charset_initialized is tested before taking THR_LOCK_charset so that
  calls made after initialization do not take the mutex; it is tested
  again under the mutex before any work is done.

  Entries that have a ctype table get their lexer state maps built; an
  entry whose state maps cannot be built is removed from the table.

  Always returns false.
*/
static bool init_available_charsets(myf myflags)
{
  if (!charset_initialized)
  {
    /*
      To make things thread safe we are not allowing other threads to
      interfere while we may be changing all_charsets
    */
    pthread_mutex_lock(&THR_LOCK_charset);
    if (!charset_initialized)
    {
      CHARSET_INFO **cs;

      memset(&all_charsets, 0, sizeof(all_charsets));
      init_compiled_charsets(myflags);

      /* Prepare compiled charsets (the last slot is not visited) */
      for (cs= all_charsets;
           cs < all_charsets+array_elements(all_charsets)-1 ;
           cs++)
      {
        if (*cs && cs[0]->ctype && init_state_maps(*cs))
          *cs= NULL;
      }

      charset_initialized= 1;
    }
    pthread_mutex_unlock(&THR_LOCK_charset);
  }
  return false;
}
|
|
202 |
||
203 |
||
204 |
void free_charsets(void) |
|
205 |
{
|
|
206 |
charset_initialized=0; |
|
207 |
}
|
|
208 |
||
209 |
||
482
by Brian Aker
Remove uint. |
210 |
/*
  Return the number of the collation called name, or 0 if there is none.
  Makes sure the charset table is initialized before searching it.
*/
uint32_t get_collation_number(const char *name)
{
  uint32_t number;

  init_available_charsets(MYF(0));
  number= get_collation_number_internal(name);
  return number;
}
|
|
215 |
||
216 |
||
482
by Brian Aker
Remove uint. |
217 |
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags) |
1
by brian
clean slate |
218 |
{
|
219 |
CHARSET_INFO **cs; |
|
220 |
init_available_charsets(MYF(0)); |
|
221 |
||
222 |
for (cs= all_charsets; |
|
223 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
224 |
cs++) |
|
225 |
{
|
|
226 |
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && |
|
383.1.12
by Brian Aker
Much closer toward UTF8 being around all the time... |
227 |
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name)) |
1
by brian
clean slate |
228 |
return cs[0]->number; |
229 |
}
|
|
230 |
return 0; |
|
231 |
}
|
|
232 |
||
233 |
||
482
by Brian Aker
Remove uint. |
234 |
const char *get_charset_name(uint32_t charset_number) |
1
by brian
clean slate |
235 |
{
|
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
236 |
const CHARSET_INFO *cs; |
1
by brian
clean slate |
237 |
init_available_charsets(MYF(0)); |
238 |
||
239 |
cs=all_charsets[charset_number]; |
|
240 |
if (cs && (cs->number == charset_number) && cs->name ) |
|
241 |
return (char*) cs->name; |
|
242 |
||
243 |
return (char*) "?"; /* this mimics find_type() */ |
|
244 |
}
|
|
245 |
||
246 |
||
482
by Brian Aker
Remove uint. |
247 |
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number) |
1
by brian
clean slate |
248 |
{
|
249 |
CHARSET_INFO *cs; |
|
250 |
/*
|
|
251 |
To make things thread safe we are not allowing other threads to interfere
|
|
252 |
while we may changing the cs_info_table
|
|
253 |
*/
|
|
254 |
pthread_mutex_lock(&THR_LOCK_charset); |
|
255 |
if ((cs= all_charsets[cs_number])) |
|
256 |
{
|
|
257 |
if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED)) |
|
258 |
{
|
|
383.1.7
by Brian Aker
Remove homebrew xml parser. |
259 |
assert(0); |
1
by brian
clean slate |
260 |
}
|
261 |
cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL; |
|
262 |
}
|
|
263 |
if (cs && !(cs->state & MY_CS_READY)) |
|
264 |
{
|
|
265 |
if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) || |
|
266 |
(cs->coll->init && cs->coll->init(cs, cs_alloc))) |
|
267 |
cs= NULL; |
|
268 |
else
|
|
269 |
cs->state|= MY_CS_READY; |
|
270 |
}
|
|
271 |
pthread_mutex_unlock(&THR_LOCK_charset); |
|
272 |
return cs; |
|
273 |
}
|
|
274 |
||
275 |
||
482
by Brian Aker
Remove uint. |
276 |
/*
  Return the collation with number cs_number.

  The default collation is returned directly when its number is requested.
  Otherwise the charset table is initialized if necessary and the entry is
  fetched through get_internal_charset(). Number 0 and numbers at or beyond
  the last slot of all_charsets yield NULL.

  When nothing is found and flags contains MY_WME, EE_UNKNOWN_CHARSET is
  reported with the number formatted as "#<n>" and the index file path.

  Returns the collation, or NULL if it cannot be provided.
*/
const CHARSET_INFO *get_charset(uint32_t cs_number, myf flags)
{
  const CHARSET_INFO *cs;

  if (cs_number == default_charset_info->number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (!cs_number || cs_number >= array_elements(all_charsets)-1)
    return NULL;

  cs= get_internal_charset(cs_number);

  if (!cs && (flags & MY_WME))
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];

    my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    cs_string[0]='#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return cs;
}
|
|
299 |
||
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
300 |
/*
  Return the collation called cs_name (a collation name).

  When no such collation can be provided and flags contains MY_WME,
  EE_UNKNOWN_COLLATION is reported with cs_name and the index file path.

  Returns the collation, or NULL if it cannot be provided.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  const CHARSET_INFO *cs= NULL;
  uint32_t cs_number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  cs_number= get_collation_number(cs_name);
  if (cs_number)
    cs= get_internal_charset(cs_number);

  if (cs)
    return cs;

  if (flags & MY_WME)
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];

    my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
  }
  return cs;
}
|
|
318 |
||
319 |
||
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
320 |
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, |
482
by Brian Aker
Remove uint. |
321 |
uint32_t cs_flags, |
1
by brian
clean slate |
322 |
myf flags) |
323 |
{
|
|
482
by Brian Aker
Remove uint. |
324 |
uint32_t cs_number; |
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
325 |
const CHARSET_INFO *cs; |
1
by brian
clean slate |
326 |
|
327 |
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */ |
|
328 |
||
329 |
cs_number= get_charset_number(cs_name, cs_flags); |
|
383.1.7
by Brian Aker
Remove homebrew xml parser. |
330 |
cs= cs_number ? get_internal_charset(cs_number) : NULL; |
1
by brian
clean slate |
331 |
|
332 |
if (!cs && (flags & MY_WME)) |
|
333 |
{
|
|
334 |
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
|
411.1.1
by Brian Aker
Work on removing GNU specific calls. |
335 |
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
1
by brian
clean slate |
336 |
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); |
337 |
}
|
|
338 |
||
51.3.22
by Jay Pipes
Final round of removal of DBUG in mysys/, including Makefile |
339 |
return(cs); |
1
by brian
clean slate |
340 |
}
|
341 |
||
342 |
||
343 |
/**
|
|
344 |
Resolve character set by the character set name (utf8, latin1, ...).
|
|
345 |
||
346 |
The function tries to resolve character set by the specified name. If
|
|
347 |
there is character set with the given name, it is assigned to the "cs"
|
|
163
by Brian Aker
Merge Monty's code. |
348 |
parameter and false is returned. If there is no such character set,
|
349 |
"default_cs" is assigned to the "cs" and true is returned.
|
|
1
by brian
clean slate |
350 |
|
351 |
@param[in] cs_name Character set name.
|
|
352 |
@param[in] default_cs Default character set.
|
|
353 |
@param[out] cs Variable to store character set.
|
|
354 |
||
163
by Brian Aker
Merge Monty's code. |
355 |
@return false if character set was resolved successfully; true if there
|
1
by brian
clean slate |
356 |
is no character set with given name.
|
357 |
*/
|
|
358 |
||
146
by Brian Aker
my_bool cleanup. |
359 |
bool resolve_charset(const char *cs_name, |
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
360 |
const CHARSET_INFO *default_cs, |
361 |
const CHARSET_INFO **cs) |
|
1
by brian
clean slate |
362 |
{
|
363 |
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0)); |
|
364 |
||
365 |
if (*cs == NULL) |
|
366 |
{
|
|
367 |
*cs= default_cs; |
|
163
by Brian Aker
Merge Monty's code. |
368 |
return true; |
1
by brian
clean slate |
369 |
}
|
370 |
||
163
by Brian Aker
Merge Monty's code. |
371 |
return false; |
1
by brian
clean slate |
372 |
}
|
373 |
||
374 |
||
375 |
/**
|
|
376 |
Resolve collation by the collation name (utf8_general_ci, ...).
|
|
377 |
||
378 |
The function tries to resolve collation by the specified name. If there
|
|
379 |
is collation with the given name, it is assigned to the "cl" parameter
|
|
163
by Brian Aker
Merge Monty's code. |
380 |
and false is returned. If there is no such collation, "default_cl" is
|
381 |
assigned to the "cl" and true is returned.
|
|
1
by brian
clean slate |
382 |
|
383 |
@param[out] cl Variable to store collation.
|
|
384 |
@param[in] cl_name Collation name.
|
|
385 |
@param[in] default_cl Default collation.
|
|
386 |
||
163
by Brian Aker
Merge Monty's code. |
387 |
@return false if collation was resolved successfully; true if there is no
|
1
by brian
clean slate |
388 |
collation with given name.
|
389 |
*/
|
|
390 |
||
146
by Brian Aker
my_bool cleanup. |
391 |
bool resolve_collation(const char *cl_name, |
264.2.6
by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code. |
392 |
const CHARSET_INFO *default_cl, |
393 |
const CHARSET_INFO **cl) |
|
1
by brian
clean slate |
394 |
{
|
395 |
*cl= get_charset_by_name(cl_name, MYF(0)); |
|
396 |
||
397 |
if (*cl == NULL) |
|
398 |
{
|
|
399 |
*cl= default_cl; |
|
163
by Brian Aker
Merge Monty's code. |
400 |
return true; |
1
by brian
clean slate |
401 |
}
|
402 |
||
163
by Brian Aker
Merge Monty's code. |
403 |
return false; |
1
by brian
clean slate |
404 |
}
|
405 |
||
406 |
||
407 |
#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call; NULL until then. */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set assumed for file system names.

  The system default ANSI code page is queried once and the answer is
  cached. Only code page 932 is recognized; every other code page maps to
  my_charset_bin.
*/
CHARSET_INFO *fs_character_set()
{
  char buf[10]= "cp";

  if (fs_cset_cache)
    return fs_cset_cache;

  /* The code page digits are written right after the "cp" prefix. */
  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                buf+2, sizeof(buf)-3);
  /*
    get_charset_by_name() cannot be used here: this function runs before
    the charset mutex used inside get_charset_by_name() is initialized.
    As only cp932 is of interest, a plain strcmp() is enough.
  */
  if (strcmp(buf, "cp932") == 0)
    fs_cset_cache= &my_charset_cp932_japanese_ci;
  else
    fs_cset_cache= &my_charset_bin;

  return fs_cset_cache;
}
#endif
|
|
431 |
||
432 |
/*
|
|
433 |
Escape apostrophes by doubling them up
|
|
434 |
||
435 |
SYNOPSIS
|
|
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
436 |
escape_quotes_for_drizzle()
|
1
by brian
clean slate |
437 |
charset_info Charset of the strings
|
438 |
to Buffer for escaped string
|
|
439 |
to_length Length of destination buffer, or 0
|
|
440 |
from The string to escape
|
|
441 |
length The length of the string to escape
|
|
442 |
||
443 |
DESCRIPTION
|
|
444 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
445 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
446 |
effect on the server.
|
|
447 |
||
448 |
NOTE
|
|
449 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
450 |
mean "big enough"
|
|
451 |
||
452 |
RETURN VALUES
|
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
453 |
UINT32_MAX The escaped string did not fit in the to buffer
|
1
by brian
clean slate |
454 |
>=0 The length of the escaped string
|
455 |
*/
|
|
456 |
||
236.3.9
by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets) |
457 |
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info, |
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
458 |
char *to, size_t to_length, |
459 |
const char *from, size_t length) |
|
1
by brian
clean slate |
460 |
{
|
461 |
const char *to_start= to; |
|
462 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
163
by Brian Aker
Merge Monty's code. |
463 |
bool overflow= false; |
1
by brian
clean slate |
464 |
#ifdef USE_MB
|
146
by Brian Aker
my_bool cleanup. |
465 |
bool use_mb_flag= use_mb(charset_info); |
1
by brian
clean slate |
466 |
#endif
|
467 |
for (end= from + length; from < end; from++) |
|
468 |
{
|
|
469 |
#ifdef USE_MB
|
|
470 |
int tmp_length; |
|
471 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
472 |
{
|
|
473 |
if (to + tmp_length > to_end) |
|
474 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
475 |
overflow= true; |
1
by brian
clean slate |
476 |
break; |
477 |
}
|
|
478 |
while (tmp_length--) |
|
479 |
*to++= *from++; |
|
480 |
from--; |
|
481 |
continue; |
|
482 |
}
|
|
483 |
/*
|
|
484 |
We don't have the same issue here with a non-multi-byte character being
|
|
485 |
turned into a multi-byte character by the addition of an escaping
|
|
486 |
character, because we are only escaping the ' character with itself.
|
|
487 |
*/
|
|
488 |
#endif
|
|
489 |
if (*from == '\'') |
|
490 |
{
|
|
491 |
if (to + 2 > to_end) |
|
492 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
493 |
overflow= true; |
1
by brian
clean slate |
494 |
break; |
495 |
}
|
|
496 |
*to++= '\''; |
|
497 |
*to++= '\''; |
|
498 |
}
|
|
499 |
else
|
|
500 |
{
|
|
501 |
if (to + 1 > to_end) |
|
502 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
503 |
overflow= true; |
1
by brian
clean slate |
504 |
break; |
505 |
}
|
|
506 |
*to++= *from; |
|
507 |
}
|
|
508 |
}
|
|
509 |
*to= 0; |
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
510 |
return overflow ? UINT32_MAX : (uint32_t) (to - to_start); |
1
by brian
clean slate |
511 |
}
|