1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
14 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
15 |
||
16 |
#include "mysys_priv.h" |
|
17 |
#include "mysys_err.h" |
|
18 |
#include <m_ctype.h> |
|
19 |
#include <m_string.h> |
|
20 |
#include <my_dir.h> |
|
21 |
#include <my_xml.h> |
|
22 |
||
23 |
||
24 |
/*
  The code below implements this functionality:

    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using charset name, collation name or collation ID
    - Setting server default character set
*/
|
|
33 |
||
34 |
/*
  Tell whether two CHARSET_INFO entries belong to the same character set.

  Returns non-zero when both arguments are the very same object, or when
  their character set names (csname) are equal according to strcmp().
*/
my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
{
  if (cs1 == cs2)
    return 1;
  return strcmp(cs1->csname, cs2->csname) == 0;
}
|
|
38 |
||
39 |
||
40 |
static uint |
|
41 |
get_collation_number_internal(const char *name) |
|
42 |
{
|
|
43 |
CHARSET_INFO **cs; |
|
44 |
for (cs= all_charsets; |
|
45 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
46 |
cs++) |
|
47 |
{
|
|
48 |
if ( cs[0] && cs[0]->name && |
|
49 |
!my_strcasecmp(&my_charset_latin1, cs[0]->name, name)) |
|
50 |
return cs[0]->number; |
|
51 |
}
|
|
52 |
return 0; |
|
53 |
}
|
|
54 |
||
55 |
||
56 |
/*
  Build the two 256-entry lookup tables of a charset:

    state_map  maps every byte value to a MY_LEX_* state
    ident_map  holds 1 for bytes whose state is MY_LEX_IDENT or
               MY_LEX_NUMBER_IDENT, 0 otherwise

  Both tables are obtained with my_once_alloc().

  Returns 0 on success, 1 if either allocation fails.
*/
static my_bool init_state_maps(CHARSET_INFO *cs)
{
  /* Bytes whose state is fixed, whatever the ctype classification says */
  static const struct
  {
    uchar ch;
    uchar state;
  } fixed_states[]=
  {
    { '_',  MY_LEX_IDENT },
    { '$',  MY_LEX_IDENT },
    { '\'', MY_LEX_STRING },
    { '.',  MY_LEX_REAL_OR_POINT },
    { '>',  MY_LEX_CMP_OP },
    { '=',  MY_LEX_CMP_OP },
    { '!',  MY_LEX_CMP_OP },
    { '<',  MY_LEX_LONG_CMP_OP },
    { '&',  MY_LEX_BOOL },
    { '|',  MY_LEX_BOOL },
    { '#',  MY_LEX_COMMENT },
    { ';',  MY_LEX_SEMICOLON },
    { ':',  MY_LEX_SET_VAR },
    { 0,    MY_LEX_EOL },
    { '\\', MY_LEX_ESCAPE },
    { '/',  MY_LEX_LONG_COMMENT },
    { '*',  MY_LEX_END_LONG_COMMENT },
    { '@',  MY_LEX_USER_END },
    { '`',  MY_LEX_USER_VARIABLE_DELIMITER },
    { '"',  MY_LEX_STRING_OR_DELIMITER }
  };
  uint i;
  uchar *states;
  uchar *idents;

  if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
    return 1;

  if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
    return 1;

  states= cs->state_map;
  idents= cs->ident_map;

  /* First pass: classify every byte using the charset's ctype data */
  for (i= 0; i < 256; i++)
  {
    if (my_isalpha(cs,i))
      states[i]= (uchar) MY_LEX_IDENT;
    else if (my_isdigit(cs,i))
      states[i]= (uchar) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, i)>1)
      states[i]= (uchar) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs,i))
      states[i]= (uchar) MY_LEX_SKIP;
    else
      states[i]= (uchar) MY_LEX_CHAR;
  }

  /* Second pass: override the bytes that have a fixed lexer meaning */
  for (i= 0; i < array_elements(fixed_states); i++)
    states[fixed_states[i].ch]= fixed_states[i].state;

  /*
    The identifier map must be derived before the hex/bin/nchar overrides
    below, so that x, b and n still count as identifier characters.
  */
  for (i= 0; i < 256; i++)
  {
    idents[i]= (uchar) (states[i] == MY_LEX_IDENT ||
                        states[i] == MY_LEX_NUMBER_IDENT);
  }

  /* Special handling of hex and binary strings */
  states[(uchar)'x']= states[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
  states[(uchar)'b']= states[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
  states[(uchar)'n']= states[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
  return 0;
}
|
|
119 |
||
120 |
||
121 |
/*
  Attach the generic 8-bit handlers to a charset.

  The charset handler is always my_charset_8bit_handler; the collation
  handler is the binary one when MY_CS_BINSORT is set in cs->state and
  the simple case-insensitive one otherwise.
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->cset= &my_charset_8bit_handler;
  cs->coll= (cs->state & MY_CS_BINSORT) ?
            &my_collation_8bit_bin_handler :
            &my_collation_8bit_simple_ci_handler;
}
|
|
130 |
||
131 |
||
132 |
||
133 |
/*
  Copy the data of one charset description into another.

  Every string or table that is present (non-NULL) in "from" is duplicated
  with my_once_strdup()/my_once_memdup() and stored in "to"; members that
  are absent in "from" leave "to" untouched. The number is taken over only
  when "from" has a non-zero one. When a ctype table is copied, the lexer
  state maps of "to" are rebuilt as well.

  Returns 0 on success, 1 as soon as any duplication (or the state map
  initialization) fails.
*/
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  if (from->number)
    to->number= from->number;

  if (from->csname &&
      !(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
    return 1;

  if (from->name &&
      !(to->name= my_once_strdup(from->name,MYF(MY_WME))))
    return 1;

  if (from->comment &&
      !(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
    return 1;

  if (from->ctype)
  {
    to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                       MY_CS_CTYPE_TABLE_SIZE,
                                       MYF(MY_WME));
    if (!to->ctype)
      return 1;
    /* The state maps are derived from ctype, so rebuild them now */
    if (init_state_maps(to))
      return 1;
  }

  if (from->to_lower)
  {
    to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                          MY_CS_TO_LOWER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_lower)
      return 1;
  }

  if (from->to_upper)
  {
    to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
                                          MY_CS_TO_UPPER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_upper)
      return 1;
  }

  if (from->sort_order)
  {
    to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
                                            MY_CS_SORT_ORDER_TABLE_SIZE,
                                            MYF(MY_WME));
    if (!to->sort_order)
      return 1;
  }

  if (from->tab_to_uni)
  {
    uint table_bytes= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
    to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni,
                                             table_bytes, MYF(MY_WME));
    if (!to->tab_to_uni)
      return 1;
  }

  if (from->tailoring &&
      !(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
    return 1;

  return 0;
}
|
|
193 |
||
194 |
||
195 |
||
196 |
/*
  Check whether a simple (8-bit) charset description is complete.

  It is complete when csname, tab_to_uni, ctype, to_upper and to_lower are
  all set, number and name are set, and it either has a sort_order table
  or carries the MY_CS_BINSORT flag.

  Returns 1 if complete, 0 otherwise.
*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
  if (!cs->csname || !cs->tab_to_uni || !cs->ctype ||
      !cs->to_upper || !cs->to_lower)
    return 0;

  if (!cs->number || !cs->name)
    return 0;

  return (cs->sort_order || (cs->state & MY_CS_BINSORT));
}
|
|
203 |
||
204 |
||
205 |
static void |
|
206 |
copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from) |
|
207 |
{
|
|
208 |
to->cset= from->cset; |
|
209 |
to->coll= from->coll; |
|
210 |
to->strxfrm_multiply= from->strxfrm_multiply; |
|
211 |
to->min_sort_char= from->min_sort_char; |
|
212 |
to->max_sort_char= from->max_sort_char; |
|
213 |
to->mbminlen= from->mbminlen; |
|
214 |
to->mbmaxlen= from->mbmaxlen; |
|
215 |
}
|
|
216 |
||
217 |
||
218 |
/*
  Register one collation description in all_charsets.

  This function is handed to my_parse_charset_xml() as its callback, so
  "cs" is a description filled in by the XML parser.

  Nothing is done (and MY_XML_OK is returned) when the description has no
  name, or when it has no number and the name is not already known to
  get_collation_number_internal().

  Otherwise the slot all_charsets[cs->number] is created if needed and the
  state flags are merged into it. Then:

    - If the slot is not a compiled-in collation, the data is copied with
      cs_copy_data() and handlers are assigned: UCA handlers for ucs2,
      utf8, utf8mb3, utf16 and utf32 (when compiled in), the simple 8-bit
      handlers for everything else.
    - If the slot is a compiled-in collation, only number, comment and
      (if still missing) csname and name are filled in.

  Finally the per-collation members of "cs" (number, primary_number,
  binary_number, name, state, sort_order) are reset for the next call.

  A collation number that does not fit into all_charsets is ignored: no
  slot is touched, the per-collation members are reset, and MY_XML_OK is
  returned.

  Returns MY_XML_OK on success, MY_XML_ERROR if a memory allocation or
  cs_copy_data() fails.
*/
static int add_collation(CHARSET_INFO *cs)
{
  CHARSET_INFO *newcs;

  if (!cs->name ||
      !(cs->number ||
        (cs->number= get_collation_number_internal(cs->name))))
    return MY_XML_OK;

  /*
    cs->number comes from the parsed description, so it must be validated
    before it is used as an index into all_charsets.
  */
  if (cs->number < array_elements(all_charsets))
  {
    if (!all_charsets[cs->number])
    {
      if (!(all_charsets[cs->number]=
         (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
        return MY_XML_ERROR;
      bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO));
    }
    newcs= all_charsets[cs->number];

    if (cs->primary_number == cs->number)
      cs->state |= MY_CS_PRIMARY;

    if (cs->binary_number == cs->number)
      cs->state |= MY_CS_BINSORT;

    newcs->state|= cs->state;

    if (!(newcs->state & MY_CS_COMPILED))
    {
      if (cs_copy_data(newcs,cs))
        return MY_XML_ERROR;

      newcs->levels_for_compare= 1;
      newcs->levels_for_order= 1;

      if (!strcmp(cs->csname,"ucs2") )
      {
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else if (!strcmp(cs->csname, "utf8"))
      {
        /*
          This branch uses the utf8mb4 collation, so it must be guarded by
          the utf8mb4 feature macro (not the utf8mb3 one).
        */
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
      }
      else if (!strcmp(cs->csname, "utf8mb3"))
      {
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf8mb3_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
      }
      else if (!strcmp(cs->csname, "utf16"))
      {
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else if (!strcmp(cs->csname, "utf32"))
      {
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else
      {
        uchar *sort_order= newcs->sort_order;
        simple_cs_init_functions(newcs);
        newcs->mbminlen= 1;
        newcs->mbmaxlen= 1;
        if (simple_cs_is_full(newcs))
        {
          newcs->state |= MY_CS_LOADED;
        }
        newcs->state|= MY_CS_AVAILABLE;

        /*
          Check if case sensitive sort order: A < a < B.
          We need MY_CS_FLAG for regex library, and for
          case sensitivity flag for 5.0 client protocol,
          to support isCaseSensitive() method in JDBC driver
        */
        if (sort_order && sort_order['A'] < sort_order['a'] &&
                          sort_order['a'] < sort_order['B'])
          newcs->state|= MY_CS_CSSORT;

        if (my_charset_is_8bit_pure_ascii(newcs))
          newcs->state|= MY_CS_PUREASCII;
        if (!my_charset_is_ascii_compatible(cs))
          newcs->state|= MY_CS_NONASCII;
      }
    }
    else
    {
      /*
        We need the below to make get_charset_name()
        and get_charset_number() working even if a
        character set has not been really incompiled.
        The above functions are used for example
        in error message compiler extra/comp_err.c.
        If a character set was compiled, this information
        will get lost and overwritten in add_compiled_collation().
      */
      newcs->number= cs->number;
      if (cs->comment)
        if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
          return MY_XML_ERROR;
      if (cs->csname && !newcs->csname)
        if (!(newcs->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
          return MY_XML_ERROR;
      if (cs->name && !newcs->name)
        if (!(newcs->name= my_once_strdup(cs->name,MYF(MY_WME))))
          return MY_XML_ERROR;
    }
  }

  /* Forget the per-collation data so it cannot leak into the next call */
  cs->number= 0;
  cs->primary_number= 0;
  cs->binary_number= 0;
  cs->name= NULL;
  cs->state= 0;
  cs->sort_order= NULL;
  return MY_XML_OK;
}
|
|
344 |
||
345 |
||
346 |
/*
  Largest charset definition file (in bytes) that my_read_charset_file()
  accepts. Parenthesized so the macro is safe inside any expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* File name appended to the charsets directory to locate the index file */
#define MY_CHARSET_INDEX "Index.xml"

/*
  Directory with the charset definition files. While it is NULL,
  get_charsets_dir() builds the path from SHAREDIR instead.
*/
const char *charsets_dir= NULL;
/*
  Set to 1 by init_available_charsets() once all_charsets has been filled,
  and back to 0 by free_charsets().
*/
static int charset_initialized=0;
|
351 |
||
352 |
||
353 |
/*
  Load a charset definition file and pass its contents to the XML parser,
  which calls add_collation() for the collations it finds.

  filename  Path of the file to read
  myflags   Flags handed on to my_malloc(), my_open(), my_read(),
            my_close() and my_free()

  The file is rejected if its size cannot be determined, if it is larger
  than MY_MAX_ALLOWED_BUF bytes, or if it cannot be read completely.
  Errors reported by the XML parser are deliberately not treated as a
  failure (the diagnostic code is still disabled under NOT_YET).

  Returns FALSE when the file was read, TRUE otherwise.
*/
static my_bool my_read_charset_file(const char *filename, myf myflags)
{
  uchar *buf;
  int  fd;
  uint len, tmp_len;
  struct stat stat_info;

  if (stat(filename, &stat_info))
    return TRUE;

  /*
    Validate the size before narrowing it to uint: a very large st_size
    could otherwise wrap around to a small value and slip past the limit.
  */
  if (stat_info.st_size < 0 || stat_info.st_size > MY_MAX_ALLOWED_BUF)
    return TRUE;
  len= (uint) stat_info.st_size;

  if (!(buf= (uchar*) my_malloc(len,myflags)))
    return TRUE;

  if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
    goto error;
  tmp_len=my_read(fd, buf, len, myflags);
  my_close(fd,myflags);
  if (tmp_len != len)
    goto error;

  if (my_parse_charset_xml((char*) buf,len,add_collation))
  {
#ifdef NOT_YET
    printf("ERROR at line %d pos %d '%s'\n",
	   my_xml_error_lineno(&p)+1,
	   my_xml_error_pos(&p),
	   my_xml_error_string(&p));
#endif
  }

  my_free(buf, myflags);
  return FALSE;

error:
  my_free(buf, myflags);
  return TRUE;
}
|
|
389 |
||
390 |
||
391 |
char *get_charsets_dir(char *buf) |
|
392 |
{
|
|
393 |
const char *sharedir= SHAREDIR; |
|
394 |
char *res; |
|
395 |
DBUG_ENTER("get_charsets_dir"); |
|
396 |
||
397 |
if (charsets_dir != NULL) |
|
398 |
strmake(buf, charsets_dir, FN_REFLEN-1); |
|
399 |
else
|
|
400 |
{
|
|
401 |
if (test_if_hard_path(sharedir) || |
|
402 |
is_prefix(sharedir, DEFAULT_CHARSET_HOME)) |
|
403 |
strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); |
|
404 |
else
|
|
405 |
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, |
|
406 |
NullS); |
|
407 |
}
|
|
408 |
res= convert_dirname(buf,buf,NullS); |
|
409 |
DBUG_PRINT("info",("charsets dir: '%s'", buf)); |
|
410 |
DBUG_RETURN(res); |
|
411 |
}
|
|
412 |
||
413 |
CHARSET_INFO *all_charsets[256]; |
|
414 |
CHARSET_INFO *default_charset_info = &my_charset_latin1; |
|
415 |
||
416 |
/*
  Register a compiled-in collation: flag it as MY_CS_AVAILABLE and store it
  in all_charsets at the index given by its number (replacing whatever was
  stored there before).
*/
void add_compiled_collation(CHARSET_INFO *cs)
{
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
|
|
421 |
||
422 |
static void *cs_alloc(size_t size) |
|
423 |
{
|
|
424 |
return my_once_alloc(size, MYF(MY_WME)); |
|
425 |
}
|
|
426 |
||
427 |
||
428 |
/*
  Fill all_charsets on first use.

  The charset_initialized flag is tested once without holding
  THR_LOCK_charset (so that callers do not take the lock on every call)
  and once more after the lock has been taken. The thread that finds it
  still clear registers the compiled-in charsets, builds their lexer state
  maps (an entry whose maps cannot be built is removed from the table) and
  then reads the charset index file.

  Returns the result of reading the index file for the call that performed
  the initialization, FALSE for every other call.
*/
static my_bool init_available_charsets(myf myflags)
{
  char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
  my_bool error= FALSE;
  uint slot;

  /* Cheap test first; see the description above */
  if (charset_initialized)
    return error;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing the charset table
  */
  pthread_mutex_lock(&THR_LOCK_charset);
  if (!charset_initialized)
  {
    bzero(&all_charsets,sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Prepare the compiled charsets (the last slot is not examined) */
    for (slot= 0; slot < array_elements(all_charsets)-1; slot++)
    {
      CHARSET_INFO *entry= all_charsets[slot];

      if (entry && entry->ctype && init_state_maps(entry))
        all_charsets[slot]= NULL;
    }

    strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
    error= my_read_charset_file(fname,myflags);
    charset_initialized=1;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);

  return error;
}
|
|
470 |
||
471 |
||
472 |
void free_charsets(void) |
|
473 |
{
|
|
474 |
charset_initialized=0; |
|
475 |
}
|
|
476 |
||
477 |
||
478 |
/*
  Find a collation number by collation name, making sure the charset table
  has been initialized first.

  Returns the collation number, or 0 if the name is unknown.
*/
uint get_collation_number(const char *name)
{
  uint id;

  init_available_charsets(MYF(0));
  id= get_collation_number_internal(name);
  return id;
}
|
|
483 |
||
484 |
||
485 |
/*
  Find the number of a collation that belongs to the named character set.

  charset_name  Character set name, compared ignoring case according to
                my_charset_latin1
  cs_flags      Only entries having at least one of these state bits set
                are considered

  Returns the number of the first matching entry of all_charsets (the
  final slot is not examined), or 0 if there is none.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  uint idx;

  init_available_charsets(MYF(0));

  for (idx= 0; idx < array_elements(all_charsets)-1; idx++)
  {
    CHARSET_INFO *entry= all_charsets[idx];

    if (!entry || !entry->csname || !(entry->state & cs_flags))
      continue;
    if (!my_strcasecmp(&my_charset_latin1, entry->csname, charset_name))
      return entry->number;
  }
  return 0;
}
|
|
500 |
||
501 |
||
502 |
/*
  Return the collation name stored for a collation number.

  charset_number  Index into all_charsets

  Returns the name of the entry if the number lies inside the table and
  the slot holds an entry with that number and a name; "?" in every other
  case, including numbers beyond the end of the table.
*/
const char *get_charset_name(uint charset_number)
{
  init_available_charsets(MYF(0));

  /* Never index past the end of all_charsets */
  if (charset_number < array_elements(all_charsets))
  {
    CHARSET_INFO *cs= all_charsets[charset_number];

    if (cs && (cs->number == charset_number) && cs->name )
      return (char*) cs->name;
  }

  return (char*) "?";   /* this mimics find_type() */
}
|
|
513 |
||
514 |
||
515 |
/*
  Return a usable CHARSET_INFO for a collation number, loading and
  initializing it if necessary. The whole operation runs with
  THR_LOCK_charset held.

  cs_number  Index into all_charsets; the callers in this file validate
             the number before calling
  flags      Passed to my_read_charset_file()

  If the entry is neither compiled in nor loaded yet, the file
  "<csname>.xml" in the charsets directory is read first. An entry that is
  not MY_CS_AVAILABLE afterwards is treated as missing. An entry that is
  not yet MY_CS_READY gets its charset and collation init() hooks run (if
  present) and is marked ready when both succeed.

  Returns the entry, or NULL if it is missing, unavailable, or failed to
  initialize.
*/
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
  char buf[FN_REFLEN];
  CHARSET_INFO *cs;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing the charset table
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  cs= all_charsets[cs_number];
  if (cs)
  {
    if (!(cs->state & (MY_CS_COMPILED | MY_CS_LOADED)))
    {
      strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
      my_read_charset_file(buf,flags);
    }
    if (!(cs->state & MY_CS_AVAILABLE))
      cs= NULL;
  }

  if (cs && !(cs->state & MY_CS_READY))
  {
    my_bool init_failed=
      (cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
      (cs->coll->init && cs->coll->init(cs, cs_alloc));

    if (init_failed)
      cs= NULL;
    else
      cs->state|= MY_CS_READY;
  }

  pthread_mutex_unlock(&THR_LOCK_charset);
  return cs;
}
|
|
544 |
||
545 |
||
546 |
/*
  Get a character set / collation by its number.

  cs_number  Collation number
  flags      Passed to get_internal_charset(); if MY_WME is set, a
             failure is reported with EE_UNKNOWN_CHARSET

  The default charset is returned directly when its number is requested.
  Number 0 and numbers at or beyond the last slot of all_charsets yield
  NULL without an error being reported.

  Returns the CHARSET_INFO, or NULL if it is not available.
*/
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
  CHARSET_INFO *cs;

  if (cs_number == default_charset_info->number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (!cs_number || cs_number >= array_elements(all_charsets)-1)
    return NULL;

  cs= get_internal_charset(cs_number, flags);
  if (cs)
    return cs;

  if (flags & MY_WME)
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    /* The charset is reported as '#' followed by its decimal number */
    cs_string[0]='#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return NULL;
}
|
|
569 |
||
570 |
/*
  Get a collation by its collation name.

  cs_name  Name handed to get_collation_number()
  flags    Passed to get_internal_charset(); if MY_WME is set, a failure
           is reported with EE_UNKNOWN_COLLATION

  Returns the CHARSET_INFO, or NULL if the name is unknown or the
  collation is not available.
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  uint cs_number;
  CHARSET_INFO *cs= NULL;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  cs_number= get_collation_number(cs_name);
  if (cs_number)
    cs= get_internal_charset(cs_number,flags);

  if (!cs && (flags & MY_WME))
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
  }

  return cs;
}
|
|
588 |
||
589 |
||
590 |
CHARSET_INFO *get_charset_by_csname(const char *cs_name, |
|
591 |
uint cs_flags, |
|
592 |
myf flags) |
|
593 |
{
|
|
594 |
uint cs_number; |
|
595 |
CHARSET_INFO *cs; |
|
596 |
DBUG_ENTER("get_charset_by_csname"); |
|
597 |
DBUG_PRINT("enter",("name: '%s'", cs_name)); |
|
598 |
||
599 |
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */ |
|
600 |
||
601 |
cs_number= get_charset_number(cs_name, cs_flags); |
|
602 |
cs= cs_number ? get_internal_charset(cs_number, flags) : NULL; |
|
603 |
||
604 |
if (!cs && (flags & MY_WME)) |
|
605 |
{
|
|
606 |
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
|
607 |
strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
|
608 |
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); |
|
609 |
}
|
|
610 |
||
611 |
DBUG_RETURN(cs); |
|
612 |
}
|
|
613 |
||
614 |
||
615 |
/**
|
|
616 |
Resolve character set by the character set name (utf8, latin1, ...).
|
|
617 |
||
618 |
The function tries to resolve character set by the specified name. If
|
|
619 |
there is character set with the given name, it is assigned to the "cs"
|
|
620 |
parameter and FALSE is returned. If there is no such character set,
|
|
621 |
"default_cs" is assigned to the "cs" and TRUE is returned.
|
|
622 |
||
623 |
@param[in] cs_name Character set name.
|
|
624 |
@param[in] default_cs Default character set.
|
|
625 |
@param[out] cs Variable to store character set.
|
|
626 |
||
627 |
@return FALSE if character set was resolved successfully; TRUE if there
|
|
628 |
is no character set with given name.
|
|
629 |
*/
|
|
630 |
||
631 |
my_bool resolve_charset(const char *cs_name, |
|
632 |
CHARSET_INFO *default_cs, |
|
633 |
CHARSET_INFO **cs) |
|
634 |
{
|
|
635 |
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0)); |
|
636 |
||
637 |
if (*cs == NULL) |
|
638 |
{
|
|
639 |
*cs= default_cs; |
|
640 |
return TRUE; |
|
641 |
}
|
|
642 |
||
643 |
return FALSE; |
|
644 |
}
|
|
645 |
||
646 |
||
647 |
/**
|
|
648 |
Resolve collation by the collation name (utf8_general_ci, ...).
|
|
649 |
||
650 |
The function tries to resolve collation by the specified name. If there
|
|
651 |
is collation with the given name, it is assigned to the "cl" parameter
|
|
652 |
and FALSE is returned. If there is no such collation, "default_cl" is
|
|
653 |
assigned to the "cl" and TRUE is returned.
|
|
654 |
||
655 |
@param[out] cl Variable to store collation.
|
|
656 |
@param[in] cl_name Collation name.
|
|
657 |
@param[in] default_cl Default collation.
|
|
658 |
||
659 |
@return FALSE if collation was resolved successfully; TRUE if there is no
|
|
660 |
collation with given name.
|
|
661 |
*/
|
|
662 |
||
663 |
my_bool resolve_collation(const char *cl_name, |
|
664 |
CHARSET_INFO *default_cl, |
|
665 |
CHARSET_INFO **cl) |
|
666 |
{
|
|
667 |
*cl= get_charset_by_name(cl_name, MYF(0)); |
|
668 |
||
669 |
if (*cl == NULL) |
|
670 |
{
|
|
671 |
*cl= default_cl; |
|
672 |
return TRUE; |
|
673 |
}
|
|
674 |
||
675 |
return FALSE; |
|
676 |
}
|
|
677 |
||
678 |
||
679 |
/*
|
|
680 |
Escape string with backslashes (\)
|
|
681 |
||
682 |
SYNOPSIS
|
|
683 |
escape_string_for_mysql()
|
|
684 |
charset_info Charset of the strings
|
|
685 |
to Buffer for escaped string
|
|
686 |
to_length Length of destination buffer, or 0
|
|
687 |
from The string to escape
|
|
688 |
length The length of the string to escape
|
|
689 |
||
690 |
DESCRIPTION
|
|
691 |
This escapes the contents of a string by adding backslashes before special
|
|
692 |
characters, and turning others into specific escape sequences, such as
|
|
693 |
turning newlines into \n and null bytes into \0.
|
|
694 |
||
695 |
NOTE
|
|
696 |
To maintain compatibility with the old C API, to_length may be 0 to mean
|
|
697 |
"big enough"
|
|
698 |
||
699 |
RETURN VALUES
|
|
700 |
(size_t) -1 The escaped string did not fit in the to buffer
|
|
701 |
# The length of the escaped string
|
|
702 |
*/
|
|
703 |
||
704 |
size_t escape_string_for_mysql(CHARSET_INFO *charset_info, |
|
705 |
char *to, size_t to_length, |
|
706 |
const char *from, size_t length) |
|
707 |
{
|
|
708 |
const char *to_start= to; |
|
709 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
710 |
my_bool overflow= FALSE; |
|
711 |
#ifdef USE_MB
|
|
712 |
my_bool use_mb_flag= use_mb(charset_info); |
|
713 |
#endif
|
|
714 |
for (end= from + length; from < end; from++) |
|
715 |
{
|
|
716 |
char escape= 0; |
|
717 |
#ifdef USE_MB
|
|
718 |
int tmp_length; |
|
719 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
720 |
{
|
|
721 |
if (to + tmp_length > to_end) |
|
722 |
{
|
|
723 |
overflow= TRUE; |
|
724 |
break; |
|
725 |
}
|
|
726 |
while (tmp_length--) |
|
727 |
*to++= *from++; |
|
728 |
from--; |
|
729 |
continue; |
|
730 |
}
|
|
731 |
/*
|
|
732 |
If the next character appears to begin a multi-byte character, we
|
|
733 |
escape that first byte of that apparent multi-byte character. (The
|
|
734 |
character just looks like a multi-byte character -- if it were actually
|
|
735 |
a multi-byte character, it would have been passed through in the test
|
|
736 |
above.)
|
|
737 |
||
738 |
Without this check, we can create a problem by converting an invalid
|
|
739 |
multi-byte character into a valid one. For example, 0xbf27 is not
|
|
740 |
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
|
|
741 |
*/
|
|
742 |
if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1) |
|
743 |
escape= *from; |
|
744 |
else
|
|
745 |
#endif
|
|
746 |
switch (*from) { |
|
747 |
case 0: /* Must be escaped for 'mysql' */ |
|
748 |
escape= '0'; |
|
749 |
break; |
|
750 |
case '\n': /* Must be escaped for logs */ |
|
751 |
escape= 'n'; |
|
752 |
break; |
|
753 |
case '\r': |
|
754 |
escape= 'r'; |
|
755 |
break; |
|
756 |
case '\\': |
|
757 |
escape= '\\'; |
|
758 |
break; |
|
759 |
case '\'': |
|
760 |
escape= '\''; |
|
761 |
break; |
|
762 |
case '"': /* Better safe than sorry */ |
|
763 |
escape= '"'; |
|
764 |
break; |
|
765 |
case '\032': /* This gives problems on Win32 */ |
|
766 |
escape= 'Z'; |
|
767 |
break; |
|
768 |
}
|
|
769 |
if (escape) |
|
770 |
{
|
|
771 |
if (to + 2 > to_end) |
|
772 |
{
|
|
773 |
overflow= TRUE; |
|
774 |
break; |
|
775 |
}
|
|
776 |
*to++= '\\'; |
|
777 |
*to++= escape; |
|
778 |
}
|
|
779 |
else
|
|
780 |
{
|
|
781 |
if (to + 1 > to_end) |
|
782 |
{
|
|
783 |
overflow= TRUE; |
|
784 |
break; |
|
785 |
}
|
|
786 |
*to++= *from; |
|
787 |
}
|
|
788 |
}
|
|
789 |
*to= 0; |
|
790 |
return overflow ? (size_t) -1 : (size_t) (to - to_start); |
|
791 |
}
|
|
792 |
||
793 |
||
794 |
#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call, reused by later calls */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set to use for file system names.

  The system default ANSI code page is queried once with GetLocaleInfo()
  and turned into a name of the form "cp<number>". The answer is
  my_charset_cp932_japanese_ci for "cp932" and my_charset_bin for
  everything else; it is cached for subsequent calls.
*/
CHARSET_INFO *fs_character_set()
{
  char buf[10]= "cp";

  if (fs_cset_cache)
    return fs_cset_cache;

  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                buf+2, sizeof(buf)-3);
  /*
    We cannot call get_charset_by_name here
    because fs_character_set() is executed before
    the initialization of the charset mutex that
    is used inside get_charset_by_name.
    As we're now interested in cp932 only,
    let's just detect it using strcmp().
  */
  if (!strcmp(buf, "cp932"))
    fs_cset_cache= &my_charset_cp932_japanese_ci;
  else
    fs_cset_cache= &my_charset_bin;

  return fs_cset_cache;
}
#endif
|
|
818 |
||
819 |
/*
|
|
820 |
Escape apostrophes by doubling them up
|
|
821 |
||
822 |
SYNOPSIS
|
|
823 |
escape_quotes_for_mysql()
|
|
824 |
charset_info Charset of the strings
|
|
825 |
to Buffer for escaped string
|
|
826 |
to_length Length of destination buffer, or 0
|
|
827 |
from The string to escape
|
|
828 |
length The length of the string to escape
|
|
829 |
||
830 |
DESCRIPTION
|
|
831 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
832 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
833 |
effect on the server.
|
|
834 |
||
835 |
NOTE
|
|
836 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
837 |
mean "big enough"
|
|
838 |
||
839 |
RETURN VALUES
|
|
840 |
~0 The escaped string did not fit in the to buffer
|
|
841 |
>=0 The length of the escaped string
|
|
842 |
*/
|
|
843 |
||
844 |
size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info, |
|
845 |
char *to, size_t to_length, |
|
846 |
const char *from, size_t length) |
|
847 |
{
|
|
848 |
const char *to_start= to; |
|
849 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
850 |
my_bool overflow= FALSE; |
|
851 |
#ifdef USE_MB
|
|
852 |
my_bool use_mb_flag= use_mb(charset_info); |
|
853 |
#endif
|
|
854 |
for (end= from + length; from < end; from++) |
|
855 |
{
|
|
856 |
#ifdef USE_MB
|
|
857 |
int tmp_length; |
|
858 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
859 |
{
|
|
860 |
if (to + tmp_length > to_end) |
|
861 |
{
|
|
862 |
overflow= TRUE; |
|
863 |
break; |
|
864 |
}
|
|
865 |
while (tmp_length--) |
|
866 |
*to++= *from++; |
|
867 |
from--; |
|
868 |
continue; |
|
869 |
}
|
|
870 |
/*
|
|
871 |
We don't have the same issue here with a non-multi-byte character being
|
|
872 |
turned into a multi-byte character by the addition of an escaping
|
|
873 |
character, because we are only escaping the ' character with itself.
|
|
874 |
*/
|
|
875 |
#endif
|
|
876 |
if (*from == '\'') |
|
877 |
{
|
|
878 |
if (to + 2 > to_end) |
|
879 |
{
|
|
880 |
overflow= TRUE; |
|
881 |
break; |
|
882 |
}
|
|
883 |
*to++= '\''; |
|
884 |
*to++= '\''; |
|
885 |
}
|
|
886 |
else
|
|
887 |
{
|
|
888 |
if (to + 1 > to_end) |
|
889 |
{
|
|
890 |
overflow= TRUE; |
|
891 |
break; |
|
892 |
}
|
|
893 |
*to++= *from; |
|
894 |
}
|
|
895 |
}
|
|
896 |
*to= 0; |
|
897 |
return overflow ? (ulong)~0 : (ulong) (to - to_start); |
|
898 |
}
|