71
72
for (i=0; i < 256 ; i++)
73
74
if (my_isalpha(cs,i))
74
state_map[i]=(unsigned char) MY_LEX_IDENT;
75
state_map[i]=(uchar) MY_LEX_IDENT;
75
76
else if (my_isdigit(cs,i))
76
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
77
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
77
78
#if defined(USE_MB) && defined(USE_MB_IDENT)
78
79
else if (my_mbcharlen(cs, i)>1)
79
state_map[i]=(unsigned char) MY_LEX_IDENT;
80
state_map[i]=(uchar) MY_LEX_IDENT;
81
82
else if (my_isspace(cs,i))
82
state_map[i]=(unsigned char) MY_LEX_SKIP;
83
state_map[i]=(uchar) MY_LEX_SKIP;
84
state_map[i]=(unsigned char) MY_LEX_CHAR;
85
state_map[i]=(uchar) MY_LEX_CHAR;
86
state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
87
state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
88
state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
89
state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
90
state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
91
state_map[(unsigned char)'&']=state_map[(unsigned char)'|']=(unsigned char) MY_LEX_BOOL;
92
state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
93
state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
94
state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
95
state_map[0]=(unsigned char) MY_LEX_EOL;
96
state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
97
state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
98
state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
99
state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
100
state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
101
state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;
87
state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
88
state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
89
state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
90
state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
91
state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
92
state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
93
state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
94
state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;
95
state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
96
state_map[0]=(uchar) MY_LEX_EOL;
97
state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
98
state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
99
state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
100
state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
101
state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
102
state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
104
105
Create a second map to make it faster to find identifiers
106
107
for (i=0; i < 256 ; i++)
108
ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
109
ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
109
110
state_map[i] == MY_LEX_NUMBER_IDENT);
112
113
/* Special handling of hex and binary strings */
113
state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
114
state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
114
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
115
state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
120
static void simple_cs_init_functions(CHARSET_INFO *cs)
122
if (cs->state & MY_CS_BINSORT)
123
cs->coll= &my_collation_8bit_bin_handler;
125
cs->coll= &my_collation_8bit_simple_ci_handler;
127
cs->cset= &my_charset_8bit_handler;
132
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
134
to->number= from->number ? from->number : to->number;
137
if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
141
if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
145
if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
150
if (!(to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
151
MY_CS_CTYPE_TABLE_SIZE,
154
if (init_state_maps(to))
158
if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
159
MY_CS_TO_LOWER_TABLE_SIZE,
164
if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
165
MY_CS_TO_UPPER_TABLE_SIZE,
168
if (from->sort_order)
170
if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
171
MY_CS_SORT_ORDER_TABLE_SIZE,
176
if (from->tab_to_uni)
178
uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16_t);
179
if (!(to->tab_to_uni= (uint16_t*) my_once_memdup((char*)from->tab_to_uni,
184
if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
195
static bool simple_cs_is_full(CHARSET_INFO *cs)
197
return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
199
(cs->number && cs->name &&
200
(cs->sort_order || (cs->state & MY_CS_BINSORT) )));
205
copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
207
to->cset= from->cset;
208
to->coll= from->coll;
209
to->strxfrm_multiply= from->strxfrm_multiply;
210
to->min_sort_char= from->min_sort_char;
211
to->max_sort_char= from->max_sort_char;
212
to->mbminlen= from->mbminlen;
213
to->mbmaxlen= from->mbmaxlen;
217
static int add_collation(CHARSET_INFO *cs)
219
if (cs->name && (cs->number ||
220
(cs->number=get_collation_number_internal(cs->name))))
222
if (!all_charsets[cs->number])
224
if (!(all_charsets[cs->number]=
225
(CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
227
memset(all_charsets[cs->number], 0, sizeof(CHARSET_INFO));
230
if (cs->primary_number == cs->number)
231
cs->state |= MY_CS_PRIMARY;
233
if (cs->binary_number == cs->number)
234
cs->state |= MY_CS_BINSORT;
236
all_charsets[cs->number]->state|= cs->state;
238
if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
240
CHARSET_INFO *newcs= all_charsets[cs->number];
241
if (cs_copy_data(all_charsets[cs->number],cs))
244
newcs->levels_for_compare= 1;
245
newcs->levels_for_order= 1;
247
if (!strcmp(cs->csname,"ucs2") )
249
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
250
copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
251
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
254
else if (!strcmp(cs->csname, "utf8"))
256
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
257
copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
258
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
261
else if (!strcmp(cs->csname, "utf8mb3"))
263
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
264
copy_uca_collation(newcs, &my_charset_utf8mb3_unicode_ci);
265
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
268
else if (!strcmp(cs->csname, "utf16"))
270
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
271
copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
272
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
275
else if (!strcmp(cs->csname, "utf32"))
277
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
278
copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
279
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
284
uchar *sort_order= all_charsets[cs->number]->sort_order;
285
simple_cs_init_functions(all_charsets[cs->number]);
288
if (simple_cs_is_full(all_charsets[cs->number]))
290
all_charsets[cs->number]->state |= MY_CS_LOADED;
292
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
295
Check for a case-sensitive sort order: A < a < B.
296
We need the MY_CS_CSSORT flag for the regex library, and for the
297
case sensitivity flag for 5.0 client protocol,
298
to support isCaseSensitive() method in JDBC driver
300
if (sort_order && sort_order['A'] < sort_order['a'] &&
301
sort_order['a'] < sort_order['B'])
302
all_charsets[cs->number]->state|= MY_CS_CSSORT;
304
if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
305
all_charsets[cs->number]->state|= MY_CS_PUREASCII;
306
if (!my_charset_is_ascii_compatible(cs))
307
all_charsets[cs->number]->state|= MY_CS_NONASCII;
313
We need the below to make get_charset_name()
314
and get_charset_number() working even if a
315
character set has not actually been compiled in.
316
The above functions are used for example
317
in error message compiler extra/comp_err.c.
318
If a character set was compiled, this information
319
will get lost and overwritten in add_compiled_collation().
321
CHARSET_INFO *dst= all_charsets[cs->number];
322
dst->number= cs->number;
324
if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
326
if (cs->csname && !dst->csname)
327
if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
329
if (cs->name && !dst->name)
330
if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
334
cs->primary_number= 0;
335
cs->binary_number= 0;
338
cs->sort_order= NULL;
345
#define MY_MAX_ALLOWED_BUF 1024*1024
119
346
#define MY_CHARSET_INDEX "Index.xml"
121
348
const char *charsets_dir= NULL;
122
349
static int charset_initialized=0;
352
static bool my_read_charset_file(const char *filename, myf myflags)
357
struct stat stat_info;
359
if (stat(filename, &stat_info) ||
360
((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||
361
!(buf= (uchar*) my_malloc(len,myflags)))
364
if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
366
tmp_len=my_read(fd, buf, len, myflags);
367
my_close(fd,myflags);
371
if (my_parse_charset_xml((char*) buf,len,add_collation))
374
printf("ERROR at line %d pos %d '%s'\n",
375
my_xml_error_lineno(&p)+1,
376
my_xml_error_pos(&p),
377
my_xml_error_string(&p));
381
my_free(buf, myflags);
385
my_free(buf, myflags);
125
390
char *get_charsets_dir(char *buf)
127
392
const char *sharedir= SHAREDIR;
675
Escape string with backslashes (\)
678
escape_string_for_drizzle()
679
charset_info Charset of the strings
680
to Buffer for escaped string
681
to_length Length of destination buffer, or 0
682
from The string to escape
683
length The length of the string to escape
686
This escapes the contents of a string by adding backslashes before special
687
characters, and turning others into specific escape sequences, such as
688
turning newlines into \n and null bytes into \0.
691
To maintain compatibility with the old C API, to_length may be 0 to mean "big enough"; the destination is then assumed to hold at least 2*length bytes.
695
(size_t) -1 The escaped string did not fit in the to buffer
696
# The length of the escaped string
699
size_t escape_string_for_drizzle(const CHARSET_INFO *charset_info,
700
char *to, size_t to_length,
701
const char *from, size_t length)
703
const char *to_start= to;
704
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
705
bool overflow= false;
707
bool use_mb_flag= use_mb(charset_info);
709
for (end= from + length; from < end; from++)
714
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
716
if (to + tmp_length > to_end)
727
If the next character appears to begin a multi-byte character, we
728
escape the first byte of that apparent multi-byte character. (The
729
character just looks like a multi-byte character -- if it were actually
730
a multi-byte character, it would have been passed through in the test
733
Without this check, we can create a problem by converting an invalid
734
multi-byte character into a valid one. For example, 0xbf27 is not
735
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
737
if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
742
case 0: /* Must be escaped for 'mysql' */
745
case '\n': /* Must be escaped for logs */
757
case '"': /* Better safe than sorry */
760
case '\032': /* This gives problems on Win32 */
785
return overflow ? (size_t) -1 : (size_t) (to - to_start);
406
789
#ifdef BACKSLASH_MBTAIL
407
790
static CHARSET_INFO *fs_cset_cache= NULL;