71
72
for (i=0; i < 256 ; i++)
73
74
if (my_isalpha(cs,i))
74
state_map[i]=(unsigned char) MY_LEX_IDENT;
75
state_map[i]=(uchar) MY_LEX_IDENT;
75
76
else if (my_isdigit(cs,i))
76
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
77
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
77
78
#if defined(USE_MB) && defined(USE_MB_IDENT)
78
79
else if (my_mbcharlen(cs, i)>1)
79
state_map[i]=(unsigned char) MY_LEX_IDENT;
80
state_map[i]=(uchar) MY_LEX_IDENT;
81
82
else if (my_isspace(cs,i))
82
state_map[i]=(unsigned char) MY_LEX_SKIP;
83
state_map[i]=(uchar) MY_LEX_SKIP;
84
state_map[i]=(unsigned char) MY_LEX_CHAR;
85
state_map[i]=(uchar) MY_LEX_CHAR;
86
state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
87
state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
88
state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
89
state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
90
state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
91
state_map[(unsigned char)'&']=state_map[(unsigned char)'|']=(unsigned char) MY_LEX_BOOL;
92
state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
93
state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
94
state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
95
state_map[0]=(unsigned char) MY_LEX_EOL;
96
state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
97
state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
98
state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
99
state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
100
state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
101
state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;
87
state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
88
state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
89
state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
90
state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
91
state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
92
state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
93
state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
94
state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;
95
state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
96
state_map[0]=(uchar) MY_LEX_EOL;
97
state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
98
state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
99
state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
100
state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
101
state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
102
state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
104
105
Create a second map to make it faster to find identifiers
106
107
for (i=0; i < 256 ; i++)
108
ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
109
ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
109
110
state_map[i] == MY_LEX_NUMBER_IDENT);
112
113
/* Special handling of hex and binary strings */
113
state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
114
state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
114
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
115
state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
116
state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
121
static void simple_cs_init_functions(CHARSET_INFO *cs)
123
if (cs->state & MY_CS_BINSORT)
124
cs->coll= &my_collation_8bit_bin_handler;
126
cs->coll= &my_collation_8bit_simple_ci_handler;
128
cs->cset= &my_charset_8bit_handler;
133
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
135
to->number= from->number ? from->number : to->number;
138
if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
142
if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
146
if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
151
if (!(to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
152
MY_CS_CTYPE_TABLE_SIZE,
155
if (init_state_maps(to))
159
if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
160
MY_CS_TO_LOWER_TABLE_SIZE,
165
if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
166
MY_CS_TO_UPPER_TABLE_SIZE,
169
if (from->sort_order)
171
if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
172
MY_CS_SORT_ORDER_TABLE_SIZE,
177
if (from->tab_to_uni)
179
uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16_t);
180
if (!(to->tab_to_uni= (uint16_t*) my_once_memdup((char*)from->tab_to_uni,
185
if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
196
static bool simple_cs_is_full(CHARSET_INFO *cs)
198
return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
200
(cs->number && cs->name &&
201
(cs->sort_order || (cs->state & MY_CS_BINSORT) )));
206
copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
208
to->cset= from->cset;
209
to->coll= from->coll;
210
to->strxfrm_multiply= from->strxfrm_multiply;
211
to->min_sort_char= from->min_sort_char;
212
to->max_sort_char= from->max_sort_char;
213
to->mbminlen= from->mbminlen;
214
to->mbmaxlen= from->mbmaxlen;
218
static int add_collation(CHARSET_INFO *cs)
220
if (cs->name && (cs->number ||
221
(cs->number=get_collation_number_internal(cs->name))))
223
if (!all_charsets[cs->number])
225
if (!(all_charsets[cs->number]=
226
(CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
228
bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO));
231
if (cs->primary_number == cs->number)
232
cs->state |= MY_CS_PRIMARY;
234
if (cs->binary_number == cs->number)
235
cs->state |= MY_CS_BINSORT;
237
all_charsets[cs->number]->state|= cs->state;
239
if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
241
CHARSET_INFO *newcs= all_charsets[cs->number];
242
if (cs_copy_data(all_charsets[cs->number],cs))
245
newcs->levels_for_compare= 1;
246
newcs->levels_for_order= 1;
248
if (!strcmp(cs->csname,"ucs2") )
250
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
251
copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
252
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
255
else if (!strcmp(cs->csname, "utf8"))
257
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
258
copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
259
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
262
else if (!strcmp(cs->csname, "utf8mb3"))
264
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
265
copy_uca_collation(newcs, &my_charset_utf8mb3_unicode_ci);
266
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
269
else if (!strcmp(cs->csname, "utf16"))
271
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
272
copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
273
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
276
else if (!strcmp(cs->csname, "utf32"))
278
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
279
copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
280
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
285
uchar *sort_order= all_charsets[cs->number]->sort_order;
286
simple_cs_init_functions(all_charsets[cs->number]);
289
if (simple_cs_is_full(all_charsets[cs->number]))
291
all_charsets[cs->number]->state |= MY_CS_LOADED;
293
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
296
Check if case sensitive sort order: A < a < B.
297
We need MY_CS_FLAG for regex library, and for
298
case sensitivity flag for 5.0 client protocol,
299
to support isCaseSensitive() method in JDBC driver
301
if (sort_order && sort_order['A'] < sort_order['a'] &&
302
sort_order['a'] < sort_order['B'])
303
all_charsets[cs->number]->state|= MY_CS_CSSORT;
305
if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
306
all_charsets[cs->number]->state|= MY_CS_PUREASCII;
307
if (!my_charset_is_ascii_compatible(cs))
308
all_charsets[cs->number]->state|= MY_CS_NONASCII;
314
We need the below to make get_charset_name()
315
and get_charset_number() working even if a
316
character set has not really been compiled in.
317
The above functions are used for example
318
in error message compiler extra/comp_err.c.
319
If a character set was compiled, this information
320
will get lost and overwritten in add_compiled_collation().
322
CHARSET_INFO *dst= all_charsets[cs->number];
323
dst->number= cs->number;
325
if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
327
if (cs->csname && !dst->csname)
328
if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
330
if (cs->name && !dst->name)
331
if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
335
cs->primary_number= 0;
336
cs->binary_number= 0;
339
cs->sort_order= NULL;
346
/*
  Upper bound, in bytes, on the size of a charset definition file that
  my_read_charset_file() accepts; larger files are rejected before any
  buffer is allocated.  The expansion is parenthesized so the macro
  behaves as a single value in any expression (e.g. x / MY_MAX_ALLOWED_BUF).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
119
347
#define MY_CHARSET_INDEX "Index.xml"
121
349
const char *charsets_dir= NULL;
122
350
static int charset_initialized=0;
353
static bool my_read_charset_file(const char *filename, myf myflags)
358
struct stat stat_info;
360
if (stat(filename, &stat_info) ||
361
((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||
362
!(buf= (uchar*) my_malloc(len,myflags)))
365
if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
367
tmp_len=my_read(fd, buf, len, myflags);
368
my_close(fd,myflags);
372
if (my_parse_charset_xml((char*) buf,len,add_collation))
375
printf("ERROR at line %d pos %d '%s'\n",
376
my_xml_error_lineno(&p)+1,
377
my_xml_error_pos(&p),
378
my_xml_error_string(&p));
382
my_free(buf, myflags);
386
my_free(buf, myflags);
125
391
char *get_charsets_dir(char *buf)
127
393
const char *sharedir= SHAREDIR;
676
Escape string with backslashes (\)
679
escape_string_for_mysql()
680
charset_info Charset of the strings
681
to Buffer for escaped string
682
to_length Length of destination buffer, or 0
683
from The string to escape
684
length The length of the string to escape
687
This escapes the contents of a string by adding backslashes before special
688
characters, and turning others into specific escape sequences, such as
689
turning newlines into \n and null bytes into \0.
692
To maintain compatibility with the old C API, to_length may be 0 to mean
696
(size_t) -1 The escaped string did not fit in the to buffer
697
# The length of the escaped string
700
size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
701
char *to, size_t to_length,
702
const char *from, size_t length)
704
const char *to_start= to;
705
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
706
bool overflow= false;
708
bool use_mb_flag= use_mb(charset_info);
710
for (end= from + length; from < end; from++)
715
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
717
if (to + tmp_length > to_end)
728
If the next character appears to begin a multi-byte character, we
729
escape that first byte of that apparent multi-byte character. (The
730
character just looks like a multi-byte character -- if it were actually
731
a multi-byte character, it would have been passed through in the test
734
Without this check, we can create a problem by converting an invalid
735
multi-byte character into a valid one. For example, 0xbf27 is not
736
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
738
if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
743
case 0: /* Must be escaped for 'mysql' */
746
case '\n': /* Must be escaped for logs */
758
case '"': /* Better safe than sorry */
761
case '\032': /* This gives problems on Win32 */
786
return overflow ? (size_t) -1 : (size_t) (to - to_start);
406
790
#ifdef BACKSLASH_MBTAIL
407
791
static CHARSET_INFO *fs_cset_cache= NULL;