~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <m_ctype.h>
19
#include <m_string.h>
20
#include <my_dir.h>
21
#include <my_xml.h>
22
23
24
/*
  The code below implements this functionality:

    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using charset name, collation name or collation ID
    - Setting server default character set
*/
33
34
/*
  Tell whether two collations belong to the same character set.

  Returns non-zero when both pointers are the same object, or when
  their csname strings compare equal; returns zero otherwise.
*/
my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
{
  if (cs1 == cs2)
    return 1;
  return strcmp(cs1->csname, cs2->csname) == 0;
}
38
39
40
static uint
41
get_collation_number_internal(const char *name)
42
{
43
  CHARSET_INFO **cs;
44
  for (cs= all_charsets;
45
       cs < all_charsets+array_elements(all_charsets)-1 ;
46
       cs++)
47
  {
48
    if ( cs[0] && cs[0]->name && 
49
         !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
50
      return cs[0]->number;
51
  }  
52
  return 0;
53
}
54
55
56
/*
  Allocate and fill the lexer lookup tables of a character set.

  Two 256-byte tables are obtained with my_once_alloc() and stored in
  cs->state_map and cs->ident_map:
    - state_map maps every byte value to a MY_LEX_* state;
    - ident_map holds 1 for bytes whose state is MY_LEX_IDENT or
      MY_LEX_NUMBER_IDENT, 0 otherwise.

  Returns 0 on success, 1 if either allocation fails.
*/
static my_bool init_state_maps(CHARSET_INFO *cs)
{
  uchar *states;
  uchar *idents;
  uint ch;

  cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME));
  if (!cs->state_map)
    return 1;

  cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME));
  if (!cs->ident_map)
    return 1;

  states= cs->state_map;
  idents= cs->ident_map;

  /* Default classification driven by the ctype information of the charset */
  for (ch= 0; ch < 256; ch++)
  {
    if (my_isalpha(cs, ch))
      states[ch]= (uchar) MY_LEX_IDENT;
    else if (my_isdigit(cs, ch))
      states[ch]= (uchar) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, ch) > 1)
      states[ch]= (uchar) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs, ch))
      states[ch]= (uchar) MY_LEX_SKIP;
    else
      states[ch]= (uchar) MY_LEX_CHAR;
  }

  /* Fixed overrides for characters with a dedicated lexer state */
  states[(uchar) '$']= (uchar) MY_LEX_IDENT;
  states[(uchar) '_']= (uchar) MY_LEX_IDENT;
  states[(uchar) '\'']= (uchar) MY_LEX_STRING;
  states[(uchar) '.']= (uchar) MY_LEX_REAL_OR_POINT;
  states[(uchar) '!']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '=']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '>']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '<']= (uchar) MY_LEX_LONG_CMP_OP;
  states[(uchar) '|']= (uchar) MY_LEX_BOOL;
  states[(uchar) '&']= (uchar) MY_LEX_BOOL;
  states[(uchar) '#']= (uchar) MY_LEX_COMMENT;
  states[(uchar) ';']= (uchar) MY_LEX_SEMICOLON;
  states[(uchar) ':']= (uchar) MY_LEX_SET_VAR;
  states[0]= (uchar) MY_LEX_EOL;
  states[(uchar) '\\']= (uchar) MY_LEX_ESCAPE;
  states[(uchar) '/']= (uchar) MY_LEX_LONG_COMMENT;
  states[(uchar) '*']= (uchar) MY_LEX_END_LONG_COMMENT;
  states[(uchar) '@']= (uchar) MY_LEX_USER_END;
  states[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
  states[(uchar) '"']= (uchar) MY_LEX_STRING_OR_DELIMITER;

  /*
    Derive the identifier map now, before x/b/n receive their special
    states below, so those letters still count as identifier bytes.
  */
  for (ch= 0; ch < 256; ch++)
  {
    uchar st= states[ch];
    idents[ch]= (uchar) (st == MY_LEX_IDENT || st == MY_LEX_NUMBER_IDENT);
  }

  /* Letters that may introduce hex, binary or national-charset literals */
  states[(uchar) 'X']= (uchar) MY_LEX_IDENT_OR_HEX;
  states[(uchar) 'x']= (uchar) MY_LEX_IDENT_OR_HEX;
  states[(uchar) 'B']= (uchar) MY_LEX_IDENT_OR_BIN;
  states[(uchar) 'b']= (uchar) MY_LEX_IDENT_OR_BIN;
  states[(uchar) 'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
  states[(uchar) 'n']= (uchar) MY_LEX_IDENT_OR_NCHAR;
  return 0;
}
119
120
121
/*
  Attach the generic 8-bit handlers to a character set.

  The collation handler is the binary one when MY_CS_BINSORT is set in
  cs->state, otherwise the simple case-insensitive one. The charset
  handler is always the 8-bit handler.
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->cset= &my_charset_8bit_handler;
  cs->coll= (cs->state & MY_CS_BINSORT) ?
            &my_collation_8bit_bin_handler :
            &my_collation_8bit_simple_ci_handler;
}
130
131
132
133
/*
  Copy the data members of one CHARSET_INFO into another.

  Every string or table that is set in 'from' is duplicated with the
  my_once_* allocators and stored in 'to'; members that are unset in
  'from' leave 'to' untouched. A non-zero from->number overrides
  to->number. When a ctype table is copied, the lexer state maps of
  'to' are rebuilt as well.

  Returns 0 on success, 1 as soon as any duplication (or the state map
  initialization) fails; members copied before the failure stay set.
*/
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  if (from->number)
    to->number= from->number;

  if (from->csname &&
      !(to->csname= my_once_strdup(from->csname, MYF(MY_WME))))
    return 1;

  if (from->name &&
      !(to->name= my_once_strdup(from->name, MYF(MY_WME))))
    return 1;

  if (from->comment &&
      !(to->comment= my_once_strdup(from->comment, MYF(MY_WME))))
    return 1;

  if (from->ctype)
  {
    to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                       MY_CS_CTYPE_TABLE_SIZE,
                                       MYF(MY_WME));
    if (!to->ctype)
      return 1;
    /* The state maps are derived from ctype, so refresh them now */
    if (init_state_maps(to))
      return 1;
  }

  if (from->to_lower)
  {
    to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                          MY_CS_TO_LOWER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_lower)
      return 1;
  }

  if (from->to_upper)
  {
    to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
                                          MY_CS_TO_UPPER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_upper)
      return 1;
  }

  if (from->sort_order)
  {
    to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
                                            MY_CS_SORT_ORDER_TABLE_SIZE,
                                            MYF(MY_WME));
    if (!to->sort_order)
      return 1;
  }

  if (from->tab_to_uni)
  {
    uint table_bytes= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
    to->tab_to_uni= (uint16*) my_once_memdup((char*) from->tab_to_uni,
                                             table_bytes, MYF(MY_WME));
    if (!to->tab_to_uni)
      return 1;
  }

  if (from->tailoring &&
      !(to->tailoring= my_once_strdup(from->tailoring, MYF(MY_WME))))
    return 1;

  return 0;
}
193
194
195
196
/*
  Check whether a simple (8-bit) character set description is complete.

  Complete means: csname, tab_to_uni, ctype, to_upper and to_lower are
  all set, number and name are set, and there is either a sort_order
  table or the MY_CS_BINSORT flag.

  Returns 1 when complete, 0 otherwise.
*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
  if (!cs->csname || !cs->tab_to_uni || !cs->ctype ||
      !cs->to_upper || !cs->to_lower)
    return 0;

  if (!cs->number || !cs->name)
    return 0;

  if (cs->sort_order)
    return 1;
  return (cs->state & MY_CS_BINSORT) ? 1 : 0;
}
203
204
205
static void
206
copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
207
{
208
  to->cset= from->cset;
209
  to->coll= from->coll;
210
  to->strxfrm_multiply= from->strxfrm_multiply;
211
  to->min_sort_char= from->min_sort_char;
212
  to->max_sort_char= from->max_sort_char;
213
  to->mbminlen= from->mbminlen;
214
  to->mbmaxlen= from->mbmaxlen;
215
}
216
217
218
/*
  Fill a dynamically loaded (not compiled-in) collation slot from the
  parsed description 'cs'.

  The data tables are copied with cs_copy_data(); the handlers are then
  chosen from the character set name: the Unicode character sets borrow
  the handlers of the matching built-in UCA collation (when compiled
  in), everything else is treated as a simple 8-bit character set.

  Returns MY_XML_OK on success, MY_XML_ERROR if the description has no
  character set name or if copying the data fails.
*/
static int load_dynamic_collation(CHARSET_INFO *newcs, CHARSET_INFO *cs)
{
  /* csname is passed to strcmp() below; NULL would be undefined behaviour */
  if (!cs->csname)
    return MY_XML_ERROR;

  if (cs_copy_data(newcs, cs))
    return MY_XML_ERROR;

  newcs->levels_for_compare= 1;
  newcs->levels_for_order= 1;

  if (!strcmp(cs->csname, "ucs2"))
  {
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
    copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
    newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
  }
  else if (!strcmp(cs->csname, "utf8"))
  {
    /*
      The guard names the same character set as the symbol used inside
      it, like every other branch here.
      NOTE(review): confirm HAVE_CHARSET_utf8mb4 is the macro this tree
      defines for my_charset_utf8mb4_unicode_ci.
    */
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
    copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
    newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
  }
  else if (!strcmp(cs->csname, "utf8mb3"))
  {
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
    copy_uca_collation(newcs, &my_charset_utf8mb3_unicode_ci);
    newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
  }
  else if (!strcmp(cs->csname, "utf16"))
  {
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
    copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
    newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
  }
  else if (!strcmp(cs->csname, "utf32"))
  {
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
    copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
    newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
  }
  else
  {
    uchar *sort_order= newcs->sort_order;

    simple_cs_init_functions(newcs);
    newcs->mbminlen= 1;
    newcs->mbmaxlen= 1;
    if (simple_cs_is_full(newcs))
      newcs->state|= MY_CS_LOADED;
    newcs->state|= MY_CS_AVAILABLE;

    /*
      Check if case sensitive sort order: A < a < B.
      We need MY_CS_FLAG for regex library, and for
      case sensitivity flag for 5.0 client protocol,
      to support isCaseSensitive() method in JDBC driver
    */
    if (sort_order && sort_order['A'] < sort_order['a'] &&
                      sort_order['a'] < sort_order['B'])
      newcs->state|= MY_CS_CSSORT;

    if (my_charset_is_8bit_pure_ascii(newcs))
      newcs->state|= MY_CS_PUREASCII;
    if (!my_charset_is_ascii_compatible(cs))
      newcs->state|= MY_CS_NONASCII;
  }
  return MY_XML_OK;
}


/*
  Record the number, comment and names of a collation whose slot
  carries the MY_CS_COMPILED flag.

  We need this to make get_charset_name() and get_charset_number()
  work even if a character set has not really been compiled in; those
  functions are used for example by the error message compiler
  extra/comp_err.c. If a character set was compiled, this information
  will get lost and overwritten in add_compiled_collation().

  csname and name are only filled in when the slot has none yet; the
  comment is always replaced when the description has one.

  Returns MY_XML_OK on success, MY_XML_ERROR on allocation failure.
*/
static int note_compiled_collation(CHARSET_INFO *dst, CHARSET_INFO *cs)
{
  dst->number= cs->number;

  if (cs->comment &&
      !(dst->comment= my_once_strdup(cs->comment, MYF(MY_WME))))
    return MY_XML_ERROR;

  if (cs->csname && !dst->csname &&
      !(dst->csname= my_once_strdup(cs->csname, MYF(MY_WME))))
    return MY_XML_ERROR;

  if (cs->name && !dst->name &&
      !(dst->name= my_once_strdup(cs->name, MYF(MY_WME))))
    return MY_XML_ERROR;

  return MY_XML_OK;
}


/*
  Register one collation description in all_charsets[].

  This is the callback handed to my_parse_charset_xml(). 'cs' is the
  parser's description of a collation; it is used when it has a name
  and either an explicit number or a name already known to
  get_collation_number_internal(). Otherwise nothing happens.

  A description whose number does not fit into all_charsets[] is
  skipped instead of being written past the end of the array.

  After a description has been handled (or skipped for being out of
  range) its number, primary_number, binary_number, name, state and
  sort_order are reset so they do not leak into the next collation.
  They are left as they are when an error is returned.

  Returns MY_XML_OK, or MY_XML_ERROR on allocation failure or on a
  description without a character set name.
*/
static int add_collation(CHARSET_INFO *cs)
{
  if (!cs->name)
    return MY_XML_OK;
  if (!cs->number &&
      !(cs->number= get_collation_number_internal(cs->name)))
    return MY_XML_OK;

  /* The number comes from an external file: never trust it as an index */
  if (cs->number < array_elements(all_charsets))
  {
    CHARSET_INFO *slot;
    int rc;

    if (!all_charsets[cs->number])
    {
      if (!(all_charsets[cs->number]=
            (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO), MYF(0))))
        return MY_XML_ERROR;
      bzero((void*) all_charsets[cs->number], sizeof(CHARSET_INFO));
    }
    slot= all_charsets[cs->number];

    if (cs->primary_number == cs->number)
      cs->state|= MY_CS_PRIMARY;

    if (cs->binary_number == cs->number)
      cs->state|= MY_CS_BINSORT;

    slot->state|= cs->state;

    if (slot->state & MY_CS_COMPILED)
      rc= note_compiled_collation(slot, cs);
    else
      rc= load_dynamic_collation(slot, cs);

    if (rc != MY_XML_OK)
      return MY_XML_ERROR;
  }

  cs->number= 0;
  cs->primary_number= 0;
  cs->binary_number= 0;
  cs->name= NULL;
  cs->sort_order= NULL;
  cs->state= 0;
  return MY_XML_OK;
}
344
345
346
/*
  Largest charset definition file (in bytes) that my_read_charset_file()
  accepts. Parenthesized so the macro expands safely inside any
  expression (e.g. next to '/' or '%').
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the index file appended to the charsets directory path */
#define MY_CHARSET_INDEX "Index.xml"

/* When non-NULL, get_charsets_dir() uses this path instead of the built-in one */
const char *charsets_dir= NULL;
/* Set to 1 by init_available_charsets(), back to 0 by free_charsets() */
static int charset_initialized=0;
351
352
353
/*
  Read a charset definition file into memory and feed it to the XML
  charset parser, which registers collations through add_collation().

  Returns TRUE when the file cannot be stat'ed, is larger than
  MY_MAX_ALLOWED_BUF, cannot be opened, cannot be read completely, or
  when the buffer allocation fails. Returns FALSE once the whole file
  has been read -- including when the parser reports an error, which is
  currently ignored.
*/
static my_bool my_read_charset_file(const char *filename, myf myflags)
{
  struct stat file_stat;
  uchar *contents;
  uint file_size, bytes_read;
  int handle;
  my_bool failed= TRUE;

  if (stat(filename, &file_stat))
    return TRUE;
  file_size= (uint) file_stat.st_size;
  if (file_size > MY_MAX_ALLOWED_BUF)
    return TRUE;
  if (!(contents= (uchar*) my_malloc(file_size, myflags)))
    return TRUE;

  handle= my_open(filename, O_RDONLY, myflags);
  if (handle >= 0)
  {
    bytes_read= my_read(handle, contents, file_size, myflags);
    my_close(handle, myflags);

    if (bytes_read == file_size)
    {
      if (my_parse_charset_xml((char*) contents, file_size, add_collation))
      {
#ifdef NOT_YET
        printf("ERROR at line %d pos %d '%s'\n",
               my_xml_error_lineno(&p)+1,
               my_xml_error_pos(&p),
               my_xml_error_string(&p));
#endif
      }
      failed= FALSE;
    }
  }

  /* Single release point for the file buffer, success or not */
  my_free(contents, myflags);
  return failed;
}
389
390
391
char *get_charsets_dir(char *buf)
392
{
393
  const char *sharedir= SHAREDIR;
394
  char *res;
395
  DBUG_ENTER("get_charsets_dir");
396
397
  if (charsets_dir != NULL)
398
    strmake(buf, charsets_dir, FN_REFLEN-1);
399
  else
400
  {
401
    if (test_if_hard_path(sharedir) ||
402
	is_prefix(sharedir, DEFAULT_CHARSET_HOME))
403
      strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
404
    else
405
      strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
406
	      NullS);
407
  }
408
  res= convert_dirname(buf,buf,NullS);
409
  DBUG_PRINT("info",("charsets dir: '%s'", buf));
410
  DBUG_RETURN(res);
411
}
412
413
CHARSET_INFO *all_charsets[256];
414
CHARSET_INFO *default_charset_info = &my_charset_latin1;
415
416
/*
  Register a compiled-in collation: mark it MY_CS_AVAILABLE and store
  it in all_charsets[] at the index given by cs->number, replacing
  whatever was there.
*/
void add_compiled_collation(CHARSET_INFO *cs)
{
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
421
422
static void *cs_alloc(size_t size)
423
{
424
  return my_once_alloc(size, MYF(MY_WME));
425
}
426
427
428
/*
  Populate all_charsets[] on first use.

  The work is done once, under THR_LOCK_charset: the table is cleared,
  the compiled-in character sets are registered, lexer state maps are
  built for every registered entry that has a ctype table (an entry
  whose maps cannot be built is dropped from the table), and finally
  the charset index file is read.

  charset_initialized is tested before taking the mutex so that callers
  which already hold THR_LOCK_charset (see get_internal_charset) do not
  lock it again, and once more under the mutex.

  Returns the result of reading the index file on the call that
  performs the initialization, FALSE on every other call.
*/
static my_bool init_available_charsets(myf myflags)
{
  char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
  my_bool error= FALSE;

  if (charset_initialized)
    return FALSE;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing the table
  */
  pthread_mutex_lock(&THR_LOCK_charset);
  if (!charset_initialized)
  {
    uint slot;

    bzero(&all_charsets, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Build lexer maps for the compiled charsets (last slot not visited) */
    for (slot= 0; slot < array_elements(all_charsets) - 1; slot++)
    {
      CHARSET_INFO *entry= all_charsets[slot];

      if (entry && entry->ctype && init_state_maps(entry))
        all_charsets[slot]= NULL;
    }

    strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
    error= my_read_charset_file(fname, myflags);
    charset_initialized= 1;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);
  return error;
}
470
471
472
void free_charsets(void)
473
{
474
  charset_initialized=0;
475
}
476
477
478
/*
  Return the ID of the collation with the given name, or 0 if there is
  no such collation. Makes sure the charset table is initialized first.
*/
uint get_collation_number(const char *name)
{
  uint id;

  init_available_charsets(MYF(0));
  id= get_collation_number_internal(name);
  return id;
}
483
484
485
/*
  Find a collation by character set name and state flags.

  charset_name  Character set name, compared case-insensitively
  cs_flags      Bit mask; an entry qualifies when at least one of these
                bits is set in its state

  Returns the number of the first qualifying entry, or 0 if none is
  found. The last slot of all_charsets is not examined.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  uint idx;

  init_available_charsets(MYF(0));

  for (idx= 0; idx < array_elements(all_charsets) - 1; idx++)
  {
    CHARSET_INFO *entry= all_charsets[idx];

    if (!entry || !entry->csname || !(entry->state & cs_flags))
      continue;
    if (my_strcasecmp(&my_charset_latin1, entry->csname, charset_name) == 0)
      return entry->number;
  }
  return 0;
}
500
501
502
/*
  Return the collation name registered under the given number.

  charset_number  Collation ID; any value is accepted. IDs that do not
                  fit into all_charsets[] are treated as unknown
                  instead of being used as an array index.

  Returns the name stored in the table entry when the entry exists, its
  number matches and it has a name; the string "?" otherwise.
*/
const char *get_charset_name(uint charset_number)
{
  init_available_charsets(MYF(0));

  if (charset_number < array_elements(all_charsets))
  {
    CHARSET_INFO *cs= all_charsets[charset_number];

    if (cs && (cs->number == charset_number) && cs->name)
      return cs->name;
  }

  return "?";   /* this mimics find_type() */
}
513
514
515
/*
  Fetch a collation by number and make it ready for use.

  Runs under THR_LOCK_charset. If the entry is neither compiled in nor
  loaded, its "<csname>.xml" definition file is read from the charsets
  directory first. An entry that is not MY_CS_AVAILABLE afterwards is
  reported as missing. On first use the init() hooks of the charset and
  collation handlers are run (when present) and the entry is marked
  MY_CS_READY; if a hook fails the entry is reported as missing.

  cs_number is used as an index without a range check; callers in this
  file validate it or obtain it from the table itself.

  Returns the collation, or NULL.
*/
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
  char path[FN_REFLEN];
  CHARSET_INFO *cs;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing the table
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  cs= all_charsets[cs_number];
  if (cs)
  {
    if (!(cs->state & (MY_CS_COMPILED | MY_CS_LOADED)))
    {
      strxmov(get_charsets_dir(path), cs->csname, ".xml", NullS);
      my_read_charset_file(path, flags);
    }
    if (!(cs->state & MY_CS_AVAILABLE))
      cs= NULL;
  }

  if (cs && !(cs->state & MY_CS_READY))
  {
    my_bool init_failed=
      (cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
      (cs->coll->init && cs->coll->init(cs, cs_alloc));

    if (init_failed)
      cs= NULL;
    else
      cs->state|= MY_CS_READY;
  }

  pthread_mutex_unlock(&THR_LOCK_charset);
  return cs;
}
544
545
546
/*
  Get a collation by its number.

  cs_number  Collation ID
  flags      When MY_WME is set, an unknown ID is reported with
             my_error(EE_UNKNOWN_CHARSET, ...)

  The number of default_charset_info is answered directly, before any
  initialization. 0 and numbers at or beyond the last table slot yield
  NULL without an error message.

  Returns the collation, or NULL.
*/
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
  CHARSET_INFO *cs;

  if (default_charset_info->number == cs_number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (cs_number == 0 || cs_number >= array_elements(all_charsets)-1)
    return NULL;

  cs= get_internal_charset(cs_number, flags);
  if (cs || !(flags & MY_WME))
    return cs;

  {
    /* Report "#<number>" together with the index file that was consulted */
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];

    strmov(get_charsets_dir(index_file), MY_CHARSET_INDEX);
    cs_string[0]= '#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return cs;
}
569
570
/*
  Get a collation by collation name.

  cs_name  Collation name
  flags    When MY_WME is set, an unknown name is reported with
           my_error(EE_UNKNOWN_COLLATION, ...)

  Returns the collation, or NULL if the name is unknown or the
  collation could not be made ready.
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *cs= NULL;
  uint cs_number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  cs_number= get_collation_number(cs_name);
  if (cs_number)
    cs= get_internal_charset(cs_number, flags);

  if (cs || !(flags & MY_WME))
    return cs;

  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];

    strmov(get_charsets_dir(index_file), MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
  }
  return cs;
}
588
589
590
CHARSET_INFO *get_charset_by_csname(const char *cs_name,
591
				    uint cs_flags,
592
				    myf flags)
593
{
594
  uint cs_number;
595
  CHARSET_INFO *cs;
596
  DBUG_ENTER("get_charset_by_csname");
597
  DBUG_PRINT("enter",("name: '%s'", cs_name));
598
599
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */
600
601
  cs_number= get_charset_number(cs_name, cs_flags);
602
  cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
603
604
  if (!cs && (flags & MY_WME))
605
  {
606
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
607
    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
608
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
609
  }
610
611
  DBUG_RETURN(cs);
612
}
613
614
615
/**
616
  Resolve character set by the character set name (utf8, latin1, ...).
617
618
  The function tries to resolve character set by the specified name. If
619
  there is character set with the given name, it is assigned to the "cs"
620
  parameter and FALSE is returned. If there is no such character set,
621
  "default_cs" is assigned to the "cs" and TRUE is returned.
622
623
  @param[in] cs_name    Character set name.
624
  @param[in] default_cs Default character set.
625
  @param[out] cs        Variable to store character set.
626
627
  @return FALSE if character set was resolved successfully; TRUE if there
628
  is no character set with given name.
629
*/
630
631
my_bool resolve_charset(const char *cs_name,
632
                        CHARSET_INFO *default_cs,
633
                        CHARSET_INFO **cs)
634
{
635
  *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
636
637
  if (*cs == NULL)
638
  {
639
    *cs= default_cs;
640
    return TRUE;
641
  }
642
643
  return FALSE;
644
}
645
646
647
/**
648
  Resolve collation by the collation name (utf8_general_ci, ...).
649
650
  The function tries to resolve collation by the specified name. If there
651
  is collation with the given name, it is assigned to the "cl" parameter
652
  and FALSE is returned. If there is no such collation, "default_cl" is
653
  assigned to the "cl" and TRUE is returned.
654
655
  @param[out] cl        Variable to store collation.
656
  @param[in] cl_name    Collation name.
657
  @param[in] default_cl Default collation.
658
659
  @return FALSE if collation was resolved successfully; TRUE if there is no
660
  collation with given name.
661
*/
662
663
my_bool resolve_collation(const char *cl_name,
664
                          CHARSET_INFO *default_cl,
665
                          CHARSET_INFO **cl)
666
{
667
  *cl= get_charset_by_name(cl_name, MYF(0));
668
669
  if (*cl == NULL)
670
  {
671
    *cl= default_cl;
672
    return TRUE;
673
  }
674
675
  return FALSE;
676
}
677
678
679
/*
680
  Escape string with backslashes (\)
681
682
  SYNOPSIS
683
    escape_string_for_mysql()
684
    charset_info        Charset of the strings
685
    to                  Buffer for escaped string
686
    to_length           Length of destination buffer, or 0
687
    from                The string to escape
688
    length              The length of the string to escape
689
690
  DESCRIPTION
691
    This escapes the contents of a string by adding backslashes before special
692
    characters, and turning others into specific escape sequences, such as
693
    turning newlines into \n and null bytes into \0.
694
695
  NOTE
696
    To maintain compatibility with the old C API, to_length may be 0 to mean
697
    "big enough"
698
699
  RETURN VALUES
700
    (size_t) -1 The escaped string did not fit in the to buffer
701
    #           The length of the escaped string
702
*/
703
704
size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
705
                               char *to, size_t to_length,
706
                               const char *from, size_t length)
707
{
708
  const char *to_start= to;
709
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
710
  my_bool overflow= FALSE;
711
#ifdef USE_MB
712
  my_bool use_mb_flag= use_mb(charset_info);
713
#endif
714
  for (end= from + length; from < end; from++)
715
  {
716
    char escape= 0;
717
#ifdef USE_MB
718
    int tmp_length;
719
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
720
    {
721
      if (to + tmp_length > to_end)
722
      {
723
        overflow= TRUE;
724
        break;
725
      }
726
      while (tmp_length--)
727
	*to++= *from++;
728
      from--;
729
      continue;
730
    }
731
    /*
732
     If the next character appears to begin a multi-byte character, we
733
     escape that first byte of that apparent multi-byte character. (The
734
     character just looks like a multi-byte character -- if it were actually
735
     a multi-byte character, it would have been passed through in the test
736
     above.)
737
738
     Without this check, we can create a problem by converting an invalid
739
     multi-byte character into a valid one. For example, 0xbf27 is not
740
     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
741
    */
742
    if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
743
      escape= *from;
744
    else
745
#endif
746
    switch (*from) {
747
    case 0:				/* Must be escaped for 'mysql' */
748
      escape= '0';
749
      break;
750
    case '\n':				/* Must be escaped for logs */
751
      escape= 'n';
752
      break;
753
    case '\r':
754
      escape= 'r';
755
      break;
756
    case '\\':
757
      escape= '\\';
758
      break;
759
    case '\'':
760
      escape= '\'';
761
      break;
762
    case '"':				/* Better safe than sorry */
763
      escape= '"';
764
      break;
765
    case '\032':			/* This gives problems on Win32 */
766
      escape= 'Z';
767
      break;
768
    }
769
    if (escape)
770
    {
771
      if (to + 2 > to_end)
772
      {
773
        overflow= TRUE;
774
        break;
775
      }
776
      *to++= '\\';
777
      *to++= escape;
778
    }
779
    else
780
    {
781
      if (to + 1 > to_end)
782
      {
783
        overflow= TRUE;
784
        break;
785
      }
786
      *to++= *from;
787
    }
788
  }
789
  *to= 0;
790
  return overflow ? (size_t) -1 : (size_t) (to - to_start);
791
}
792
793
794
#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call, reused afterwards */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set used for file system names.

  The system default ANSI code page is queried with GetLocaleInfo() and
  turned into a name of the form "cp<number>". Only "cp932" is
  recognized and mapped to my_charset_cp932_japanese_ci; anything else
  yields my_charset_bin. The answer is computed once and cached.
*/
CHARSET_INFO *fs_character_set()
{
  char codepage[10]= "cp";

  if (fs_cset_cache)
    return fs_cset_cache;

  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                codepage+2, sizeof(codepage)-3);
  /*
    get_charset_by_name() is deliberately not used: according to the
    original note, fs_character_set() is executed before the charset
    mutex used inside get_charset_by_name() has been initialized. As
    only cp932 is of interest, a plain strcmp() is enough.
  */
  if (strcmp(codepage, "cp932") == 0)
    fs_cset_cache= &my_charset_cp932_japanese_ci;
  else
    fs_cset_cache= &my_charset_bin;

  return fs_cset_cache;
}
#endif
818
819
/*
820
  Escape apostrophes by doubling them up
821
822
  SYNOPSIS
823
    escape_quotes_for_mysql()
824
    charset_info        Charset of the strings
825
    to                  Buffer for escaped string
826
    to_length           Length of destination buffer, or 0
827
    from                The string to escape
828
    length              The length of the string to escape
829
830
  DESCRIPTION
831
    This escapes the contents of a string by doubling up any apostrophes that
832
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
833
    effect on the server.
834
835
  NOTE
836
    To be consistent with escape_string_for_mysql(), to_length may be 0 to
837
    mean "big enough"
838
839
  RETURN VALUES
840
    ~0          The escaped string did not fit in the to buffer
841
    >=0         The length of the escaped string
842
*/
843
844
size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
845
                               char *to, size_t to_length,
846
                               const char *from, size_t length)
847
{
848
  const char *to_start= to;
849
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
850
  my_bool overflow= FALSE;
851
#ifdef USE_MB
852
  my_bool use_mb_flag= use_mb(charset_info);
853
#endif
854
  for (end= from + length; from < end; from++)
855
  {
856
#ifdef USE_MB
857
    int tmp_length;
858
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
859
    {
860
      if (to + tmp_length > to_end)
861
      {
862
        overflow= TRUE;
863
        break;
864
      }
865
      while (tmp_length--)
866
	*to++= *from++;
867
      from--;
868
      continue;
869
    }
870
    /*
871
      We don't have the same issue here with a non-multi-byte character being
872
      turned into a multi-byte character by the addition of an escaping
873
      character, because we are only escaping the ' character with itself.
874
     */
875
#endif
876
    if (*from == '\'')
877
    {
878
      if (to + 2 > to_end)
879
      {
880
        overflow= TRUE;
881
        break;
882
      }
883
      *to++= '\'';
884
      *to++= '\'';
885
    }
886
    else
887
    {
888
      if (to + 1 > to_end)
889
      {
890
        overflow= TRUE;
891
        break;
892
      }
893
      *to++= *from;
894
    }
895
  }
896
  *to= 0;
897
  return overflow ? (ulong)~0 : (ulong) (to - to_start);
898
}