~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <m_ctype.h>
19
#include <m_string.h>
20
#include <my_dir.h>
21
#include <my_xml.h>
22
23
24
/*
  The code below implements this functionality:

    - Initializing charset-related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using a charset name, a collation name or a collation ID
    - Setting the server default character set
*/
33
146 by Brian Aker
my_bool cleanup.
34
/*
  Tell whether two CHARSET_INFO descriptors refer to the same character set.

  Returns true when both pointers are identical or when their csname
  strings compare equal; false otherwise.
*/
bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
{
  if (cs1 == cs2)
    return true;
  return strcmp(cs1->csname, cs2->csname) == 0;
}
38
39
40
static uint
41
get_collation_number_internal(const char *name)
42
{
43
  CHARSET_INFO **cs;
44
  for (cs= all_charsets;
45
       cs < all_charsets+array_elements(all_charsets)-1 ;
46
       cs++)
47
  {
48
    if ( cs[0] && cs[0]->name && 
49
         !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
50
      return cs[0]->number;
51
  }  
52
  return 0;
53
}
54
55
146 by Brian Aker
my_bool cleanup.
56
/*
  Build the two 256-entry lexer lookup tables of a character set.

  cs->state_map receives one MY_LEX_* state per byte value and
  cs->ident_map receives 1 for bytes whose state is MY_LEX_IDENT or
  MY_LEX_NUMBER_IDENT (0 otherwise).  Both tables are obtained from
  my_once_alloc().

  Returns 0 on success, 1 if either allocation fails.
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  uint ch;
  uchar *states;
  uchar *idents;

  states= (uchar*) my_once_alloc(256, MYF(MY_WME));
  cs->state_map= states;
  if (!states)
    return 1;

  idents= (uchar*) my_once_alloc(256, MYF(MY_WME));
  cs->ident_map= idents;
  if (!idents)
    return 1;

  /* Default classification of every byte from the ctype table. */
  for (ch= 0; ch < 256; ch++)
  {
    uchar st;

    if (my_isalpha(cs, ch))
      st= (uchar) MY_LEX_IDENT;
    else if (my_isdigit(cs, ch))
      st= (uchar) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, ch) > 1)
      st= (uchar) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs, ch))
      st= (uchar) MY_LEX_SKIP;
    else
      st= (uchar) MY_LEX_CHAR;

    states[ch]= st;
  }

  /* Bytes with a fixed lexer meaning override the default classification. */
  states[(uchar) '_']= (uchar) MY_LEX_IDENT;
  states[(uchar) '$']= (uchar) MY_LEX_IDENT;
  states[(uchar) '\'']= (uchar) MY_LEX_STRING;
  states[(uchar) '.']= (uchar) MY_LEX_REAL_OR_POINT;
  states[(uchar) '>']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '=']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '!']= (uchar) MY_LEX_CMP_OP;
  states[(uchar) '<']= (uchar) MY_LEX_LONG_CMP_OP;
  states[(uchar) '&']= (uchar) MY_LEX_BOOL;
  states[(uchar) '|']= (uchar) MY_LEX_BOOL;
  states[(uchar) '#']= (uchar) MY_LEX_COMMENT;
  states[(uchar) ';']= (uchar) MY_LEX_SEMICOLON;
  states[(uchar) ':']= (uchar) MY_LEX_SET_VAR;
  states[0]= (uchar) MY_LEX_EOL;
  states[(uchar) '\\']= (uchar) MY_LEX_ESCAPE;
  states[(uchar) '/']= (uchar) MY_LEX_LONG_COMMENT;
  states[(uchar) '*']= (uchar) MY_LEX_END_LONG_COMMENT;
  states[(uchar) '@']= (uchar) MY_LEX_USER_END;
  states[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
  states[(uchar) '"']= (uchar) MY_LEX_STRING_OR_DELIMITER;

  /*
    The identifier map must be derived before the hex/bin/nchar overrides
    below, so that x, b and n are still counted as identifier bytes.
  */
  for (ch= 0; ch < 256; ch++)
  {
    idents[ch]= (uchar) (states[ch] == MY_LEX_IDENT ||
                         states[ch] == MY_LEX_NUMBER_IDENT);
  }

  /* Special handling of hex, binary and national-character strings */
  states[(uchar) 'x']= (uchar) MY_LEX_IDENT_OR_HEX;
  states[(uchar) 'X']= (uchar) MY_LEX_IDENT_OR_HEX;
  states[(uchar) 'b']= (uchar) MY_LEX_IDENT_OR_BIN;
  states[(uchar) 'B']= (uchar) MY_LEX_IDENT_OR_BIN;
  states[(uchar) 'n']= (uchar) MY_LEX_IDENT_OR_NCHAR;
  states[(uchar) 'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
  return 0;
}
119
120
121
/*
  Attach the 8-bit handlers to a character set loaded from a file.

  The charset handler is always my_charset_8bit_handler; the collation
  handler is the binary one when MY_CS_BINSORT is set in cs->state and
  the simple case-insensitive one otherwise.
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->cset= &my_charset_8bit_handler;
  cs->coll= (cs->state & MY_CS_BINSORT) ?
            &my_collation_8bit_bin_handler :
            &my_collation_8bit_simple_ci_handler;
}
130
131
132
133
/*
  Copy the fields that are set in "from" into "to".

  Strings are duplicated with my_once_strdup() and tables with
  my_once_memdup().  Fields that are NULL (or, for number, zero) in
  "from" leave the corresponding field of "to" untouched.  When a ctype
  table is copied, the lexer maps of "to" are built as well.

  Returns 0 on success, 1 if a duplication or init_state_maps() fails.
*/
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  if (from->number)
    to->number= from->number;

  if (from->csname &&
      !(to->csname= my_once_strdup(from->csname, MYF(MY_WME))))
    return 1;

  if (from->name &&
      !(to->name= my_once_strdup(from->name, MYF(MY_WME))))
    return 1;

  if (from->comment &&
      !(to->comment= my_once_strdup(from->comment, MYF(MY_WME))))
    return 1;

  if (from->ctype)
  {
    to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                       MY_CS_CTYPE_TABLE_SIZE,
                                       MYF(MY_WME));
    if (!to->ctype)
      return 1;
    /* The lexer maps are derived from the ctype table just copied. */
    if (init_state_maps(to))
      return 1;
  }

  if (from->to_lower &&
      !(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                              MY_CS_TO_LOWER_TABLE_SIZE,
                                              MYF(MY_WME))))
    return 1;

  if (from->to_upper &&
      !(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
                                              MY_CS_TO_UPPER_TABLE_SIZE,
                                              MYF(MY_WME))))
    return 1;

  if (from->sort_order &&
      !(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
                                                MY_CS_SORT_ORDER_TABLE_SIZE,
                                                MYF(MY_WME))))
    return 1;

  if (from->tab_to_uni)
  {
    uint bytes= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
    to->tab_to_uni= (uint16*) my_once_memdup((char*) from->tab_to_uni,
                                             bytes, MYF(MY_WME));
    if (!to->tab_to_uni)
      return 1;
  }

  if (from->tailoring &&
      !(to->tailoring= my_once_strdup(from->tailoring, MYF(MY_WME))))
    return 1;

  return 0;
}
193
194
195
146 by Brian Aker
my_bool cleanup.
196
/*
  Check whether a simple (8-bit) character set description is complete.

  Returns true only when csname, tab_to_uni, ctype, to_upper, to_lower,
  number and name are all set and, in addition, either a sort_order table
  is present or MY_CS_BINSORT is set in cs->state.
*/
static bool simple_cs_is_full(CHARSET_INFO *cs)
{
  if (!cs->csname || !cs->tab_to_uni || !cs->ctype ||
      !cs->to_upper || !cs->to_lower)
    return false;

  if (!cs->number || !cs->name)
    return false;

  return (cs->sort_order || (cs->state & MY_CS_BINSORT));
}
203
204
205
static void
206
copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
207
{
208
  to->cset= from->cset;
209
  to->coll= from->coll;
210
  to->strxfrm_multiply= from->strxfrm_multiply;
211
  to->min_sort_char= from->min_sort_char;
212
  to->max_sort_char= from->max_sort_char;
213
  to->mbminlen= from->mbminlen;
214
  to->mbmaxlen= from->mbmaxlen;
215
}
216
217
218
/*
  Store the collation described by "cs" in its all_charsets slot.

  The caller guarantees that cs->number is a valid index into
  all_charsets.  The slot is allocated (zero-filled) on first use.

  - If the slot is not flagged MY_CS_COMPILED, the data of "cs" is copied
    into it and handlers are attached according to cs->csname.
  - Otherwise only number, comment and (when still unset) csname and name
    are recorded in the slot.

  Returns MY_XML_OK on success, MY_XML_ERROR if an allocation or copy
  fails.
*/
static int store_collation(CHARSET_INFO *cs)
{
  CHARSET_INFO *newcs= all_charsets[cs->number];

  if (!newcs)
  {
    if (!(newcs= (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
      return MY_XML_ERROR;
    bzero((void*) newcs, sizeof(CHARSET_INFO));
    all_charsets[cs->number]= newcs;
  }

  if (cs->primary_number == cs->number)
    cs->state |= MY_CS_PRIMARY;

  if (cs->binary_number == cs->number)
    cs->state |= MY_CS_BINSORT;

  newcs->state|= cs->state;

  if (!(newcs->state & MY_CS_COMPILED))
  {
    if (cs_copy_data(newcs, cs))
      return MY_XML_ERROR;

    newcs->levels_for_compare= 1;
    newcs->levels_for_order= 1;

    if (!strcmp(cs->csname,"ucs2") )
    {
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
      newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else if (!strcmp(cs->csname, "utf8"))
    {
      /*
        NOTE(review): this branch is guarded by HAVE_CHARSET_utf8mb3 but
        references my_charset_utf8mb4_unicode_ci -- confirm that the guard
        macro is the intended one.
      */
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
      newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
    }
    else if (!strcmp(cs->csname, "utf8mb3"))
    {
#if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(newcs, &my_charset_utf8mb3_unicode_ci);
      newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
    }
    else if (!strcmp(cs->csname, "utf16"))
    {
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
      newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else if (!strcmp(cs->csname, "utf32"))
    {
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
      newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else
    {
      uchar *sort_order= newcs->sort_order;
      simple_cs_init_functions(newcs);
      newcs->mbminlen= 1;
      newcs->mbmaxlen= 1;
      if (simple_cs_is_full(newcs))
      {
        newcs->state |= MY_CS_LOADED;
      }
      newcs->state|= MY_CS_AVAILABLE;

      /*
        Check if case sensitive sort order: A < a < B.
        We need MY_CS_FLAG for regex library, and for
        case sensitivity flag for 5.0 client protocol,
        to support isCaseSensitive() method in JDBC driver
      */
      if (sort_order && sort_order['A'] < sort_order['a'] &&
                        sort_order['a'] < sort_order['B'])
        newcs->state|= MY_CS_CSSORT;

      if (my_charset_is_8bit_pure_ascii(newcs))
        newcs->state|= MY_CS_PUREASCII;
      if (!my_charset_is_ascii_compatible(cs))
        newcs->state|= MY_CS_NONASCII;
    }
  }
  else
  {
    /*
      We need the below to make get_charset_name()
      and get_charset_number() working even if a
      character set has not been really incompiled.
      The above functions are used for example
      in error message compiler extra/comp_err.c.
      If a character set was compiled, this information
      will get lost and overwritten in add_compiled_collation().
    */
    newcs->number= cs->number;
    if (cs->comment)
      if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
        return MY_XML_ERROR;
    if (cs->csname && !newcs->csname)
      if (!(newcs->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
        return MY_XML_ERROR;
    if (cs->name && !newcs->name)
      if (!(newcs->name= my_once_strdup(cs->name,MYF(MY_WME))))
        return MY_XML_ERROR;
  }
  return MY_XML_OK;
}


/*
  Register one parsed collation description.

  This function is handed to my_parse_charset_xml() as its callback.
  A description is processed only when it has a name and a non-zero
  number; a missing number is looked up by name among the collations
  that are already known.

  cs->number originates from the parsed file, so a number that does not
  fit into all_charsets is ignored rather than used as an index.

  After a description has been processed (or ignored because of its
  number) the per-collation fields of "cs" are cleared so that they do
  not carry over into the next description.

  Returns MY_XML_OK, or MY_XML_ERROR if storing the collation fails.
*/
static int add_collation(CHARSET_INFO *cs)
{
  if (cs->name && (cs->number ||
                   (cs->number=get_collation_number_internal(cs->name))))
  {
    if (cs->number < array_elements(all_charsets) &&
        store_collation(cs) != MY_XML_OK)
      return MY_XML_ERROR;

    cs->number= 0;
    cs->primary_number= 0;
    cs->binary_number= 0;
    cs->name= NULL;
    cs->state= 0;
    cs->sort_order= NULL;
  }
  return MY_XML_OK;
}
344
345
346
/*
  Largest charset definition file (in bytes) that will be loaded.
  Parenthesized so that the macro expands as a single value inside any
  expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the charset index file inside the charsets directory. */
#define MY_CHARSET_INDEX "Index.xml"

/* When non-NULL, get_charsets_dir() uses this directory as its base. */
const char *charsets_dir= NULL;
/* Set to 1 by init_available_charsets(), reset to 0 by free_charsets(). */
static int charset_initialized=0;
351
352
146 by Brian Aker
my_bool cleanup.
353
/*
  Read a charset definition file and feed it to the XML charset parser.

  The whole file is read into a temporary buffer and parsed with
  my_parse_charset_xml(), which calls add_collation() for the collations
  it finds.

  filename   Path of the file to load.
  myflags    Flags forwarded to my_malloc/my_open/my_read/my_close/my_free.

  Returns true if the file cannot be stat'ed, is larger than
  MY_MAX_ALLOWED_BUF, or cannot be allocated for, opened or fully read.
  Returns false otherwise; a parse error reported by
  my_parse_charset_xml() is deliberately not turned into a failure.
*/
static bool my_read_charset_file(const char *filename, myf myflags)
{
  uchar *buf;
  int  fd;
  uint len, tmp_len;
  struct stat stat_info;

  /*
    Validate the size before narrowing it to uint: otherwise a file whose
    size does not fit into uint could wrap to a small value and slip
    past the limit.
  */
  if (stat(filename, &stat_info) ||
      stat_info.st_size < 0 ||
      stat_info.st_size > MY_MAX_ALLOWED_BUF)
    return true;

  len= (uint) stat_info.st_size;
  if (!(buf= (uchar*) my_malloc(len,myflags)))
    return true;

  if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
    goto error;
  tmp_len=my_read(fd, buf, len, myflags);
  my_close(fd,myflags);
  if (tmp_len != len)
    goto error;

  /* Parse errors are ignored here; collations parsed so far stay added. */
  (void) my_parse_charset_xml((char*) buf,len,add_collation);

  my_free(buf, myflags);
  return false;

error:
  my_free(buf, myflags);
  return true;
}
389
390
391
char *get_charsets_dir(char *buf)
392
{
393
  const char *sharedir= SHAREDIR;
394
  char *res;
395
396
  if (charsets_dir != NULL)
397
    strmake(buf, charsets_dir, FN_REFLEN-1);
398
  else
399
  {
400
    if (test_if_hard_path(sharedir) ||
401
	is_prefix(sharedir, DEFAULT_CHARSET_HOME))
402
      strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
403
    else
404
      strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
405
	      NullS);
406
  }
407
  res= convert_dirname(buf,buf,NullS);
51.3.22 by Jay Pipes
Final round of removal of DBUG in mysys/, including Makefile
408
  return(res);
1 by brian
clean slate
409
}
410
411
CHARSET_INFO *all_charsets[256];
412
CHARSET_INFO *default_charset_info = &my_charset_latin1;
413
414
/*
  Install a compiled-in collation into all_charsets at the index given
  by cs->number and flag it MY_CS_AVAILABLE.  Any entry already stored
  in that slot is replaced.
*/
void add_compiled_collation(CHARSET_INFO *cs)
{
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
419
420
static void *cs_alloc(size_t size)
421
{
422
  return my_once_alloc(size, MYF(MY_WME));
423
}
424
425
146 by Brian Aker
my_bool cleanup.
426
/*
  Populate all_charsets on first use.

  The charset_initialized flag is tested once without THR_LOCK_charset
  (so that callers do not take the lock once initialization is done) and
  again after the lock has been acquired.  The thread that finds the
  flag still clear:
    - zeroes all_charsets and registers the compiled charsets,
    - builds lexer maps for every entry that has a ctype table, dropping
      entries for which that fails,
    - loads the charset index file from the charsets directory,
    - sets charset_initialized.

  Returns the result of my_read_charset_file() for the index file when
  this call performed the initialization, false otherwise.
*/
static bool init_available_charsets(myf myflags)
{
  char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
  bool error=false;

  /*
    We have to use charset_initialized to not lock on THR_LOCK_charset
    inside get_internal_charset...
  */
  if (charset_initialized)
    return error;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing all_charsets
  */
  pthread_mutex_lock(&THR_LOCK_charset);
  if (!charset_initialized)
  {
    size_t idx;

    bzero(&all_charsets,sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Prepare the compiled charsets; the last slot is not visited. */
    for (idx= 0; idx < array_elements(all_charsets)-1; idx++)
    {
      CHARSET_INFO *entry= all_charsets[idx];

      if (entry && entry->ctype && init_state_maps(entry))
        all_charsets[idx]= NULL;
    }

    strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
    error= my_read_charset_file(fname,myflags);
    charset_initialized=1;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);
  return error;
}
468
469
470
void free_charsets(void)
471
{
472
  charset_initialized=0;
473
}
474
475
476
/*
  Return the ID of the collation with the given name, or 0 if there is
  no such collation.  Makes sure the charset table has been initialized
  before the lookup.
*/
uint get_collation_number(const char *name)
{
  uint id;

  (void) init_available_charsets(MYF(0));
  id= get_collation_number_internal(name);
  return id;
}
481
482
483
/*
  Find the number of a collation by character set name and state flags.

  Scans all_charsets (every slot except the last one) for the first
  entry whose state has at least one of the cs_flags bits set and whose
  csname equals charset_name, compared case-insensitively with
  my_charset_latin1 rules.

  Returns the number of that entry, or 0 if there is none.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  size_t idx;

  (void) init_available_charsets(MYF(0));

  for (idx= 0; idx < array_elements(all_charsets)-1; idx++)
  {
    CHARSET_INFO *entry= all_charsets[idx];

    if (!entry || !entry->csname || !(entry->state & cs_flags))
      continue;
    if (!my_strcasecmp(&my_charset_latin1, entry->csname, charset_name))
      return entry->number;
  }
  return 0;
}
498
499
500
/*
  Return the name stored in all_charsets for the given number.

  Returns the entry's name when charset_number is a valid index, the
  slot is filled, the entry's own number matches and a name is set.
  In every other case -- including a number that lies outside
  all_charsets -- the string "?" is returned.
*/
const char *get_charset_name(uint charset_number)
{
  (void) init_available_charsets(MYF(0));

  /* Never index all_charsets with an out-of-range number. */
  if (charset_number < array_elements(all_charsets))
  {
    CHARSET_INFO *cs= all_charsets[charset_number];

    if (cs && (cs->number == charset_number) && cs->name)
      return cs->name;
  }

  return "?";   /* this mimics find_type() */
}
511
512
513
/*
  Return the ready-to-use collation stored at index cs_number.

  Runs entirely under THR_LOCK_charset.  cs_number is used as an index
  without a range check, so callers must pass a valid one.

  - If the entry is neither compiled nor loaded, its "<csname>.xml" file
    from the charsets directory is read first (the result of that read
    is not examined).
  - An entry that is not MY_CS_AVAILABLE afterwards yields NULL.
  - An entry that is not yet MY_CS_READY has its cset and coll init()
    hooks run (when present); if one reports failure NULL is returned,
    otherwise MY_CS_READY is set.
*/
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
  char  path[FN_REFLEN];
  CHARSET_INFO *cs;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing all_charsets
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  cs= all_charsets[cs_number];
  if (cs)
  {
    if (!(cs->state & (MY_CS_COMPILED | MY_CS_LOADED)))
    {
      strxmov(get_charsets_dir(path), cs->csname, ".xml", NullS);
      my_read_charset_file(path,flags);
    }
    if (!(cs->state & MY_CS_AVAILABLE))
      cs= NULL;
  }

  if (cs && !(cs->state & MY_CS_READY))
  {
    /* The collation hook is only run if the charset hook did not fail. */
    if (cs->cset->init && cs->cset->init(cs, cs_alloc))
      cs= NULL;
    else if (cs->coll->init && cs->coll->init(cs, cs_alloc))
      cs= NULL;
    else
      cs->state|= MY_CS_READY;
  }

  pthread_mutex_unlock(&THR_LOCK_charset);
  return cs;
}
542
543
544
/*
  Get a collation by its number.

  default_charset_info is returned directly when its number matches.
  Numbers that are 0 or not below array_elements(all_charsets)-1 yield
  NULL without an error message.  For any other number that cannot be
  resolved, NULL is returned and, if MY_WME is set in flags,
  EE_UNKNOWN_CHARSET is reported with "#<number>" and the path of the
  index file.
*/
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
  CHARSET_INFO *cs;

  if (default_charset_info->number == cs_number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (cs_number == 0 || cs_number >= array_elements(all_charsets)-1)
    return NULL;

  cs= get_internal_charset(cs_number, flags);
  if (cs || !(flags & MY_WME))
    return cs;

  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    cs_string[0]='#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return cs;
}
567
568
/*
  Get a collation by collation name.

  Returns the collation, or NULL if the name is unknown or the collation
  cannot be made ready.  On NULL, EE_UNKNOWN_COLLATION is reported with
  the name and the path of the index file when MY_WME is set in flags.
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *cs= NULL;
  uint cs_number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  cs_number= get_collation_number(cs_name);
  if (cs_number)
    cs= get_internal_charset(cs_number,flags);

  if (!cs && (flags & MY_WME))
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
  }

  return cs;
}
586
587
588
CHARSET_INFO *get_charset_by_csname(const char *cs_name,
589
				    uint cs_flags,
590
				    myf flags)
591
{
592
  uint cs_number;
593
  CHARSET_INFO *cs;
594
595
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */
596
597
  cs_number= get_charset_number(cs_name, cs_flags);
598
  cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
599
600
  if (!cs && (flags & MY_WME))
601
  {
602
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
603
    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
604
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
605
  }
606
51.3.22 by Jay Pipes
Final round of removal of DBUG in mysys/, including Makefile
607
  return(cs);
1 by brian
clean slate
608
}
609
610
611
/**
612
  Resolve character set by the character set name (utf8, latin1, ...).
613
614
  The function tries to resolve character set by the specified name. If
615
  there is character set with the given name, it is assigned to the "cs"
163 by Brian Aker
Merge Monty's code.
616
  parameter and false is returned. If there is no such character set,
617
  "default_cs" is assigned to the "cs" and true is returned.
1 by brian
clean slate
618
619
  @param[in] cs_name    Character set name.
620
  @param[in] default_cs Default character set.
621
  @param[out] cs        Variable to store character set.
622
163 by Brian Aker
Merge Monty's code.
623
  @return false if character set was resolved successfully; true if there
1 by brian
clean slate
624
  is no character set with given name.
625
*/
626
146 by Brian Aker
my_bool cleanup.
627
bool resolve_charset(const char *cs_name,
1 by brian
clean slate
628
                        CHARSET_INFO *default_cs,
629
                        CHARSET_INFO **cs)
630
{
631
  *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
632
633
  if (*cs == NULL)
634
  {
635
    *cs= default_cs;
163 by Brian Aker
Merge Monty's code.
636
    return true;
1 by brian
clean slate
637
  }
638
163 by Brian Aker
Merge Monty's code.
639
  return false;
1 by brian
clean slate
640
}
641
642
643
/**
644
  Resolve collation by the collation name (utf8_general_ci, ...).
645
646
  The function tries to resolve collation by the specified name. If there
647
  is collation with the given name, it is assigned to the "cl" parameter
163 by Brian Aker
Merge Monty's code.
648
  and false is returned. If there is no such collation, "default_cl" is
649
  assigned to the "cl" and true is returned.
1 by brian
clean slate
650
651
  @param[out] cl        Variable to store collation.
652
  @param[in] cl_name    Collation name.
653
  @param[in] default_cl Default collation.
654
163 by Brian Aker
Merge Monty's code.
655
  @return false if collation was resolved successfully; true if there is no
1 by brian
clean slate
656
  collation with given name.
657
*/
658
146 by Brian Aker
my_bool cleanup.
659
bool resolve_collation(const char *cl_name,
1 by brian
clean slate
660
                          CHARSET_INFO *default_cl,
661
                          CHARSET_INFO **cl)
662
{
663
  *cl= get_charset_by_name(cl_name, MYF(0));
664
665
  if (*cl == NULL)
666
  {
667
    *cl= default_cl;
163 by Brian Aker
Merge Monty's code.
668
    return true;
1 by brian
clean slate
669
  }
670
163 by Brian Aker
Merge Monty's code.
671
  return false;
1 by brian
clean slate
672
}
673
674
675
/*
676
  Escape string with backslashes (\)
677
678
  SYNOPSIS
679
    escape_string_for_mysql()
680
    charset_info        Charset of the strings
681
    to                  Buffer for escaped string
682
    to_length           Length of destination buffer, or 0
683
    from                The string to escape
684
    length              The length of the string to escape
685
686
  DESCRIPTION
687
    This escapes the contents of a string by adding backslashes before special
688
    characters, and turning others into specific escape sequences, such as
689
    turning newlines into \n and null bytes into \0.
690
691
  NOTE
692
    To maintain compatibility with the old C API, to_length may be 0 to mean
693
    "big enough"
694
695
  RETURN VALUES
696
    (size_t) -1 The escaped string did not fit in the to buffer
697
    #           The length of the escaped string
698
*/
699
700
size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
701
                               char *to, size_t to_length,
702
                               const char *from, size_t length)
703
{
704
  const char *to_start= to;
705
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker
Merge Monty's code.
706
  bool overflow= false;
1 by brian
clean slate
707
#ifdef USE_MB
146 by Brian Aker
my_bool cleanup.
708
  bool use_mb_flag= use_mb(charset_info);
1 by brian
clean slate
709
#endif
710
  for (end= from + length; from < end; from++)
711
  {
712
    char escape= 0;
713
#ifdef USE_MB
714
    int tmp_length;
715
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
716
    {
717
      if (to + tmp_length > to_end)
718
      {
163 by Brian Aker
Merge Monty's code.
719
        overflow= true;
1 by brian
clean slate
720
        break;
721
      }
722
      while (tmp_length--)
723
	*to++= *from++;
724
      from--;
725
      continue;
726
    }
727
    /*
728
     If the next character appears to begin a multi-byte character, we
729
     escape that first byte of that apparent multi-byte character. (The
730
     character just looks like a multi-byte character -- if it were actually
731
     a multi-byte character, it would have been passed through in the test
732
     above.)
733
734
     Without this check, we can create a problem by converting an invalid
735
     multi-byte character into a valid one. For example, 0xbf27 is not
736
     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
737
    */
738
    if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
739
      escape= *from;
740
    else
741
#endif
742
    switch (*from) {
743
    case 0:				/* Must be escaped for 'mysql' */
744
      escape= '0';
745
      break;
746
    case '\n':				/* Must be escaped for logs */
747
      escape= 'n';
748
      break;
749
    case '\r':
750
      escape= 'r';
751
      break;
752
    case '\\':
753
      escape= '\\';
754
      break;
755
    case '\'':
756
      escape= '\'';
757
      break;
758
    case '"':				/* Better safe than sorry */
759
      escape= '"';
760
      break;
761
    case '\032':			/* This gives problems on Win32 */
762
      escape= 'Z';
763
      break;
764
    }
765
    if (escape)
766
    {
767
      if (to + 2 > to_end)
768
      {
163 by Brian Aker
Merge Monty's code.
769
        overflow= true;
1 by brian
clean slate
770
        break;
771
      }
772
      *to++= '\\';
773
      *to++= escape;
774
    }
775
    else
776
    {
777
      if (to + 1 > to_end)
778
      {
163 by Brian Aker
Merge Monty's code.
779
        overflow= true;
1 by brian
clean slate
780
        break;
781
      }
782
      *to++= *from;
783
    }
784
  }
785
  *to= 0;
786
  return overflow ? (size_t) -1 : (size_t) (to - to_start);
787
}
788
789
790
#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call; NULL until then. */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set chosen for the system default ANSI code page.

  The code page is queried with GetLocaleInfo() on the first call only
  and the choice is cached.  my_charset_cp932_japanese_ci is returned
  for "cp932", my_charset_bin for everything else.
*/
CHARSET_INFO *fs_character_set()
{
  if (fs_cset_cache)
    return fs_cset_cache;

  {
    char buf[10]= "cp";

    GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                  buf+2, sizeof(buf)-3);
    /*
      We cannot call get_charset_by_name here
      because fs_character_set() is executed before
      the initialization of the charset mutex that is
      used inside get_charset_by_name.
      As we're now interested in cp932 only,
      let's just detect it using strcmp().
    */
    if (strcmp(buf, "cp932") == 0)
      fs_cset_cache= &my_charset_cp932_japanese_ci;
    else
      fs_cset_cache= &my_charset_bin;
  }
  return fs_cset_cache;
}
#endif
814
815
/*
816
  Escape apostrophes by doubling them up
817
818
  SYNOPSIS
819
    escape_quotes_for_mysql()
820
    charset_info        Charset of the strings
821
    to                  Buffer for escaped string
822
    to_length           Length of destination buffer, or 0
823
    from                The string to escape
824
    length              The length of the string to escape
825
826
  DESCRIPTION
827
    This escapes the contents of a string by doubling up any apostrophes that
828
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
829
    effect on the server.
830
831
  NOTE
832
    To be consistent with escape_string_for_mysql(), to_length may be 0 to
833
    mean "big enough"
834
835
  RETURN VALUES
836
    ~0          The escaped string did not fit in the to buffer
837
    >=0         The length of the escaped string
838
*/
839
840
size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
841
                               char *to, size_t to_length,
842
                               const char *from, size_t length)
843
{
844
  const char *to_start= to;
845
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker
Merge Monty's code.
846
  bool overflow= false;
1 by brian
clean slate
847
#ifdef USE_MB
146 by Brian Aker
my_bool cleanup.
848
  bool use_mb_flag= use_mb(charset_info);
1 by brian
clean slate
849
#endif
850
  for (end= from + length; from < end; from++)
851
  {
852
#ifdef USE_MB
853
    int tmp_length;
854
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
855
    {
856
      if (to + tmp_length > to_end)
857
      {
163 by Brian Aker
Merge Monty's code.
858
        overflow= true;
1 by brian
clean slate
859
        break;
860
      }
861
      while (tmp_length--)
862
	*to++= *from++;
863
      from--;
864
      continue;
865
    }
866
    /*
867
      We don't have the same issue here with a non-multi-byte character being
868
      turned into a multi-byte character by the addition of an escaping
869
      character, because we are only escaping the ' character with itself.
870
     */
871
#endif
872
    if (*from == '\'')
873
    {
874
      if (to + 2 > to_end)
875
      {
163 by Brian Aker
Merge Monty's code.
876
        overflow= true;
1 by brian
clean slate
877
        break;
878
      }
879
      *to++= '\'';
880
      *to++= '\'';
881
    }
882
    else
883
    {
884
      if (to + 1 > to_end)
885
      {
163 by Brian Aker
Merge Monty's code.
886
        overflow= true;
1 by brian
clean slate
887
        break;
888
      }
889
      *to++= *from;
890
    }
891
  }
892
  *to= 0;
893
  return overflow ? (ulong)~0 : (ulong) (to - to_start);
894
}