~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
994.2.4 by Monty Taylor
Blast. Fixed some make distcheck issues.
16
#include "mysys/mysys_priv.h"
17
#include "mysys/mysys_err.h"
212.5.18 by Monty Taylor
Moved m_ctype, m_string and my_bitmap. Removed t_ctype.
18
#include <mystrings/m_ctype.h>
19
#include <mystrings/m_string.h>
722.1.4 by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values
20
#include <drizzled/configmake.h>
1 by brian
clean slate
21
22
23
/*
24
  The code below implements this functionality:
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
25
1 by brian
clean slate
26
    - Initializing charset related structures
27
    - Loading dynamic charsets
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
28
    - Searching for a proper CHARSET_INFO
1 by brian
clean slate
29
      using charset name, collation name or collation ID
30
    - Setting server default character set
31
*/
32
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
33
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
1 by brian
clean slate
34
{
35
  return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
36
}
37
38
39
static uint
40
get_collation_number_internal(const char *name)
41
{
42
  CHARSET_INFO **cs;
43
  for (cs= all_charsets;
44
       cs < all_charsets+array_elements(all_charsets)-1 ;
45
       cs++)
46
  {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
47
    if ( cs[0] && cs[0]->name &&
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
48
         !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
1 by brian
clean slate
49
      return cs[0]->number;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
50
  }
1 by brian
clean slate
51
  return 0;
52
}
53
54
146 by Brian Aker
my_bool cleanup.
55
/*
  Build the two 256-entry lexer lookup tables of a character set.

  Both tables are obtained with malloc() and stored in cs->state_map and
  cs->ident_map:
    state_map[c]  the MY_LEX_* state assigned to byte value c
    ident_map[c]  non-zero when state_map[c] was MY_LEX_IDENT or
                  MY_LEX_NUMBER_IDENT at the time ident_map was filled
                  (i.e. before the hex/binary overrides for x, X, b, B)

  RETURN
    false  both tables were allocated and filled
    true   an allocation failed; no table allocated by this call is left
           behind (cs->state_map is NULL)
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  uint32_t i;
  unsigned char *state_map;
  unsigned char *ident_map;

  if (!(cs->state_map= (unsigned char*) malloc(256)))
    return true;

  if (!(cs->ident_map= (unsigned char*) malloc(256)))
  {
    /* Release the first table so a failure neither leaks it nor leaves
       cs pointing at an uninitialized map. */
    free(cs->state_map);
    cs->state_map= NULL;
    return true;
  }

  state_map= cs->state_map;
  ident_map= cs->ident_map;

  /* Fill state_map with states to get a faster parser */
  for (i= 0; i < 256; i++)
  {
    if (my_isalpha(cs, i))
      state_map[i]= (unsigned char) MY_LEX_IDENT;
    else if (my_isdigit(cs, i))
      state_map[i]= (unsigned char) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, i) > 1)
      state_map[i]= (unsigned char) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs, i))
      state_map[i]= (unsigned char) MY_LEX_SKIP;
    else
      state_map[i]= (unsigned char) MY_LEX_CHAR;
  }

  /* Fixed punctuation overrides, applied on top of the ctype-based pass. */
  state_map[(unsigned char) '_']= (unsigned char) MY_LEX_IDENT;
  state_map[(unsigned char) '$']= (unsigned char) MY_LEX_IDENT;
  state_map[(unsigned char) '\'']= (unsigned char) MY_LEX_STRING;
  state_map[(unsigned char) '.']= (unsigned char) MY_LEX_REAL_OR_POINT;
  state_map[(unsigned char) '>']= (unsigned char) MY_LEX_CMP_OP;
  state_map[(unsigned char) '=']= (unsigned char) MY_LEX_CMP_OP;
  state_map[(unsigned char) '!']= (unsigned char) MY_LEX_CMP_OP;
  state_map[(unsigned char) '<']= (unsigned char) MY_LEX_LONG_CMP_OP;
  state_map[(unsigned char) '&']= (unsigned char) MY_LEX_BOOL;
  state_map[(unsigned char) '|']= (unsigned char) MY_LEX_BOOL;
  state_map[(unsigned char) '#']= (unsigned char) MY_LEX_COMMENT;
  state_map[(unsigned char) ';']= (unsigned char) MY_LEX_SEMICOLON;
  state_map[(unsigned char) ':']= (unsigned char) MY_LEX_SET_VAR;
  state_map[0]= (unsigned char) MY_LEX_EOL;
  state_map[(unsigned char) '\\']= (unsigned char) MY_LEX_ESCAPE;
  state_map[(unsigned char) '/']= (unsigned char) MY_LEX_LONG_COMMENT;
  state_map[(unsigned char) '*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
  state_map[(unsigned char) '@']= (unsigned char) MY_LEX_USER_END;
  state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
  state_map[(unsigned char) '"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;

  /*
    Create a second map to make it faster to find identifiers
  */
  for (i= 0; i < 256; i++)
  {
    ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
                                   state_map[i] == MY_LEX_NUMBER_IDENT);
  }

  /*
    Special handling of hex and binary strings.  Done after ident_map is
    filled, so x, X, b and B keep their identifier flag there.
  */
  state_map[(unsigned char) 'x']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  state_map[(unsigned char) 'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  state_map[(unsigned char) 'b']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  state_map[(unsigned char) 'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  return false;
}
117
118
861 by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore)
119
static bool charset_initialized= false;
1 by brian
clean slate
120
121
CHARSET_INFO *all_charsets[256];
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
122
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
1 by brian
clean slate
123
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
124
/*
  Register a collation in the global table.

  Flags cs as MY_CS_AVAILABLE and stores it in all_charsets at index
  cs->number.  cs->number is not range-checked here.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  cs->state= cs->state | MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
129
632.1.11 by Monty Taylor
Fixed Sun Studio warnings in mysys.
130
/*
  Allocator passed to the cset->init / coll->init hooks by
  get_internal_charset().  Thin wrapper over malloc(): returns its result
  unchanged, NULL included.
*/
void *cs_alloc(size_t size)
{
  void *block= malloc(size);

  return block;
}
134
135
146 by Brian Aker
my_bool cleanup.
136
/*
  One-time setup of the global collation table.

  On the first call (charset_initialized still false) this:
    1. zeroes all_charsets,
    2. calls init_compiled_charsets(myflags) -- defined elsewhere;
       presumably it registers the compiled-in collations, confirm there,
    3. calls init_state_maps() for every table entry that has a ctype
       table, and clears the slot of any entry for which that fails,
    4. sets charset_initialized.
  Later calls do nothing.  The walk in step 3 stops one slot short of the
  end of all_charsets.

  No locking is done here.

  RETURN
    Always false.
*/
static bool init_available_charsets(myf myflags)
{
  if (!charset_initialized)
  {
    CHARSET_INFO **slot;
    CHARSET_INFO **stop= all_charsets + array_elements(all_charsets) - 1;

    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    for (slot= all_charsets; slot < stop; slot++)
    {
      if (*slot == NULL)
        continue;

      /* Drop an entry whose lexer maps could not be built. */
      if ((*slot)->ctype && init_state_maps(*slot))
        *slot= NULL;
    }

    charset_initialized= true;
  }
  assert(charset_initialized);

  return false;
}
168
169
170
void free_charsets(void)
171
{
861 by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore)
172
  charset_initialized= true;
1 by brian
clean slate
173
}
174
175
482 by Brian Aker
Remove uint.
176
/*
  Public lookup of a collation id by collation name.

  Makes sure the collation table is set up, then defers to
  get_collation_number_internal().

  RETURN
    Whatever get_collation_number_internal(name) returns (0 = not found).
*/
uint32_t get_collation_number(const char *name)
{
  uint32_t id;

  (void) init_available_charsets(MYF(0));
  id= get_collation_number_internal(name);

  return id;
}
181
182
482 by Brian Aker
Remove uint.
183
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
1 by brian
clean slate
184
{
185
  CHARSET_INFO **cs;
186
  init_available_charsets(MYF(0));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
187
1 by brian
clean slate
188
  for (cs= all_charsets;
189
       cs < all_charsets+array_elements(all_charsets)-1 ;
190
       cs++)
191
  {
192
    if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
193
         !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
1 by brian
clean slate
194
      return cs[0]->number;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
195
  }
1 by brian
clean slate
196
  return 0;
197
}
198
199
482 by Brian Aker
Remove uint.
200
const char *get_charset_name(uint32_t charset_number)
1 by brian
clean slate
201
{
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
202
  const CHARSET_INFO *cs;
1 by brian
clean slate
203
  init_available_charsets(MYF(0));
204
205
  cs=all_charsets[charset_number];
206
  if (cs && (cs->number == charset_number) && cs->name )
207
    return (char*) cs->name;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
208
1 by brian
clean slate
209
  return (char*) "?";   /* this mimics find_type() */
210
}
211
212
482 by Brian Aker
Remove uint.
213
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
1 by brian
clean slate
214
{
215
  CHARSET_INFO *cs;
216
  /*
217
    To make things thread safe we are not allowing other threads to interfere
218
    while we may changing the cs_info_table
219
  */
220
  if ((cs= all_charsets[cs_number]))
221
  {
222
    if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED))
223
    {
383.1.7 by Brian Aker
Remove homebrew xml parser.
224
      assert(0);
1 by brian
clean slate
225
    }
226
    cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
227
  }
228
  if (cs && !(cs->state & MY_CS_READY))
229
  {
230
    if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
231
        (cs->coll->init && cs->coll->init(cs, cs_alloc)))
232
      cs= NULL;
233
    else
234
      cs->state|= MY_CS_READY;
235
  }
861 by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore)
236
1 by brian
clean slate
237
  return cs;
238
}
239
240
862 by Brian Aker
Remove charset directory code.
241
const CHARSET_INFO *get_charset(uint32_t cs_number)
1 by brian
clean slate
242
{
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
243
  const CHARSET_INFO *cs;
1 by brian
clean slate
244
  if (cs_number == default_charset_info->number)
245
    return default_charset_info;
246
247
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
248
1 by brian
clean slate
249
  if (!cs_number || cs_number >= array_elements(all_charsets)-1)
250
    return NULL;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
251
383.1.7 by Brian Aker
Remove homebrew xml parser.
252
  cs= get_internal_charset(cs_number);
1 by brian
clean slate
253
254
  return cs;
255
}
256
862 by Brian Aker
Remove charset directory code.
257
/*
  Get a ready-to-use collation by collation name.

  Resolves cs_name to an id with get_collation_number() and, when an id
  was found, obtains the collation through get_internal_charset().

  RETURN
    The collation, or NULL when the name is unknown (id 0) or
    get_internal_charset() returns NULL.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
{
  uint32_t id;

  (void) init_available_charsets(MYF(0));   /* If it isn't initialized */

  id= get_collation_number(cs_name);
  if (id == 0)
    return NULL;

  return get_internal_charset(id);
}
268
269
862 by Brian Aker
Remove charset directory code.
270
/*
  Get a ready-to-use collation by character set name and state flags.

  Resolves (cs_name, cs_flags) to an id with get_charset_number() and,
  when an id was found, obtains the collation through
  get_internal_charset().

  RETURN
    The collation, or NULL when no entry matches (id 0) or
    get_internal_charset() returns NULL.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  uint32_t id;

  (void) init_available_charsets(MYF(0));   /* If it isn't initialized */

  id= get_charset_number(cs_name, cs_flags);
  if (id == 0)
    return NULL;

  return get_internal_charset(id);
}
282
283
284
/*
285
  Escape apostrophes by doubling them up
286
287
  SYNOPSIS
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
288
    escape_quotes_for_drizzle()
1 by brian
clean slate
289
    charset_info        Charset of the strings
290
    to                  Buffer for escaped string
291
    to_length           Length of destination buffer, or 0
292
    from                The string to escape
293
    length              The length of the string to escape
294
295
  DESCRIPTION
296
    This escapes the contents of a string by doubling up any apostrophes that
297
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
298
    effect on the server.
299
300
  NOTE
301
    To be consistent with escape_string_for_mysql(), to_length may be 0 to
302
    mean "big enough"
303
304
  RETURN VALUES
365.2.9 by Monty Taylor
Got rid of all instances of ~0
305
    UINT32_MAX  The escaped string did not fit in the to buffer
1 by brian
clean slate
306
    >=0         The length of the escaped string
307
*/
308
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
309
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
310
                                 char *to, size_t to_length,
311
                                 const char *from, size_t length)
1 by brian
clean slate
312
{
313
  const char *to_start= to;
314
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker
Merge Monty's code.
315
  bool overflow= false;
1 by brian
clean slate
316
#ifdef USE_MB
146 by Brian Aker
my_bool cleanup.
317
  bool use_mb_flag= use_mb(charset_info);
1 by brian
clean slate
318
#endif
319
  for (end= from + length; from < end; from++)
320
  {
321
#ifdef USE_MB
322
    int tmp_length;
323
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
324
    {
325
      if (to + tmp_length > to_end)
326
      {
163 by Brian Aker
Merge Monty's code.
327
        overflow= true;
1 by brian
clean slate
328
        break;
329
      }
330
      while (tmp_length--)
331
	*to++= *from++;
332
      from--;
333
      continue;
334
    }
335
    /*
336
      We don't have the same issue here with a non-multi-byte character being
337
      turned into a multi-byte character by the addition of an escaping
338
      character, because we are only escaping the ' character with itself.
339
     */
340
#endif
341
    if (*from == '\'')
342
    {
343
      if (to + 2 > to_end)
344
      {
163 by Brian Aker
Merge Monty's code.
345
        overflow= true;
1 by brian
clean slate
346
        break;
347
      }
348
      *to++= '\'';
349
      *to++= '\'';
350
    }
351
    else
352
    {
353
      if (to + 1 > to_end)
354
      {
163 by Brian Aker
Merge Monty's code.
355
        overflow= true;
1 by brian
clean slate
356
        break;
357
      }
358
      *to++= *from;
359
    }
360
  }
361
  *to= 0;
365.2.9 by Monty Taylor
Got rid of all instances of ~0
362
  return overflow ? UINT32_MAX : (uint32_t) (to - to_start);
1 by brian
clean slate
363
}