~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
16
#include <config.h>
1241.9.57 by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined.
17
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
18
#include <drizzled/charset.h>
19
#include <drizzled/error.h>
20
#include <drizzled/charset_info.h>
21
#include <drizzled/internal/m_string.h>
722.1.4 by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values
22
#include <drizzled/configmake.h>
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
23
#include <vector>
24
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
25
#include <drizzled/visibility.h>
2119.4.1 by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by
26
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
27
using namespace std;
28
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
29
namespace drizzled
30
{
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
31
32
/*
  Every buffer handed out by cs_alloc() is recorded here so that
  free_charsets() can release all of them in one sweep.
*/
static std::vector<unsigned char*> memory_vector;
1 by brian
clean slate
36
37
/*
38
  The code below implements this functionality:
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
39
1 by brian
clean slate
40
    - Initializing charset related structures
41
    - Loading dynamic charsets
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
42
    - Searching for a proper CHARSET_INFO
1 by brian
clean slate
43
      using charset name, collation name or collation ID
44
    - Setting server default character set
45
*/
46
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
47
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
1 by brian
clean slate
48
{
49
  return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
50
}
51
52
53
/*
  Scan all_charsets for an entry whose name matches `name`.

  Names are compared with my_strcasecmp() using my_charset_utf8_general_ci.
  Empty slots and entries without a name are skipped, and the final slot of
  the table is not examined.

  Returns the number of the first matching entry, or 0 if none matches.
*/
static uint
get_collation_number_internal(const char *name)
{
  CHARSET_INFO **const last= all_charsets + array_elements(all_charsets) - 1;

  for (CHARSET_INFO **slot= all_charsets; slot < last; ++slot)
  {
    const CHARSET_INFO *candidate= *slot;

    if (candidate == NULL || candidate->name == NULL)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, candidate->name, name) == 0)
      return candidate->number;
  }

  return 0;
}
67
2160.1.2 by Olaf van der Spek
casts
68
static unsigned char *cs_alloc(size_t size)
69
{
70
  memory_vector.push_back(new unsigned char[size]);
71
  return memory_vector.back();
72
}
1 by brian
clean slate
73
146 by Brian Aker
my_bool cleanup.
74
/*
  Build the two 256-entry lookup tables of a charset: state_map (one lexer
  state per byte value) and ident_map (non-zero for byte values whose state
  is MY_LEX_IDENT or MY_LEX_NUMBER_IDENT).

  Both tables are obtained from cs_alloc().

  Returns true if either table could not be allocated, false on success.
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  cs->state_map= cs_alloc(256);
  if (cs->state_map == NULL)
    return true;

  cs->ident_map= cs_alloc(256);
  if (cs->ident_map == NULL)
    return true;

  unsigned char *const states= cs->state_map;
  unsigned char *const idents= cs->ident_map;

  /* First pass: classify every byte value using the charset's ctype data. */
  for (int ch= 0; ch < 256; ++ch)
  {
    unsigned char state;

    if (my_isalpha(cs, ch))
      state= MY_LEX_IDENT;
    else if (my_isdigit(cs, ch))
      state= MY_LEX_NUMBER_IDENT;
    else if (my_mbcharlen(cs, ch) > 1)
      state= MY_LEX_IDENT;
    else if (my_isspace(cs, ch))
      state= MY_LEX_SKIP;
    else
      state= MY_LEX_CHAR;

    states[ch]= state;
  }

  /* Fixed overrides for characters with a dedicated lexer state. */
  states['$']= MY_LEX_IDENT;
  states['_']= MY_LEX_IDENT;
  states['\'']= MY_LEX_STRING;
  states['.']= MY_LEX_REAL_OR_POINT;
  states['!']= MY_LEX_CMP_OP;
  states['=']= MY_LEX_CMP_OP;
  states['>']= MY_LEX_CMP_OP;
  states['<']= MY_LEX_LONG_CMP_OP;
  states['|']= MY_LEX_BOOL;
  states['&']= MY_LEX_BOOL;
  states['#']= MY_LEX_COMMENT;
  states[';']= MY_LEX_SEMICOLON;
  states[':']= MY_LEX_SET_VAR;
  states[0]= MY_LEX_EOL;
  states['\\']= MY_LEX_ESCAPE;
  states['/']= MY_LEX_LONG_COMMENT;
  states['*']= MY_LEX_END_LONG_COMMENT;
  states['@']= MY_LEX_USER_END;
  states['`']= MY_LEX_USER_VARIABLE_DELIMITER;
  states['"']= MY_LEX_STRING_OR_DELIMITER;

  /*
    Derive ident_map now, before the hex/binary overrides below, so that
    'x', 'X', 'b' and 'B' are still recorded according to their state here.
  */
  for (int ch= 0; ch < 256; ++ch)
  {
    const bool identifier_byte= states[ch] == MY_LEX_IDENT ||
                                states[ch] == MY_LEX_NUMBER_IDENT;
    idents[ch]= identifier_byte ? 1 : 0;
  }

  /* Hex and binary string prefixes get their own states in state_map only. */
  states['X']= MY_LEX_IDENT_OR_HEX;
  states['x']= MY_LEX_IDENT_OR_HEX;
  states['B']= MY_LEX_IDENT_OR_BIN;
  states['b']= MY_LEX_IDENT_OR_BIN;

  return false;
}
129
861 by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore)
130
static bool charset_initialized= false;
1 by brian
clean slate
131
2119.4.1 by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by
132
DRIZZLED_API CHARSET_INFO *all_charsets[256];
133
const DRIZZLED_API CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
1 by brian
clean slate
134
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
135
/*
  Register a compiled-in collation.

  Stores `cs` in all_charsets at index cs->number and sets MY_CS_AVAILABLE
  in its state.

  A collation whose number does not fit in all_charsets is a programming
  error: it trips the assert in debug builds and is ignored otherwise,
  instead of writing past the end of the table.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  const bool in_range= cs->number < array_elements(all_charsets);
  assert(in_range);
  if (not in_range)
    return;

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}
140
146 by Brian Aker
my_bool cleanup.
141
/*
  Populate all_charsets on first use.

  On the first call the table is zeroed, init_compiled_charsets(myflags) is
  run, and init_state_maps() is called for every registered entry that has
  ctype data; an entry whose state maps cannot be built is removed from the
  table. The final slot of the table is not visited. Later calls do nothing
  until free_charsets() clears charset_initialized.

  Always returns false.
*/
static bool init_available_charsets(myf myflags)
{
  if (not charset_initialized)
  {
    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    CHARSET_INFO **const last= all_charsets + array_elements(all_charsets) - 1;
    for (CHARSET_INFO **slot= all_charsets; slot < last; ++slot)
    {
      if (*slot == NULL)
        continue;
      if (!(*slot)->ctype)
        continue;

      if (init_state_maps(*slot))
        *slot= NULL;
    }

    charset_initialized= true;
  }
  assert(charset_initialized);

  return false;
}
173
174
2160.1.2 by Olaf van der Spek
casts
175
void free_charsets()
1 by brian
clean slate
176
{
2160.1.7 by Olaf van der Spek
fix
177
  charset_initialized= false;
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
178
2160.1.6 by Olaf van der Spek
USe "not" instead of "!"
179
  while (not memory_vector.empty())
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
180
  {
2160.1.2 by Olaf van der Spek
casts
181
    delete[] memory_vector.back();
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
182
    memory_vector.pop_back();
183
  }
1 by brian
clean slate
184
}
185
186
482 by Brian Aker
Remove uint.
187
uint32_t get_collation_number(const char *name)
1 by brian
clean slate
188
{
189
  init_available_charsets(MYF(0));
190
  return get_collation_number_internal(name);
191
}
192
193
482 by Brian Aker
Remove uint.
194
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
1 by brian
clean slate
195
{
196
  CHARSET_INFO **cs;
197
  init_available_charsets(MYF(0));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
198
1 by brian
clean slate
199
  for (cs= all_charsets;
200
       cs < all_charsets+array_elements(all_charsets)-1 ;
201
       cs++)
202
  {
2085.2.3 by Brian Aker
Fix strcasecmp issues (ie, check UTF-8).
203
    if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
1 by brian
clean slate
204
      return cs[0]->number;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
205
  }
1 by brian
clean slate
206
  return 0;
207
}
208
209
482 by Brian Aker
Remove uint.
210
const char *get_charset_name(uint32_t charset_number)
1 by brian
clean slate
211
{
212
  init_available_charsets(MYF(0));
213
2160.1.2 by Olaf van der Spek
casts
214
  const CHARSET_INFO *cs= all_charsets[charset_number];
1 by brian
clean slate
215
  if (cs && (cs->number == charset_number) && cs->name )
2160.1.2 by Olaf van der Spek
casts
216
    return cs->name;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
217
2160.1.2 by Olaf van der Spek
casts
218
  return "?";   /* this mimics find_type() */
1 by brian
clean slate
219
}
220
221
482 by Brian Aker
Remove uint.
222
/*
  Fetch the entry stored at all_charsets[cs_number] and make it ready.

  `cs_number` is used as an index without range checking; callers must pass
  a value that fits in all_charsets.

  Returns NULL when the slot is empty, when the entry is not flagged
  MY_CS_AVAILABLE, or when one of its init hooks reports failure. Otherwise
  the entry is returned; if it was not yet flagged MY_CS_READY, its cset and
  coll init hooks (when present) are run with cs_alloc as allocator and the
  flag is set.
*/
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
{
  CHARSET_INFO *entry= all_charsets[cs_number];
  if (entry == NULL)
    return NULL;

  /* A registered entry is expected to be flagged compiled or loaded. */
  if ((entry->state & (MY_CS_COMPILED | MY_CS_LOADED)) == 0)
  {
    assert(0);
  }

  if (!(entry->state & MY_CS_AVAILABLE))
    return NULL;

  if (entry->state & MY_CS_READY)
    return entry;

  if (entry->cset->init && entry->cset->init(entry, cs_alloc))
    return NULL;
  if (entry->coll->init && entry->coll->init(entry, cs_alloc))
    return NULL;

  entry->state|= MY_CS_READY;
  return entry;
}
248
249
862 by Brian Aker
Remove charset directory code.
250
/*
  Look up a collation by number.

  The default collation is returned directly when its number is requested.
  Otherwise the charset table is initialized if necessary and the lookup is
  delegated to get_internal_charset().

  Returns NULL for number 0, for numbers at or beyond the last slot of
  all_charsets, and whenever get_internal_charset() returns NULL.
*/
const CHARSET_INFO *get_charset(uint32_t cs_number)
{
  if (default_charset_info->number == cs_number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const bool usable_id= cs_number != 0 &&
                        cs_number < array_elements(all_charsets) - 1;
  if (not usable_id)
    return NULL;

  return get_internal_charset(cs_number);
}
265
862 by Brian Aker
Remove charset directory code.
266
/*
  Look up a collation by its collation name.

  Resolves `cs_name` to a number with get_collation_number() and hands that
  number to get_internal_charset().

  Returns NULL when the name is unknown or the entry cannot be made ready.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
{
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t collation_id= get_collation_number(cs_name);
  if (collation_id == 0)
    return NULL;

  return get_internal_charset(collation_id);
}
277
278
862 by Brian Aker
Remove charset directory code.
279
/*
  Look up a collation by character set name and state flags.

  Resolves the pair to a number with get_charset_number() and hands that
  number to get_internal_charset().

  Returns NULL when nothing matches or the entry cannot be made ready.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t charset_id= get_charset_number(cs_name, cs_flags);
  if (charset_id == 0)
    return NULL;

  return get_internal_charset(charset_id);
}
291
292
293
/*
294
  Escape apostrophes by doubling them up
295
296
  SYNOPSIS
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
297
    escape_quotes_for_drizzle()
1 by brian
clean slate
298
    charset_info        Charset of the strings
299
    to                  Buffer for escaped string
300
    to_length           Length of destination buffer, or 0
301
    from                The string to escape
302
    length              The length of the string to escape
303
304
  DESCRIPTION
305
    This escapes the contents of a string by doubling up any apostrophes that
306
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
307
    effect on the server.
308
309
  NOTE
310
    To be consistent with escape_string_for_mysql(), to_length may be 0 to
311
    mean "big enough"
312
313
  RETURN VALUES
365.2.9 by Monty Taylor
Got rid of all instances of ~0
314
    UINT32_MAX  The escaped string did not fit in the to buffer
1 by brian
clean slate
315
    >=0         The length of the escaped string
316
*/
317
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
318
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
319
                                 char *to, size_t to_length,
320
                                 const char *from, size_t length)
1 by brian
clean slate
321
{
322
  const char *to_start= to;
323
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker
Merge Monty's code.
324
  bool overflow= false;
146 by Brian Aker
my_bool cleanup.
325
  bool use_mb_flag= use_mb(charset_info);
1 by brian
clean slate
326
  for (end= from + length; from < end; from++)
327
  {
328
    int tmp_length;
329
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
330
    {
331
      if (to + tmp_length > to_end)
332
      {
163 by Brian Aker
Merge Monty's code.
333
        overflow= true;
1 by brian
clean slate
334
        break;
335
      }
336
      while (tmp_length--)
337
	*to++= *from++;
338
      from--;
339
      continue;
340
    }
341
    /*
342
      We don't have the same issue here with a non-multi-byte character being
343
      turned into a multi-byte character by the addition of an escaping
344
      character, because we are only escaping the ' character with itself.
345
     */
346
    if (*from == '\'')
347
    {
348
      if (to + 2 > to_end)
349
      {
163 by Brian Aker
Merge Monty's code.
350
        overflow= true;
1 by brian
clean slate
351
        break;
352
      }
353
      *to++= '\'';
354
      *to++= '\'';
355
    }
356
    else
357
    {
358
      if (to + 1 > to_end)
359
      {
163 by Brian Aker
Merge Monty's code.
360
        overflow= true;
1 by brian
clean slate
361
        break;
362
      }
363
      *to++= *from;
364
    }
365
  }
366
  *to= 0;
2160.1.2 by Olaf van der Spek
casts
367
  return overflow ? UINT32_MAX : to - to_start;
1 by brian
clean slate
368
}
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
369
370
} /* namespace drizzled */