~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
16
#include <config.h>
1241.9.57 by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined.
17
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
18
#include <drizzled/charset.h>
19
#include <drizzled/error.h>
20
#include <drizzled/internal/m_string.h>
722.1.4 by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values
21
#include <drizzled/configmake.h>
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
22
#include <vector>
23
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
24
#include <drizzled/visibility.h>
2119.4.1 by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by
25
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
26
using namespace std;
27
2318.4.7 by Olaf van der Spek
Refactor
28
namespace drizzled {
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
29
30
/*
31
  We collect memory in this vector that we free on delete.
32
*/
2160.1.2 by Olaf van der Spek
casts
33
/* Each entry is a new[] buffer recorded by cs_alloc(); free_charsets()
   releases every entry with delete[] and empties the vector. */
static vector<unsigned char*> memory_vector;
1 by brian
clean slate
34
35
/*
36
  The code below implements this functionality:
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
37
1 by brian
clean slate
38
    - Initializing charset related structures
39
    - Loading dynamic charsets
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
40
    - Searching for a proper charset_info_st
1 by brian
clean slate
41
      using charset name, collation name or collation ID
42
    - Setting server default character set
43
*/
44
2318.6.17 by Olaf van der Spek
Silly icc
45
bool my_charset_same(const charset_info_st *cs1, const charset_info_st *cs2)
46
{
47
  return cs1 == cs2 || not strcmp(cs1->csname, cs2->csname);
1 by brian
clean slate
48
}
49
2318.4.7 by Olaf van der Spek
Refactor
50
/*
  Look up a collation by name in all_charsets.

  The comparison is case-insensitive (utf8_general_ci rules) against each
  registered descriptor's `name`. The final slot of all_charsets is not
  examined. This helper does not trigger initialization itself.

  RETURN
    The `number` of the first matching descriptor, or 0 when none matches.
*/
static uint get_collation_number_internal(const char *name)
{
  charset_info_st **const stop= all_charsets + array_elements(all_charsets) - 1;

  for (charset_info_st **slot= all_charsets; slot < stop; ++slot)
  {
    const charset_info_st *candidate= *slot;

    /* Skip unused slots and descriptors without a collation name. */
    if (candidate == NULL || candidate->name == NULL)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, candidate->name, name) == 0)
      return candidate->number;
  }

  return 0;
}
63
2318.4.7 by Olaf van der Spek
Refactor
64
static unsigned char* cs_alloc(size_t size)
2160.1.2 by Olaf van der Spek
casts
65
{
66
  memory_vector.push_back(new unsigned char[size]);
67
  return memory_vector.back();
68
}
1 by brian
clean slate
69
2318.4.7 by Olaf van der Spek
Refactor
70
/*
  Build the two 256-entry lookup tables on `cs`:

    state_map[c]  lexer state to enter when byte `c` starts a token
    ident_map[c]  non-zero when byte `c` may appear in an identifier

  Both tables are obtained from cs_alloc(), so they are released by
  free_charsets().
*/
static void init_state_maps(charset_info_st *cs)
{
  /* Bytes whose state is fixed, overriding the ctype-based classification. */
  static const struct
  {
    unsigned char ch;
    unsigned char state;
  } fixed_states[]=
  {
    { '_',  MY_LEX_IDENT },
    { '$',  MY_LEX_IDENT },
    { '\'', MY_LEX_STRING },
    { '.',  MY_LEX_REAL_OR_POINT },
    { '>',  MY_LEX_CMP_OP },
    { '=',  MY_LEX_CMP_OP },
    { '!',  MY_LEX_CMP_OP },
    { '<',  MY_LEX_LONG_CMP_OP },
    { '&',  MY_LEX_BOOL },
    { '|',  MY_LEX_BOOL },
    { '#',  MY_LEX_COMMENT },
    { ';',  MY_LEX_SEMICOLON },
    { ':',  MY_LEX_SET_VAR },
    { 0,    MY_LEX_EOL },
    { '\\', MY_LEX_ESCAPE },
    { '/',  MY_LEX_LONG_COMMENT },
    { '*',  MY_LEX_END_LONG_COMMENT },
    { '@',  MY_LEX_USER_END },
    { '`',  MY_LEX_USER_VARIABLE_DELIMITER },
    { '"',  MY_LEX_STRING_OR_DELIMITER }
  };

  unsigned char *const states= cs->state_map= cs_alloc(256);
  unsigned char *const idents= cs->ident_map= cs_alloc(256);

  /* Step 1: classify every byte value from the charset's ctype data. */
  for (int c= 0; c < 256; c++)
  {
    unsigned char state;

    if (my_isalpha(cs, c))
      state= MY_LEX_IDENT;
    else if (my_isdigit(cs, c))
      state= MY_LEX_NUMBER_IDENT;
    else if (my_mbcharlen(cs, c) > 1)
      state= MY_LEX_IDENT;              /* lead byte of a multi-byte char */
    else if (my_isspace(cs, c))
      state= MY_LEX_SKIP;
    else
      state= MY_LEX_CHAR;

    states[c]= state;
  }

  /* Step 2: apply the fixed per-character states. */
  for (size_t k= 0; k < sizeof(fixed_states) / sizeof(fixed_states[0]); k++)
    states[fixed_states[k].ch]= fixed_states[k].state;

  /*
    Step 3: derive the identifier map. This has to happen before step 4 so
    that x/X/b/B are still seen as ordinary identifier characters here.
  */
  for (int c= 0; c < 256; c++)
  {
    const unsigned char state= states[c];
    idents[c]= (state == MY_LEX_IDENT || state == MY_LEX_NUMBER_IDENT);
  }

  /* Step 4: special handling of hex and binary string prefixes. */
  states['x']= MY_LEX_IDENT_OR_HEX;
  states['X']= MY_LEX_IDENT_OR_HEX;
  states['b']= MY_LEX_IDENT_OR_BIN;
  states['B']= MY_LEX_IDENT_OR_BIN;
}
121
861 by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore)
122
/* Set by init_available_charsets() once all_charsets has been populated;
   cleared again by free_charsets(). */
static bool charset_initialized= false;
1 by brian
clean slate
123
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
124
/* Registered charset/collation descriptors, indexed by their `number`
   (see add_compiled_collation()). Unused slots are NULL. */
DRIZZLED_API charset_info_st *all_charsets[256];
125
/* Descriptor that get_charset() returns directly when asked for this
   descriptor's own number; starts out as utf8_general_ci. */
const DRIZZLED_API charset_info_st *default_charset_info = &my_charset_utf8_general_ci;
1 by brian
clean slate
126
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
127
/*
  Register a compiled-in collation.

  The descriptor is stored in all_charsets at index cs->number and flagged
  MY_CS_AVAILABLE.

  A descriptor whose number does not fit in all_charsets is rejected: debug
  builds assert, other builds ignore it instead of writing past the end of
  the table.
*/
void add_compiled_collation(charset_info_st * cs)
{
  if (cs->number >= array_elements(all_charsets))
  {
    assert(0);
    return;
  }

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}
132
2318.4.7 by Olaf van der Spek
Refactor
133
/*
  Populate all_charsets on first use.

  The table is cleared, init_compiled_charsets() registers the built-in
  descriptors, and every registered descriptor that has ctype data gets its
  lexer state maps built. The final slot of all_charsets is not examined.
  Later calls return immediately until free_charsets() resets the flag.

  NOTE(review): charset_initialized is a plain flag and no locking is
  visible in this file - confirm that first use is serialized by callers.
*/
static void init_available_charsets(myf myflags)
{
  if (not charset_initialized)
  {
    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Build lexer maps for the compiled charsets. */
    const size_t scanned= array_elements(all_charsets) - 1;
    for (size_t idx= 0; idx < scanned; idx++)
    {
      charset_info_st *entry= all_charsets[idx];
      if (entry && entry->ctype)
        init_state_maps(entry);
    }

    charset_initialized= true;
  }
}
155
2160.1.2 by Olaf van der Spek
casts
156
void free_charsets()
1 by brian
clean slate
157
{
2160.1.7 by Olaf van der Spek
fix
158
  charset_initialized= false;
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
159
2160.1.6 by Olaf van der Spek
USe "not" instead of "!"
160
  while (not memory_vector.empty())
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
161
  {
2160.1.2 by Olaf van der Spek
casts
162
    delete[] memory_vector.back();
1106.1.1 by Brian Aker
Monty fixes pluss a few from me for charset.
163
    memory_vector.pop_back();
164
  }
1 by brian
clean slate
165
}
166
482 by Brian Aker
Remove uint.
167
uint32_t get_collation_number(const char *name)
1 by brian
clean slate
168
{
169
  init_available_charsets(MYF(0));
170
  return get_collation_number_internal(name);
171
}
172
482 by Brian Aker
Remove uint.
173
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
1 by brian
clean slate
174
{
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
175
  charset_info_st **cs;
1 by brian
clean slate
176
  init_available_charsets(MYF(0));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
177
1 by brian
clean slate
178
  for (cs= all_charsets;
179
       cs < all_charsets+array_elements(all_charsets)-1 ;
180
       cs++)
181
  {
2085.2.3 by Brian Aker
Fix strcasecmp issues (ie, check UTF-8).
182
    if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
1 by brian
clean slate
183
      return cs[0]->number;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
184
  }
1 by brian
clean slate
185
  return 0;
186
}
187
482 by Brian Aker
Remove uint.
188
const char *get_charset_name(uint32_t charset_number)
1 by brian
clean slate
189
{
190
  init_available_charsets(MYF(0));
191
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
192
  const charset_info_st *cs= all_charsets[charset_number];
1 by brian
clean slate
193
  if (cs && (cs->number == charset_number) && cs->name )
2160.1.2 by Olaf van der Spek
casts
194
    return cs->name;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
195
2160.1.2 by Olaf van der Spek
casts
196
  return "?";   /* this mimics find_type() */
1 by brian
clean slate
197
}
198
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
199
/*
  Return the descriptor stored at all_charsets[cs_number], making it ready
  for use on first access.

  The caller must pass an index that lies inside all_charsets; no range
  check is done here.

  A descriptor that is not yet MY_CS_READY has its cset and coll init hooks
  run (when present) with cs_alloc as the allocator. A non-zero return from
  either hook counts as failure.

  RETURN
    The descriptor, or NULL when the slot is empty, the descriptor is not
    MY_CS_AVAILABLE, or an init hook failed.
*/
static const charset_info_st *get_internal_charset(uint32_t cs_number)
{
  charset_info_st *cs= all_charsets[cs_number];

  if (cs == NULL)
    return NULL;

  /* Every registered descriptor is expected to be compiled-in or loaded. */
  assert(cs->state & (MY_CS_COMPILED | MY_CS_LOADED));

  if (not (cs->state & MY_CS_AVAILABLE))
    return NULL;

  if (cs->state & MY_CS_READY)
    return cs;

  /* First use: run the charset hook, then the collation hook. */
  if (cs->cset->init && cs->cset->init(cs, cs_alloc))
    return NULL;
  if (cs->coll->init && cs->coll->init(cs, cs_alloc))
    return NULL;

  cs->state|= MY_CS_READY;
  return cs;
}
225
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
226
/*
  Look up a descriptor by collation number.

  The default charset is returned directly, without touching the table.
  Otherwise the table is initialized if needed, and the number must be
  non-zero and below the final slot of all_charsets.

  RETURN
    The descriptor, or NULL when the number is out of range or
    get_internal_charset() returns NULL for it.
*/
const charset_info_st *get_charset(uint32_t cs_number)
{
  if (default_charset_info->number == cs_number)
    return default_charset_info;

  init_available_charsets(MYF(0));	/* If it isn't initialized */

  const bool in_range= cs_number != 0 &&
                       cs_number < array_elements(all_charsets) - 1;

  return in_range ? get_internal_charset(cs_number) : NULL;
}
238
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
239
/*
  Look up a descriptor by collation name (case-insensitive).

  RETURN
    The descriptor, or NULL when the name is unknown or
    get_internal_charset() returns NULL for it.
*/
const charset_info_st *get_charset_by_name(const char *cs_name)
{
  init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t collation_id= get_collation_number(cs_name);
  if (collation_id == 0)
    return NULL;

  return get_internal_charset(collation_id);
}
245
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
246
/*
  Look up a descriptor by character set name (case-insensitive), restricted
  to descriptors whose state shares at least one bit with `cs_flags`.

  RETURN
    The descriptor, or NULL when nothing matches or
    get_internal_charset() returns NULL for the match.
*/
const charset_info_st *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  init_available_charsets(MYF(0));	/* If it isn't initialized */

  const uint32_t charset_id= get_charset_number(cs_name, cs_flags);
  if (charset_id == 0)
    return NULL;

  return get_internal_charset(charset_id);
}
252
253
254
/*
255
  Escape apostrophes by doubling them up
256
257
  SYNOPSIS
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
258
    escape_quotes_for_drizzle()
1 by brian
clean slate
259
    charset_info        Charset of the strings
260
    to                  Buffer for escaped string
261
    to_length           Length of destination buffer, or 0
262
    from                The string to escape
263
    length              The length of the string to escape
264
265
  DESCRIPTION
266
    This escapes the contents of a string by doubling up any apostrophes that
267
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
268
    effect on the server.
269
270
  NOTE
271
    To be consistent with escape_string_for_drizzle(), to_length may be 0 to
272
    mean "big enough"
273
274
  RETURN VALUES
365.2.9 by Monty Taylor
Got rid of all instances of ~0
275
    UINT32_MAX  The escaped string did not fit in the to buffer
1 by brian
clean slate
276
    >=0         The length of the escaped string
277
*/
278
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
279
size_t escape_quotes_for_drizzle(const charset_info_st *charset_info,
236.3.4 by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle
280
                                 char *to, size_t to_length,
281
                                 const char *from, size_t length)
1 by brian
clean slate
282
{
283
  const char *to_start= to;
284
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker
Merge Monty's code.
285
  bool overflow= false;
146 by Brian Aker
my_bool cleanup.
286
  bool use_mb_flag= use_mb(charset_info);
1 by brian
clean slate
287
  for (end= from + length; from < end; from++)
288
  {
289
    int tmp_length;
290
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
291
    {
292
      if (to + tmp_length > to_end)
293
      {
163 by Brian Aker
Merge Monty's code.
294
        overflow= true;
1 by brian
clean slate
295
        break;
296
      }
297
      while (tmp_length--)
298
	*to++= *from++;
299
      from--;
300
      continue;
301
    }
302
    /*
303
      We don't have the same issue here with a non-multi-byte character being
304
      turned into a multi-byte character by the addition of an escaping
305
      character, because we are only escaping the ' character with itself.
306
     */
307
    if (*from == '\'')
308
    {
309
      if (to + 2 > to_end)
310
      {
163 by Brian Aker
Merge Monty's code.
311
        overflow= true;
1 by brian
clean slate
312
        break;
313
      }
314
      *to++= '\'';
315
      *to++= '\'';
316
    }
317
    else
318
    {
319
      if (to + 1 > to_end)
320
      {
163 by Brian Aker
Merge Monty's code.
321
        overflow= true;
1 by brian
clean slate
322
        break;
323
      }
324
      *to++= *from;
325
    }
326
  }
327
  *to= 0;
2160.1.2 by Olaf van der Spek
casts
328
  return overflow ? UINT32_MAX : to - to_start;
1 by brian
clean slate
329
}
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
330
331
} /* namespace drizzled */