1
by brian
clean slate |
1 |
/* Copyright (C) 2000 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
1802.10.2
by Monty Taylor
Update all of the copyright headers to include the correct address. |
14 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
1
by brian
clean slate |
15 |
|
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
16 |
#include <config.h> |
1241.9.57
by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined. |
17 |
|
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
18 |
#include <drizzled/charset.h> |
19 |
#include <drizzled/error.h> |
|
20 |
#include <drizzled/internal/m_string.h> |
|
722.1.4
by Monty Taylor
Removed all the setting of DEFS everywhere. Use configmake.h to get the values |
21 |
#include <drizzled/configmake.h> |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
22 |
#include <vector> |
23 |
||
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
24 |
#include <drizzled/visibility.h> |
2119.4.1
by Monty Taylor
Turns on -fvisibility=hidden by default. Symbols intended to be used by |
25 |
|
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
26 |
using namespace std; |
27 |
||
2318.4.7
by Olaf van der Spek
Refactor |
28 |
namespace drizzled { |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
29 |
|
30 |
/*
|
|
31 |
  Every buffer handed out by cs_alloc() is recorded in this vector so that
  free_charsets() can release all of them.
|
|
32 |
*/
|
|
2160.1.2
by Olaf van der Spek
casts |
33 |
static vector<unsigned char*> memory_vector; |
1
by brian
clean slate |
34 |
|
35 |
/*
|
|
36 |
The code below implements this functionality:
|
|
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
37 |
|
1
by brian
clean slate |
38 |
- Initializing charset related structures
|
39 |
- Loading dynamic charsets
|
|
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
40 |
- Searching for a proper charset_info_st
|
1
by brian
clean slate |
41 |
using charset name, collation name or collation ID
|
42 |
- Setting server default character set
|
|
43 |
*/
|
|
44 |
||
2318.6.17
by Olaf van der Spek
Silly icc |
45 |
bool my_charset_same(const charset_info_st *cs1, const charset_info_st *cs2) |
46 |
{
|
|
47 |
return cs1 == cs2 || not strcmp(cs1->csname, cs2->csname); |
|
1
by brian
clean slate |
48 |
}
|
49 |
||
2318.4.7
by Olaf van der Spek
Refactor |
50 |
/*
  Look up a collation by name in all_charsets.

  The name is compared with my_strcasecmp() under my_charset_utf8_general_ci.
  The last slot of all_charsets is not scanned.

  Returns the number of the first matching entry, or 0 when none matches.
*/
static uint get_collation_number_internal(const char *name)
{
  const size_t scan_limit= array_elements(all_charsets) - 1;

  for (size_t idx= 0; idx < scan_limit; idx++)
  {
    const charset_info_st *candidate= all_charsets[idx];

    /* Skip empty slots and entries without a name */
    if (candidate == NULL || candidate->name == NULL)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, candidate->name, name) == 0)
      return candidate->number;
  }
  return 0;
}
|
|
63 |
||
2318.4.7
by Olaf van der Spek
Refactor |
64 |
static unsigned char* cs_alloc(size_t size) |
2160.1.2
by Olaf van der Spek
casts |
65 |
{
|
66 |
memory_vector.push_back(new unsigned char[size]); |
|
67 |
return memory_vector.back(); |
|
68 |
}
|
|
1
by brian
clean slate |
69 |
|
2318.4.7
by Olaf van der Spek
Refactor |
70 |
/*
  Build the two 256-entry lookup tables of a charset.

  cs->state_map gets one MY_LEX_* state per byte value.
  cs->ident_map gets 1 for every byte whose state is MY_LEX_IDENT or
  MY_LEX_NUMBER_IDENT, and 0 otherwise.
  Both tables are obtained from cs_alloc().
*/
static void init_state_maps(charset_info_st *cs)
{
  cs->state_map= cs_alloc(256);
  cs->ident_map= cs_alloc(256);

  unsigned char *const lex_state= cs->state_map;
  unsigned char *const is_ident= cs->ident_map;

  /* Pass 1: default state of every byte, derived from its character class */
  for (int ch= 0; ch < 256; ch++)
  {
    unsigned char state;

    if (my_isalpha(cs, ch))
      state= MY_LEX_IDENT;
    else if (my_isdigit(cs, ch))
      state= MY_LEX_NUMBER_IDENT;
    else if (my_mbcharlen(cs, ch) > 1)
      state= MY_LEX_IDENT;
    else if (my_isspace(cs, ch))
      state= MY_LEX_SKIP;
    else
      state= MY_LEX_CHAR;

    lex_state[ch]= state;
  }

  /* Pass 2: bytes with a fixed state regardless of character class */
  lex_state['$']= MY_LEX_IDENT;
  lex_state['_']= MY_LEX_IDENT;
  lex_state['\'']= MY_LEX_STRING;
  lex_state['.']= MY_LEX_REAL_OR_POINT;
  lex_state['!']= MY_LEX_CMP_OP;
  lex_state['=']= MY_LEX_CMP_OP;
  lex_state['>']= MY_LEX_CMP_OP;
  lex_state['<']= MY_LEX_LONG_CMP_OP;
  lex_state['|']= MY_LEX_BOOL;
  lex_state['&']= MY_LEX_BOOL;
  lex_state['#']= MY_LEX_COMMENT;
  lex_state[';']= MY_LEX_SEMICOLON;
  lex_state[':']= MY_LEX_SET_VAR;
  lex_state[0]= MY_LEX_EOL;
  lex_state['\\']= MY_LEX_ESCAPE;
  lex_state['/']= MY_LEX_LONG_COMMENT;
  lex_state['*']= MY_LEX_END_LONG_COMMENT;
  lex_state['@']= MY_LEX_USER_END;
  lex_state['`']= MY_LEX_USER_VARIABLE_DELIMITER;
  lex_state['"']= MY_LEX_STRING_OR_DELIMITER;

  /*
    Pass 3: the identifier table. It must be filled before the hex/binary
    overrides below, so that x, X, b and B keep their identifier flag.
  */
  for (int ch= 0; ch < 256; ch++)
  {
    const unsigned char state= lex_state[ch];
    is_ident[ch]= (state == MY_LEX_IDENT || state == MY_LEX_NUMBER_IDENT);
  }

  /* Letters that may start a hex or binary string get their own states */
  lex_state['X']= MY_LEX_IDENT_OR_HEX;
  lex_state['x']= MY_LEX_IDENT_OR_HEX;
  lex_state['B']= MY_LEX_IDENT_OR_BIN;
  lex_state['b']= MY_LEX_IDENT_OR_BIN;
}
|
121 |
||
861
by Brian Aker
Remove THR_LOCK_charset (we never recall it anymore) |
122 |
static bool charset_initialized= false; |
1
by brian
clean slate |
123 |
|
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
124 |
DRIZZLED_API charset_info_st *all_charsets[256]; |
125 |
const DRIZZLED_API charset_info_st *default_charset_info = &my_charset_utf8_general_ci; |
|
1
by brian
clean slate |
126 |
|
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
127 |
/*
  Register a compiled-in collation.

  The collation is stored in all_charsets at index cs->number and gets the
  MY_CS_AVAILABLE flag. A collation whose number does not fit into
  all_charsets is ignored instead of being written past the end of the table.
*/
void add_compiled_collation(charset_info_st * cs)
{
  if (cs->number >= array_elements(all_charsets))
    return;

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}
|
|
132 |
||
2318.4.7
by Olaf van der Spek
Refactor |
133 |
/*
  Fill all_charsets on first use.

  Does nothing when charset_initialized is already set. Otherwise the table
  is zeroed, init_compiled_charsets(myflags) is called, and init_state_maps()
  is run for every non-empty entry that has a ctype table. The last slot of
  all_charsets is not visited by that loop.
*/
static void init_available_charsets(myf myflags)
{
  if (not charset_initialized)
  {
    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Prepare the lexer tables of every registered charset */
    const size_t scan_limit= array_elements(all_charsets) - 1;
    for (size_t idx= 0; idx < scan_limit; idx++)
    {
      charset_info_st *entry= all_charsets[idx];
      if (entry != NULL && entry->ctype)
        init_state_maps(entry);
    }

    charset_initialized= true;
  }
}
|
155 |
||
2160.1.2
by Olaf van der Spek
casts |
156 |
void free_charsets() |
1
by brian
clean slate |
157 |
{
|
2160.1.7
by Olaf van der Spek
fix |
158 |
charset_initialized= false; |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
159 |
|
2160.1.6
by Olaf van der Spek
USe "not" instead of "!" |
160 |
while (not memory_vector.empty()) |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
161 |
{
|
2160.1.2
by Olaf van der Spek
casts |
162 |
delete[] memory_vector.back(); |
1106.1.1
by Brian Aker
Monty fixes pluss a few from me for charset. |
163 |
memory_vector.pop_back(); |
164 |
}
|
|
1
by brian
clean slate |
165 |
}
|
166 |
||
482
by Brian Aker
Remove uint. |
167 |
uint32_t get_collation_number(const char *name) |
1
by brian
clean slate |
168 |
{
|
169 |
init_available_charsets(MYF(0)); |
|
170 |
return get_collation_number_internal(name); |
|
171 |
}
|
|
172 |
||
482
by Brian Aker
Remove uint. |
173 |
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags) |
1
by brian
clean slate |
174 |
{
|
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
175 |
charset_info_st **cs; |
1
by brian
clean slate |
176 |
init_available_charsets(MYF(0)); |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
177 |
|
1
by brian
clean slate |
178 |
for (cs= all_charsets; |
179 |
cs < all_charsets+array_elements(all_charsets)-1 ; |
|
180 |
cs++) |
|
181 |
{
|
|
2085.2.3
by Brian Aker
Fix strcasecmp issues (ie, check UTF-8). |
182 |
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name)) |
1
by brian
clean slate |
183 |
return cs[0]->number; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
184 |
}
|
1
by brian
clean slate |
185 |
return 0; |
186 |
}
|
|
187 |
||
482
by Brian Aker
Remove uint. |
188 |
const char *get_charset_name(uint32_t charset_number) |
1
by brian
clean slate |
189 |
{
|
190 |
init_available_charsets(MYF(0)); |
|
191 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
192 |
const charset_info_st *cs= all_charsets[charset_number]; |
1
by brian
clean slate |
193 |
if (cs && (cs->number == charset_number) && cs->name ) |
2160.1.2
by Olaf van der Spek
casts |
194 |
return cs->name; |
660.1.3
by Eric Herman
removed trailing whitespace with simple script: |
195 |
|
2160.1.2
by Olaf van der Spek
casts |
196 |
return "?"; /* this mimics find_type() */ |
1
by brian
clean slate |
197 |
}
|
198 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
199 |
/*
  Fetch the entry stored at all_charsets[cs_number] and make it ready for use.

  An entry that lacks MY_CS_READY has its cset->init and coll->init hooks
  called (when present) with cs_alloc as the allocator; MY_CS_READY is set
  once both succeed.

  Returns NULL when the slot is empty, when the entry lacks MY_CS_AVAILABLE,
  or when one of the init hooks returns non-zero.

  cs_number is used as an index without a range check, and no locking is
  done here.
*/
static const charset_info_st *get_internal_charset(uint32_t cs_number)
{
  charset_info_st *entry= all_charsets[cs_number];
  if (entry == NULL)
    return NULL;

  /* Every registered entry is expected to be compiled in or loaded */
  if (not (entry->state & (MY_CS_COMPILED | MY_CS_LOADED)))
  {
    assert(0);
  }

  if (not (entry->state & MY_CS_AVAILABLE))
    return NULL;

  if (entry->state & MY_CS_READY)
    return entry;

  /* First use: run the optional init hooks, charset handler first */
  if (entry->cset->init && entry->cset->init(entry, cs_alloc))
    return NULL;
  if (entry->coll->init && entry->coll->init(entry, cs_alloc))
    return NULL;

  entry->state|= MY_CS_READY;
  return entry;
}
|
|
225 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
226 |
/*
  Return the charset registered under cs_number.

  The number of default_charset_info is answered with default_charset_info
  itself, before the charset table is touched. Number 0 and numbers at or
  above array_elements(all_charsets)-1 yield NULL. Every other number is
  resolved by get_internal_charset().
*/
const charset_info_st *get_charset(uint32_t cs_number)
{
  if (cs_number == default_charset_info->number)
    return default_charset_info;

  init_available_charsets(MYF(0));

  const bool out_of_range= cs_number == 0 ||
                           cs_number >= array_elements(all_charsets) - 1;
  if (out_of_range)
    return NULL;

  return get_internal_charset(cs_number);
}
|
238 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
239 |
/*
  Return the charset whose collation name is cs_name, or NULL when
  get_collation_number() reports 0 for that name.
*/
const charset_info_st *get_charset_by_name(const char *cs_name)
{
  init_available_charsets(MYF(0));

  const uint32_t collation_number= get_collation_number(cs_name);
  if (collation_number == 0)
    return NULL;

  return get_internal_charset(collation_number);
}
|
245 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
246 |
/*
  Return the charset whose character set name is cs_name and whose state
  matches cs_flags, or NULL when get_charset_number() reports 0 for them.
*/
const charset_info_st *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
{
  init_available_charsets(MYF(0));

  const uint32_t charset_number= get_charset_number(cs_name, cs_flags);
  if (charset_number == 0)
    return NULL;

  return get_internal_charset(charset_number);
}
|
252 |
||
253 |
||
254 |
/*
|
|
255 |
Escape apostrophes by doubling them up
|
|
256 |
||
257 |
SYNOPSIS
|
|
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
258 |
escape_quotes_for_drizzle()
|
1
by brian
clean slate |
259 |
charset_info Charset of the strings
|
260 |
to Buffer for escaped string
|
|
261 |
to_length Length of destination buffer, or 0
|
|
262 |
from The string to escape
|
|
263 |
length The length of the string to escape
|
|
264 |
||
265 |
DESCRIPTION
|
|
266 |
This escapes the contents of a string by doubling up any apostrophes that
|
|
267 |
it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
|
|
268 |
effect on the server.
|
|
269 |
||
270 |
NOTE
|
|
271 |
To be consistent with escape_string_for_mysql(), to_length may be 0 to
|
|
272 |
mean "big enough"
|
|
273 |
||
274 |
RETURN VALUES
|
|
365.2.9
by Monty Taylor
Got rid of all instances of ~0 |
275 |
UINT32_MAX The escaped string did not fit in the to buffer
|
1
by brian
clean slate |
276 |
>=0 The length of the escaped string
|
277 |
*/
|
|
278 |
||
2254
by Brian Aker
Shift CHARSET_INFO to charset_info_st |
279 |
size_t escape_quotes_for_drizzle(const charset_info_st *charset_info, |
236.3.4
by Andrey Hristov
Rename escape_(string|quotes)_for_mysql to escape_(string|quotes)_for_drizzle |
280 |
char *to, size_t to_length, |
281 |
const char *from, size_t length) |
|
1
by brian
clean slate |
282 |
{
|
283 |
const char *to_start= to; |
|
284 |
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
|
163
by Brian Aker
Merge Monty's code. |
285 |
bool overflow= false; |
146
by Brian Aker
my_bool cleanup. |
286 |
bool use_mb_flag= use_mb(charset_info); |
1
by brian
clean slate |
287 |
for (end= from + length; from < end; from++) |
288 |
{
|
|
289 |
int tmp_length; |
|
290 |
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
|
291 |
{
|
|
292 |
if (to + tmp_length > to_end) |
|
293 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
294 |
overflow= true; |
1
by brian
clean slate |
295 |
break; |
296 |
}
|
|
297 |
while (tmp_length--) |
|
298 |
*to++= *from++; |
|
299 |
from--; |
|
300 |
continue; |
|
301 |
}
|
|
302 |
/*
|
|
303 |
We don't have the same issue here with a non-multi-byte character being
|
|
304 |
turned into a multi-byte character by the addition of an escaping
|
|
305 |
character, because we are only escaping the ' character with itself.
|
|
306 |
*/
|
|
307 |
if (*from == '\'') |
|
308 |
{
|
|
309 |
if (to + 2 > to_end) |
|
310 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
311 |
overflow= true; |
1
by brian
clean slate |
312 |
break; |
313 |
}
|
|
314 |
*to++= '\''; |
|
315 |
*to++= '\''; |
|
316 |
}
|
|
317 |
else
|
|
318 |
{
|
|
319 |
if (to + 1 > to_end) |
|
320 |
{
|
|
163
by Brian Aker
Merge Monty's code. |
321 |
overflow= true; |
1
by brian
clean slate |
322 |
break; |
323 |
}
|
|
324 |
*to++= *from; |
|
325 |
}
|
|
326 |
}
|
|
327 |
*to= 0; |
|
2160.1.2
by Olaf van der Spek
casts |
328 |
return overflow ? UINT32_MAX : to - to_start; |
1
by brian
clean slate |
329 |
}
|
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
330 |
|
331 |
} /* namespace drizzled */ |