12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
18
#include <drizzled/charset.h>
19
#include <drizzled/error.h>
20
#include <drizzled/charset_info.h>
21
#include <drizzled/internal/m_string.h>
22
#include <drizzled/configmake.h>
25
#include <drizzled/visibility.h>
33
We collect memory in this vector that we free on delete.
35
static vector<unsigned char*> memory_vector;
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <mystrings/m_ctype.h>
19
#include <mystrings/m_string.h>
38
24
The code below implements this functionality:
40
26
- Initializing charset related structures
41
27
- Loading dynamic charsets
42
- Searching for a proper CHARSET_INFO
28
- Searching for a proper CHARSET_INFO
43
29
using charset name, collation name or collation ID
44
30
- Setting server default character set
54
40
get_collation_number_internal(const char *name)
56
for (CHARSET_INFO **cs= all_charsets;
57
cs < all_charsets+array_elements(all_charsets)-1;
43
for (cs= all_charsets;
44
cs < all_charsets+array_elements(all_charsets)-1 ;
60
if ( cs[0] && cs[0]->name && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
47
if ( cs[0] && cs[0]->name &&
48
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
62
49
return cs[0]->number;
68
static unsigned char *cs_alloc(size_t size)
70
memory_vector.push_back(new unsigned char[size]);
71
return memory_vector.back();
74
55
static bool init_state_maps(CHARSET_INFO *cs)
76
if (!(cs->state_map= cs_alloc(256)))
61
if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
79
if (!(cs->ident_map= cs_alloc(256)))
64
if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
82
unsigned char *state_map= cs->state_map;
83
unsigned char *ident_map= cs->ident_map;
67
state_map= cs->state_map;
68
ident_map= cs->ident_map;
85
70
/* Fill state_map with states to get a faster parser */
86
for (int i= 0; i < 256; i++)
71
for (i=0; i < 256 ; i++)
88
73
if (my_isalpha(cs,i))
89
state_map[i]= MY_LEX_IDENT;
74
state_map[i]=(uchar) MY_LEX_IDENT;
90
75
else if (my_isdigit(cs,i))
91
state_map[i]= MY_LEX_NUMBER_IDENT;
76
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
77
#if defined(USE_MB) && defined(USE_MB_IDENT)
92
78
else if (my_mbcharlen(cs, i)>1)
93
state_map[i]= MY_LEX_IDENT;
79
state_map[i]=(uchar) MY_LEX_IDENT;
94
81
else if (my_isspace(cs,i))
95
state_map[i]= MY_LEX_SKIP;
82
state_map[i]=(uchar) MY_LEX_SKIP;
97
state_map[i]= MY_LEX_CHAR;
84
state_map[i]=(uchar) MY_LEX_CHAR;
99
state_map['_']=state_map['$']= MY_LEX_IDENT;
100
state_map['\'']= MY_LEX_STRING;
101
state_map['.']= MY_LEX_REAL_OR_POINT;
102
state_map['>']=state_map['=']=state_map['!']= MY_LEX_CMP_OP;
103
state_map['<']= MY_LEX_LONG_CMP_OP;
104
state_map['&']=state_map['|']= MY_LEX_BOOL;
105
state_map['#']= MY_LEX_COMMENT;
106
state_map[';']= MY_LEX_SEMICOLON;
107
state_map[':']= MY_LEX_SET_VAR;
108
state_map[0]= MY_LEX_EOL;
109
state_map['\\']= MY_LEX_ESCAPE;
110
state_map['/']= MY_LEX_LONG_COMMENT;
111
state_map['*']= MY_LEX_END_LONG_COMMENT;
112
state_map['@']= MY_LEX_USER_END;
113
state_map['`']= MY_LEX_USER_VARIABLE_DELIMITER;
114
state_map['"']= MY_LEX_STRING_OR_DELIMITER;
86
state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
87
state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
88
state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
89
state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
90
state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
91
state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
92
state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
93
state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;
94
state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
95
state_map[0]=(uchar) MY_LEX_EOL;
96
state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
97
state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
98
state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
99
state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
100
state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
101
state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
117
104
Create a second map to make it faster to find identifiers
119
for (int i= 0; i < 256; i++)
106
for (i=0; i < 256 ; i++)
121
ident_map[i]= state_map[i] == MY_LEX_IDENT || state_map[i] == MY_LEX_NUMBER_IDENT;
108
ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
109
state_map[i] == MY_LEX_NUMBER_IDENT);
124
112
/* Special handling of hex and binary strings */
125
state_map['x']= state_map['X']= MY_LEX_IDENT_OR_HEX;
126
state_map['b']= state_map['B']= MY_LEX_IDENT_OR_BIN;
113
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
114
state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
130
static bool charset_initialized= false;
132
DRIZZLED_API CHARSET_INFO *all_charsets[256];
133
const DRIZZLED_API CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
119
#define MY_MAX_ALLOWED_BUF 1024*1024
120
#define MY_CHARSET_INDEX "Index.xml"
122
const char *charsets_dir= NULL;
123
static int charset_initialized=0;
126
char *get_charsets_dir(char *buf)
128
const char *sharedir= SHAREDIR;
131
if (charsets_dir != NULL)
132
strmake(buf, charsets_dir, FN_REFLEN-1);
135
if (test_if_hard_path(sharedir) ||
136
is_prefix(sharedir, DEFAULT_CHARSET_HOME))
137
strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
139
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
142
res= convert_dirname(buf,buf,NullS);
146
CHARSET_INFO *all_charsets[256];
147
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
135
149
void add_compiled_collation(CHARSET_INFO * cs)
138
152
cs->state|= MY_CS_AVAILABLE;
155
static void *cs_alloc(size_t size)
157
return my_once_alloc(size, MYF(MY_WME));
141
161
static bool init_available_charsets(myf myflags)
163
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
145
166
We have to use charset_initialized to not lock on THR_LOCK_charset
146
167
inside get_internal_charset...
148
if (charset_initialized == false)
169
if (!charset_initialized)
150
171
CHARSET_INFO **cs;
151
memset(&all_charsets, 0, sizeof(all_charsets));
152
init_compiled_charsets(myflags);
154
/* Copy compiled charsets */
155
for (cs=all_charsets;
156
cs < all_charsets+array_elements(all_charsets)-1 ;
173
To make things thread safe we are not allowing other threads to interfere
174
while we may be changing the cs_info_table
176
pthread_mutex_lock(&THR_LOCK_charset);
177
if (!charset_initialized)
179
memset(&all_charsets, 0, sizeof(all_charsets));
180
init_compiled_charsets(myflags);
182
/* Copy compiled charsets */
183
for (cs=all_charsets;
184
cs < all_charsets+array_elements(all_charsets)-1 ;
162
if (init_state_maps(*cs))
190
if (init_state_maps(*cs))
195
my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
196
charset_initialized=1;
167
charset_initialized= true;
198
pthread_mutex_unlock(&THR_LOCK_charset);
169
assert(charset_initialized);
204
void free_charsets(void)
177
charset_initialized= false;
179
while (not memory_vector.empty())
181
delete[] memory_vector.back();
182
memory_vector.pop_back();
206
charset_initialized=0;
187
uint32_t get_collation_number(const char *name)
210
uint get_collation_number(const char *name)
189
212
init_available_charsets(MYF(0));
190
213
return get_collation_number_internal(name);
194
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
217
uint get_charset_number(const char *charset_name, uint cs_flags)
196
219
CHARSET_INFO **cs;
197
220
init_available_charsets(MYF(0));
199
222
for (cs= all_charsets;
200
223
cs < all_charsets+array_elements(all_charsets)-1 ;
203
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
226
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
227
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
204
228
return cs[0]->number;
210
const char *get_charset_name(uint32_t charset_number)
234
const char *get_charset_name(uint charset_number)
236
const CHARSET_INFO *cs;
212
237
init_available_charsets(MYF(0));
214
const CHARSET_INFO *cs= all_charsets[charset_number];
239
cs=all_charsets[charset_number];
215
240
if (cs && (cs->number == charset_number) && cs->name )
218
return "?"; /* this mimics find_type() */
241
return (char*) cs->name;
243
return (char*) "?"; /* this mimics find_type() */
222
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
247
static const CHARSET_INFO *get_internal_charset(uint cs_number)
224
249
CHARSET_INFO *cs;
226
251
To make things thread safe we are not allowing other threads to interfere
227
252
while we may be changing the cs_info_table
254
pthread_mutex_lock(&THR_LOCK_charset);
229
255
if ((cs= all_charsets[cs_number]))
231
257
if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED))
243
269
cs->state|= MY_CS_READY;
271
pthread_mutex_unlock(&THR_LOCK_charset);
250
const CHARSET_INFO *get_charset(uint32_t cs_number)
276
const const CHARSET_INFO *get_charset(uint cs_number, myf flags)
252
278
const CHARSET_INFO *cs;
253
279
if (cs_number == default_charset_info->number)
254
280
return default_charset_info;
256
282
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
258
284
if (!cs_number || cs_number >= array_elements(all_charsets)-1)
261
287
cs= get_internal_charset(cs_number);
289
if (!cs && (flags & MY_WME))
291
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
292
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
294
int10_to_str(cs_number, cs_string+1, 10);
295
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
266
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
300
const CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
269
303
const CHARSET_INFO *cs;
270
304
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
272
cs_number= get_collation_number(cs_name);
306
cs_number=get_collation_number(cs_name);
273
307
cs= cs_number ? get_internal_charset(cs_number) : NULL;
309
if (!cs && (flags & MY_WME))
311
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
312
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
313
my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
279
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
320
const CHARSET_INFO *get_charset_by_csname(const char *cs_name,
282
325
const CHARSET_INFO *cs;
284
327
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
286
329
cs_number= get_charset_number(cs_name, cs_flags);
287
330
cs= cs_number ? get_internal_charset(cs_number) : NULL;
332
if (!cs && (flags & MY_WME))
334
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
335
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
336
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
344
Resolve character set by the character set name (utf8, latin1, ...).
346
The function tries to resolve character set by the specified name. If
347
there is a character set with the given name, it is assigned to the "cs"
348
parameter and false is returned. If there is no such character set,
349
"default_cs" is assigned to the "cs" and true is returned.
351
@param[in] cs_name Character set name.
352
@param[in] default_cs Default character set.
353
@param[out] cs Variable to store character set.
355
@return false if character set was resolved successfully; true if there
356
is no character set with the given name.
359
bool resolve_charset(const char *cs_name,
360
const CHARSET_INFO *default_cs,
361
const CHARSET_INFO **cs)
363
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
376
Resolve collation by the collation name (utf8_general_ci, ...).
378
The function tries to resolve collation by the specified name. If there
379
is a collation with the given name, it is assigned to the "cl" parameter
380
and false is returned. If there is no such collation, "default_cl" is
381
assigned to the "cl" and true is returned.
383
@param[out] cl Variable to store collation.
384
@param[in] cl_name Collation name.
385
@param[in] default_cl Default collation.
387
@return false if collation was resolved successfully; true if there is no
388
collation with the given name.
391
bool resolve_collation(const char *cl_name,
392
const CHARSET_INFO *default_cl,
393
const CHARSET_INFO **cl)
395
*cl= get_charset_by_name(cl_name, MYF(0));
407
#ifdef BACKSLASH_MBTAIL
408
static CHARSET_INFO *fs_cset_cache= NULL;
410
CHARSET_INFO *fs_character_set()
415
GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
416
buf+2, sizeof(buf)-3);
418
We cannot call get_charset_by_name here
419
because fs_character_set() is executed before
420
LOCK_THD_charset mutex initialization, which
421
is used inside get_charset_by_name.
422
As we're now interested in cp932 only,
423
let's just detect it using strcmp().
425
fs_cset_cache= !strcmp(buf, "cp932") ?
426
&my_charset_cp932_japanese_ci : &my_charset_bin;
428
return fs_cset_cache;
294
433
Escape apostrophes by doubling them up