13
13
along with this program; if not, write to the Free Software
14
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
18
#include "drizzled/charset.h"
19
#include "drizzled/error.h"
20
#include "drizzled/charset_info.h"
21
#include "drizzled/internal/m_string.h"
22
#include <drizzled/configmake.h>
31
We collect memory in this vector that we free on delete.
33
static vector<void *>memory_vector;
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <mystrings/m_ctype.h>
19
#include <mystrings/m_string.h>
36
24
The code below implements this functionality:
38
26
- Initializing charset related structures
39
27
- Loading dynamic charsets
40
- Searching for a proper CHARSET_INFO
28
- Searching for a proper CHARSET_INFO
41
29
using charset name, collation name or collation ID
42
30
- Setting server default character set
70
58
unsigned char *state_map;
71
59
unsigned char *ident_map;
73
if (!(cs->state_map= (unsigned char*) cs_alloc(256)))
61
if (!(cs->state_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
76
if (!(cs->ident_map= (unsigned char*) cs_alloc(256)))
64
if (!(cs->ident_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
79
67
state_map= cs->state_map;
80
68
ident_map= cs->ident_map;
82
70
/* Fill state_map with states to get a faster parser */
83
71
for (i=0; i < 256 ; i++)
86
74
state_map[i]=(unsigned char) MY_LEX_IDENT;
87
75
else if (my_isdigit(cs,i))
88
76
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
77
#if defined(USE_MB) && defined(USE_MB_IDENT)
89
78
else if (my_mbcharlen(cs, i)>1)
90
79
state_map[i]=(unsigned char) MY_LEX_IDENT;
91
81
else if (my_isspace(cs,i))
92
82
state_map[i]=(unsigned char) MY_LEX_SKIP;
129
static bool charset_initialized= false;
119
#define MY_CHARSET_INDEX "Index.xml"
121
const char *charsets_dir= NULL;
122
static int charset_initialized=0;
125
char *get_charsets_dir(char *buf)
127
const char *sharedir= SHAREDIR;
130
if (charsets_dir != NULL)
131
strmake(buf, charsets_dir, FN_REFLEN-1);
134
if (test_if_hard_path(sharedir) ||
135
is_prefix(sharedir, DEFAULT_CHARSET_HOME))
136
strxmov(buf, sharedir, "/", CHARSET_DIR, NULL);
138
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
141
res= convert_dirname(buf,buf,NULL);
131
145
CHARSET_INFO *all_charsets[256];
132
146
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
137
151
cs->state|= MY_CS_AVAILABLE;
140
void *cs_alloc(size_t size)
154
static void *cs_alloc(size_t size)
142
void *ptr= malloc(size);
144
memory_vector.push_back(ptr);
156
return my_once_alloc(size, MYF(MY_WME));
151
160
static bool init_available_charsets(myf myflags)
162
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
155
165
We have to use charset_initialized to not lock on THR_LOCK_charset
156
166
inside get_internal_charset...
158
if (charset_initialized == false)
168
if (!charset_initialized)
160
170
CHARSET_INFO **cs;
161
memset(&all_charsets, 0, sizeof(all_charsets));
162
init_compiled_charsets(myflags);
164
/* Copy compiled charsets */
165
for (cs=all_charsets;
166
cs < all_charsets+array_elements(all_charsets)-1 ;
172
To make things thread safe we are not allowing other threads to interfere
173
while we may be changing the cs_info_table
175
pthread_mutex_lock(&THR_LOCK_charset);
176
if (!charset_initialized)
178
memset(&all_charsets, 0, sizeof(all_charsets));
179
init_compiled_charsets(myflags);
181
/* Copy compiled charsets */
182
for (cs=all_charsets;
183
cs < all_charsets+array_elements(all_charsets)-1 ;
172
if (init_state_maps(*cs))
189
if (init_state_maps(*cs))
194
my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
195
charset_initialized=1;
177
charset_initialized= true;
197
pthread_mutex_unlock(&THR_LOCK_charset);
179
assert(charset_initialized);
185
203
void free_charsets(void)
187
charset_initialized= true;
189
while (memory_vector.empty() == false)
191
void *ptr= memory_vector.back();
192
memory_vector.pop_back();
195
memory_vector.clear();
205
charset_initialized=0;
258
268
cs->state|= MY_CS_READY;
270
pthread_mutex_unlock(&THR_LOCK_charset);
265
const CHARSET_INFO *get_charset(uint32_t cs_number)
275
const CHARSET_INFO *get_charset(uint32_t cs_number, myf flags)
267
277
const CHARSET_INFO *cs;
268
278
if (cs_number == default_charset_info->number)
269
279
return default_charset_info;
271
281
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
273
283
if (!cs_number || cs_number >= array_elements(all_charsets)-1)
276
286
cs= get_internal_charset(cs_number);
288
if (!cs && (flags & MY_WME))
290
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
291
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
293
int10_to_str(cs_number, cs_string+1, 10);
294
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
281
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
299
const CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
283
301
uint32_t cs_number;
284
302
const CHARSET_INFO *cs;
285
303
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
287
cs_number= get_collation_number(cs_name);
305
cs_number=get_collation_number(cs_name);
288
306
cs= cs_number ? get_internal_charset(cs_number) : NULL;
308
if (!cs && (flags & MY_WME))
310
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
311
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
312
my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
294
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
319
const CHARSET_INFO *get_charset_by_csname(const char *cs_name,
296
323
uint32_t cs_number;
297
324
const CHARSET_INFO *cs;
301
328
cs_number= get_charset_number(cs_name, cs_flags);
302
329
cs= cs_number ? get_internal_charset(cs_number) : NULL;
331
if (!cs && (flags & MY_WME))
333
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
334
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
335
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
343
Resolve character set by the character set name (utf8, latin1, ...).
345
The function tries to resolve character set by the specified name. If
346
there is a character set with the given name, it is assigned to the "cs"
347
parameter and false is returned. If there is no such character set,
348
"default_cs" is assigned to the "cs" and true is returned.
350
@param[in] cs_name Character set name.
351
@param[in] default_cs Default character set.
352
@param[out] cs Variable to store character set.
354
@return false if character set was resolved successfully; true if there
355
is no character set with the given name.
358
bool resolve_charset(const char *cs_name,
359
const CHARSET_INFO *default_cs,
360
const CHARSET_INFO **cs)
362
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
375
Resolve collation by the collation name (utf8_general_ci, ...).
377
The function tries to resolve collation by the specified name. If there
378
is a collation with the given name, it is assigned to the "cl" parameter
379
and false is returned. If there is no such collation, "default_cl" is
380
assigned to the "cl" and true is returned.
382
@param[out] cl Variable to store collation.
383
@param[in] cl_name Collation name.
384
@param[in] default_cl Default collation.
386
@return false if collation was resolved successfully; true if there is no
387
collation with the given name.
390
bool resolve_collation(const char *cl_name,
391
const CHARSET_INFO *default_cl,
392
const CHARSET_INFO **cl)
394
*cl= get_charset_by_name(cl_name, MYF(0));
406
#ifdef BACKSLASH_MBTAIL
407
static CHARSET_INFO *fs_cset_cache= NULL;
409
CHARSET_INFO *fs_character_set()
414
GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
415
buf+2, sizeof(buf)-3);
417
We cannot call get_charset_by_name here
418
because fs_character_set() is executed before
419
LOCK_THD_charset mutex initialization, which
420
is used inside get_charset_by_name.
421
As we're now interested in cp932 only,
422
let's just detect it using strcmp().
424
fs_cset_cache= !strcmp(buf, "cp932") ?
425
&my_charset_cp932_japanese_ci : &my_charset_bin;
427
return fs_cset_cache;
309
432
Escape apostrophes by doubling them up