12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
18
#include "drizzled/charset.h"
19
#include "drizzled/error.h"
20
#include "drizzled/charset_info.h"
21
#include "drizzled/internal/m_string.h"
22
#include <drizzled/configmake.h>
31
We collect memory in this vector that we free on delete.
33
static vector<void *>memory_vector;
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <mystrings/m_ctype.h>
19
#include <mystrings/m_string.h>
36
24
The code below implements this functionality:
38
26
- Initializing charset related structures
39
27
- Loading dynamic charsets
40
- Searching for a proper CHARSET_INFO
28
- Searching for a proper CHARSET_INFO
41
29
using charset name, collation name or collation ID
42
30
- Setting server default character set
70
58
unsigned char *state_map;
71
59
unsigned char *ident_map;
73
if (!(cs->state_map= (unsigned char*) cs_alloc(256)))
61
if (!(cs->state_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
76
if (!(cs->ident_map= (unsigned char*) cs_alloc(256)))
64
if (!(cs->ident_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
79
67
state_map= cs->state_map;
80
68
ident_map= cs->ident_map;
82
70
/* Fill state_map with states to get a faster parser */
83
71
for (i=0; i < 256 ; i++)
86
74
state_map[i]=(unsigned char) MY_LEX_IDENT;
87
75
else if (my_isdigit(cs,i))
88
76
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
77
#if defined(USE_MB) && defined(USE_MB_IDENT)
89
78
else if (my_mbcharlen(cs, i)>1)
90
79
state_map[i]=(unsigned char) MY_LEX_IDENT;
91
81
else if (my_isspace(cs,i))
92
82
state_map[i]=(unsigned char) MY_LEX_SKIP;
129
static bool charset_initialized= false;
119
#define MY_MAX_ALLOWED_BUF 1024*1024
120
#define MY_CHARSET_INDEX "Index.xml"
122
const char *charsets_dir= NULL;
123
static int charset_initialized=0;
126
char *get_charsets_dir(char *buf)
128
const char *sharedir= SHAREDIR;
131
if (charsets_dir != NULL)
132
strmake(buf, charsets_dir, FN_REFLEN-1);
135
if (test_if_hard_path(sharedir) ||
136
is_prefix(sharedir, DEFAULT_CHARSET_HOME))
137
strxmov(buf, sharedir, "/", CHARSET_DIR, NULL);
139
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
142
res= convert_dirname(buf,buf,NULL);
131
146
CHARSET_INFO *all_charsets[256];
132
147
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
137
152
cs->state|= MY_CS_AVAILABLE;
140
void *cs_alloc(size_t size)
155
static void *cs_alloc(size_t size)
142
void *ptr= malloc(size);
144
memory_vector.push_back(ptr);
157
return my_once_alloc(size, MYF(MY_WME));
151
161
static bool init_available_charsets(myf myflags)
163
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
155
166
We have to use charset_initialized to not lock on THR_LOCK_charset
156
167
inside get_internal_charset...
158
if (charset_initialized == false)
169
if (!charset_initialized)
160
171
CHARSET_INFO **cs;
161
memset(&all_charsets, 0, sizeof(all_charsets));
162
init_compiled_charsets(myflags);
164
/* Copy compiled charsets */
165
for (cs=all_charsets;
166
cs < all_charsets+array_elements(all_charsets)-1 ;
173
To make things thread safe we are not allowing other threads to interfere
174
while we may be changing the cs_info_table
176
pthread_mutex_lock(&THR_LOCK_charset);
177
if (!charset_initialized)
179
memset(&all_charsets, 0, sizeof(all_charsets));
180
init_compiled_charsets(myflags);
182
/* Copy compiled charsets */
183
for (cs=all_charsets;
184
cs < all_charsets+array_elements(all_charsets)-1 ;
172
if (init_state_maps(*cs))
190
if (init_state_maps(*cs))
195
my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
196
charset_initialized=1;
177
charset_initialized= true;
198
pthread_mutex_unlock(&THR_LOCK_charset);
179
assert(charset_initialized);
185
204
void free_charsets(void)
187
charset_initialized= true;
189
while (memory_vector.empty() == false)
191
void *ptr= memory_vector.back();
192
memory_vector.pop_back();
195
memory_vector.clear();
206
charset_initialized=0;
258
269
cs->state|= MY_CS_READY;
271
pthread_mutex_unlock(&THR_LOCK_charset);
265
const CHARSET_INFO *get_charset(uint32_t cs_number)
276
const const CHARSET_INFO *get_charset(uint32_t cs_number, myf flags)
267
278
const CHARSET_INFO *cs;
268
279
if (cs_number == default_charset_info->number)
269
280
return default_charset_info;
271
282
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
273
284
if (!cs_number || cs_number >= array_elements(all_charsets)-1)
276
287
cs= get_internal_charset(cs_number);
289
if (!cs && (flags & MY_WME))
291
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
292
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
294
int10_to_str(cs_number, cs_string+1, 10);
295
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
281
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
300
const CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
283
302
uint32_t cs_number;
284
303
const CHARSET_INFO *cs;
285
304
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
287
cs_number= get_collation_number(cs_name);
306
cs_number=get_collation_number(cs_name);
288
307
cs= cs_number ? get_internal_charset(cs_number) : NULL;
309
if (!cs && (flags & MY_WME))
311
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
312
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
313
my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
294
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
320
const CHARSET_INFO *get_charset_by_csname(const char *cs_name,
296
324
uint32_t cs_number;
297
325
const CHARSET_INFO *cs;
301
329
cs_number= get_charset_number(cs_name, cs_flags);
302
330
cs= cs_number ? get_internal_charset(cs_number) : NULL;
332
if (!cs && (flags & MY_WME))
334
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
335
my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
336
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
344
Resolve character set by the character set name (utf8, latin1, ...).
346
The function tries to resolve character set by the specified name. If
347
there is a character set with the given name, it is assigned to the "cs"
348
parameter and false is returned. If there is no such character set,
349
"default_cs" is assigned to the "cs" and true is returned.
351
@param[in] cs_name Character set name.
352
@param[in] default_cs Default character set.
353
@param[out] cs Variable to store character set.
355
@return false if character set was resolved successfully; true if there
356
is no character set with given name.
359
bool resolve_charset(const char *cs_name,
360
const CHARSET_INFO *default_cs,
361
const CHARSET_INFO **cs)
363
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
376
Resolve collation by the collation name (utf8_general_ci, ...).
378
The function tries to resolve collation by the specified name. If there
379
is a collation with the given name, it is assigned to the "cl" parameter
380
and false is returned. If there is no such collation, "default_cl" is
381
assigned to the "cl" and true is returned.
383
@param[out] cl Variable to store collation.
384
@param[in] cl_name Collation name.
385
@param[in] default_cl Default collation.
387
@return false if collation was resolved successfully; true if there is no
388
collation with given name.
391
bool resolve_collation(const char *cl_name,
392
const CHARSET_INFO *default_cl,
393
const CHARSET_INFO **cl)
395
*cl= get_charset_by_name(cl_name, MYF(0));
407
#ifdef BACKSLASH_MBTAIL
408
static CHARSET_INFO *fs_cset_cache= NULL;
410
CHARSET_INFO *fs_character_set()
415
GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
416
buf+2, sizeof(buf)-3);
418
We cannot call get_charset_by_name here
419
because fs_character_set() is executed before
420
LOCK_THD_charset mutex initialization, which
421
is used inside get_charset_by_name.
422
As we're now interested in cp932 only,
423
let's just detect it using strcmp().
425
fs_cset_cache= !strcmp(buf, "cp932") ?
426
&my_charset_cp932_japanese_ci : &my_charset_bin;
428
return fs_cset_cache;
309
433
Escape apostrophes by doubling them up