12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
18
#include <drizzled/charset.h>
19
#include <drizzled/error.h>
20
#include <drizzled/charset_info.h>
21
#include <drizzled/internal/m_string.h>
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include "mysys_priv.h"
17
#include "mysys_err.h"
18
#include <mystrings/m_ctype.h>
19
#include <mystrings/m_string.h>
22
20
#include <drizzled/configmake.h>
25
#include <drizzled/visibility.h>
33
We collect allocated memory in this vector so that we can free it on delete.
35
static vector<unsigned char*> memory_vector;
38
24
The code below implements this functionality:
54
40
get_collation_number_internal(const char *name)
56
for (CHARSET_INFO **cs= all_charsets;
57
cs < all_charsets+array_elements(all_charsets)-1;
43
for (cs= all_charsets;
44
cs < all_charsets+array_elements(all_charsets)-1 ;
60
if ( cs[0] && cs[0]->name && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
47
if ( cs[0] && cs[0]->name &&
48
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
62
49
return cs[0]->number;
68
static unsigned char *cs_alloc(size_t size)
70
memory_vector.push_back(new unsigned char[size]);
71
return memory_vector.back();
74
55
static bool init_state_maps(CHARSET_INFO *cs)
76
if (!(cs->state_map= cs_alloc(256)))
58
unsigned char *state_map;
59
unsigned char *ident_map;
61
if (!(cs->state_map= (unsigned char*) malloc(256)))
79
if (!(cs->ident_map= cs_alloc(256)))
64
if (!(cs->ident_map= (unsigned char*) malloc(256)))
82
unsigned char *state_map= cs->state_map;
83
unsigned char *ident_map= cs->ident_map;
67
state_map= cs->state_map;
68
ident_map= cs->ident_map;
85
70
/* Fill state_map with states to get a faster parser */
86
for (int i= 0; i < 256; i++)
71
for (i=0; i < 256 ; i++)
88
73
if (my_isalpha(cs,i))
89
state_map[i]= MY_LEX_IDENT;
74
state_map[i]=(unsigned char) MY_LEX_IDENT;
90
75
else if (my_isdigit(cs,i))
91
state_map[i]= MY_LEX_NUMBER_IDENT;
76
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
77
#if defined(USE_MB) && defined(USE_MB_IDENT)
92
78
else if (my_mbcharlen(cs, i)>1)
93
state_map[i]= MY_LEX_IDENT;
79
state_map[i]=(unsigned char) MY_LEX_IDENT;
94
81
else if (my_isspace(cs,i))
95
state_map[i]= MY_LEX_SKIP;
82
state_map[i]=(unsigned char) MY_LEX_SKIP;
97
state_map[i]= MY_LEX_CHAR;
84
state_map[i]=(unsigned char) MY_LEX_CHAR;
99
state_map['_']=state_map['$']= MY_LEX_IDENT;
100
state_map['\'']= MY_LEX_STRING;
101
state_map['.']= MY_LEX_REAL_OR_POINT;
102
state_map['>']=state_map['=']=state_map['!']= MY_LEX_CMP_OP;
103
state_map['<']= MY_LEX_LONG_CMP_OP;
104
state_map['&']=state_map['|']= MY_LEX_BOOL;
105
state_map['#']= MY_LEX_COMMENT;
106
state_map[';']= MY_LEX_SEMICOLON;
107
state_map[':']= MY_LEX_SET_VAR;
108
state_map[0]= MY_LEX_EOL;
109
state_map['\\']= MY_LEX_ESCAPE;
110
state_map['/']= MY_LEX_LONG_COMMENT;
111
state_map['*']= MY_LEX_END_LONG_COMMENT;
112
state_map['@']= MY_LEX_USER_END;
113
state_map['`']= MY_LEX_USER_VARIABLE_DELIMITER;
114
state_map['"']= MY_LEX_STRING_OR_DELIMITER;
86
state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
87
state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
88
state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
89
state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
90
state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
91
state_map[(unsigned char)'&']=state_map[(unsigned char)'|']=(unsigned char) MY_LEX_BOOL;
92
state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
93
state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
94
state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
95
state_map[0]=(unsigned char) MY_LEX_EOL;
96
state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
97
state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
98
state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
99
state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
100
state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
101
state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;
117
104
Create a second map to make it faster to find identifiers
119
for (int i= 0; i < 256; i++)
106
for (i=0; i < 256 ; i++)
121
ident_map[i]= state_map[i] == MY_LEX_IDENT || state_map[i] == MY_LEX_NUMBER_IDENT;
108
ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
109
state_map[i] == MY_LEX_NUMBER_IDENT);
124
112
/* Special handling of hex and binary strings */
125
state_map['x']= state_map['X']= MY_LEX_IDENT_OR_HEX;
126
state_map['b']= state_map['B']= MY_LEX_IDENT_OR_BIN;
113
state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
114
state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
130
119
static bool charset_initialized= false;
132
DRIZZLED_API CHARSET_INFO *all_charsets[256];
133
const DRIZZLED_API CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
121
CHARSET_INFO *all_charsets[256];
122
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
135
124
void add_compiled_collation(CHARSET_INFO * cs)
285
Resolve character set by the character set name (utf8, latin1, ...).
287
The function tries to resolve character set by the specified name. If
288
there is a character set with the given name, it is assigned to the "cs"
289
parameter and false is returned. If there is no such character set,
290
"default_cs" is assigned to "cs" and true is returned.
292
@param[in] cs_name Character set name.
293
@param[in] default_cs Default character set.
294
@param[out] cs Variable to store character set.
296
@return false if character set was resolved successfully; true if there
297
is no character set with the given name.
300
bool resolve_charset(const char *cs_name,
301
const CHARSET_INFO *default_cs,
302
const CHARSET_INFO **cs)
304
*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY);
317
Resolve collation by the collation name (utf8_general_ci, ...).
319
The function tries to resolve collation by the specified name. If there
320
is a collation with the given name, it is assigned to the "cl" parameter
321
and false is returned. If there is no such collation, "default_cl" is
322
assigned to "cl" and true is returned.
324
@param[out] cl Variable to store collation.
325
@param[in] cl_name Collation name.
326
@param[in] default_cl Default collation.
328
@return false if collation was resolved successfully; true if there is no
329
collation with the given name.
332
bool resolve_collation(const char *cl_name,
333
const CHARSET_INFO *default_cl,
334
const CHARSET_INFO **cl)
336
*cl= get_charset_by_name(cl_name);
348
#ifdef BACKSLASH_MBTAIL
349
static CHARSET_INFO *fs_cset_cache= NULL;
351
CHARSET_INFO *fs_character_set()
356
GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
357
buf+2, sizeof(buf)-3);
359
We cannot call get_charset_by_name here
360
because fs_character_set() is executed before
361
LOCK_THD_charset mutex initialization, which
362
is used inside get_charset_by_name.
363
As we're now interested in cp932 only,
364
let's just detect it using strcmp().
366
fs_cset_cache= !strcmp(buf, "cp932") ?
367
&my_charset_cp932_japanese_ci : &my_charset_bin;
369
return fs_cset_cache;
294
374
Escape apostrophes by doubling them up