~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
16
#include <m_string.h>
1 by brian
clean slate
17
#include <m_ctype.h>
18
19
20
/*

  This file implements routines which parse XML-based
  character set and collation description files.

  Unicode collations are encoded according to

    Unicode Technical Standard #35
    Locale Data Markup Language (LDML)
    http://www.unicode.org/reports/tr35/

  and converted into an ICU string according to

    Collation Customization
    http://oss.software.ibm.com/icu/userguide/Collate_Customization.html

*/
37
38
/*
  Pairs an integer state code with a string.

  NOTE(review): presumably one row of a lookup table that maps a section
  name of the charset description file to one of the _CS_* codes; the
  table itself is not visible here - confirm against its definition.

  The member order is kept as is, so positional initializers of the
  form { state, str } keep working.
*/
struct my_cs_file_section_st
{
  int        state;  /* state code */
  const char *str;   /* string associated with that state */
};
43
44
/*
  Integer state codes, numbered 1..21.

  NOTE(review): presumably these identify which section of the charset /
  collation description is being processed (they match the "state"
  member of struct my_cs_file_section_st) - confirm against the users
  of these macros.
*/
#define _CS_MISC        1
#define _CS_ID          2
#define _CS_CSNAME      3
#define _CS_FAMILY      4
#define _CS_ORDER       5
#define _CS_COLNAME     6
#define _CS_FLAG        7
#define _CS_CHARSET     8
#define _CS_COLLATION   9
#define _CS_UPPERMAP    10
#define _CS_LOWERMAP    11
#define _CS_UNIMAP      12
#define _CS_COLLMAP     13
#define _CS_CTYPEMAP    14
#define _CS_PRIMARY_ID  15
#define _CS_BINARY_ID   16
#define _CS_CSDESCRIPT  17
#define _CS_RESET       18
#define _CS_DIFF1       19
#define _CS_DIFF2       20
#define _CS_DIFF3       21
65
66
67
/* Size in bytes of the comment[] buffer in MY_CHARSET_LOADER. */
#define MY_CS_CSDESCR_SIZE      64
/* Size in bytes of the tailoring[] buffer in MY_CHARSET_LOADER. */
#define MY_CS_TAILORING_SIZE    1024
69
70
typedef struct my_cs_file_info
71
{
72
  char   csname[MY_CS_NAME_SIZE];
73
  char   name[MY_CS_NAME_SIZE];
74
  uchar  ctype[MY_CS_CTYPE_TABLE_SIZE];
75
  uchar  to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
76
  uchar  to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
77
  uchar  sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
206 by Brian Aker
Removed final uint dead types.
78
  uint16_t tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
1 by brian
clean slate
79
  char   comment[MY_CS_CSDESCR_SIZE];
80
  char   tailoring[MY_CS_TAILORING_SIZE];
81
  size_t tailoring_length;
82
  CHARSET_INFO cs;
83
  int (*add_collation)(CHARSET_INFO *cs);
84
} MY_CHARSET_LOADER;
85
86
87
88
/*
89
  Check repertoire: detect pure ascii strings
90
*/
91
uint
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
92
my_string_repertoire(const CHARSET_INFO * const cs, const char *str, ulong length)
1 by brian
clean slate
93
{
94
  const char *strend= str + length;
95
  if (cs->mbminlen == 1)
96
  {
97
    for ( ; str < strend; str++)
98
    {
99
      if (((uchar) *str) > 0x7F)
100
        return MY_REPERTOIRE_UNICODE30;
101
    }
102
  }
103
  else
104
  {
105
    my_wc_t wc;
106
    int chlen;
53.2.11 by Monty Taylor
Added cast for a type-signedness problem.
107
    for (; (chlen= cs->cset->mb_wc(cs, &wc, (uchar *)str, (uchar *)strend)) > 0; str+= chlen)
1 by brian
clean slate
108
    {
109
      if (wc > 0x7F)
110
        return MY_REPERTOIRE_UNICODE30;
111
    }
112
  }
113
  return MY_REPERTOIRE_ASCII;
114
}
115
116
117
/*
118
  Detect whether a character set is ASCII compatible.
119
163 by Brian Aker
Merge Monty's code.
120
  Returns true for:
1 by brian
clean slate
121
  
122
  - all 8bit character sets whose Unicode mapping of 0x7B is '{'
123
    (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
124
  
125
  - all multi-byte character sets having mbminlen == 1
126
    (ignores ucs2 whose mbminlen is 2)
127
  
128
  TODO:
129
  
130
  When merging to 5.2, this function should be changed
131
  to check a new flag MY_CS_NONASCII, 
132
  
133
     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
134
  
135
  This flag was previously added into 5.2 under terms
136
  of WL#3759 "Optimize identifier conversion in client-server protocol"
137
  especially to mark character sets not compatible with ASCII.
138
  
139
  We won't backport this flag to 5.0 or 5.1.
140
  This function is Ok for 5.0 and 5.1, because we're not going
141
  to introduce new tricky character sets between 5.0 and 5.2.
142
*/
276 by Brian Aker
Cleaned out my_bool from strings.
143
bool
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
144
my_charset_is_ascii_based(const CHARSET_INFO * const cs)
1 by brian
clean slate
145
{
146
  return 
147
    (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
148
    (cs->mbminlen == 1 && cs->mbmaxlen > 1);
149
}
150
151
152
/*
153
  Detect if a character set is 8bit,
154
  and it is pure ascii, i.e. doesn't have
155
  characters outside U+0000..U+007F
156
  This functions is shared between "conf_to_src"
157
  and dynamic charsets loader in "mysqld".
158
*/
276 by Brian Aker
Cleaned out my_bool from strings.
159
bool
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
160
my_charset_is_8bit_pure_ascii(const CHARSET_INFO * const cs)
1 by brian
clean slate
161
{
162
  size_t code;
163
  if (!cs->tab_to_uni)
164
    return 0;
165
  for (code= 0; code < 256; code++)
166
  {
167
    if (cs->tab_to_uni[code] > 0x7F)
168
      return 0;
169
  }
170
  return 1;
171
}
172
173
174
/*
175
  Shared function between conf_to_src and mysys.
176
  Check if a 8bit character set is compatible with
177
  ascii on the range 0x00..0x7F.
178
*/
276 by Brian Aker
Cleaned out my_bool from strings.
179
bool
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
180
my_charset_is_ascii_compatible(const CHARSET_INFO * const cs)
1 by brian
clean slate
181
{
182
  uint i;
183
  if (!cs->tab_to_uni)
184
    return 1;
185
  for (i= 0; i < 128; i++)
186
  {
187
    if (cs->tab_to_uni[i] != i)
188
      return 0;
189
  }
190
  return 1;
191
}