1
by brian
clean slate |
1 |
/* Copyright (C) 2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
15 |
||
16 |
/*
|
|
17 |
#include <stdio.h>
|
|
18 |
#include <stdlib.h>
|
|
19 |
#include <string.h>
|
|
20 |
*/
|
|
21 |
#include <my_global.h> |
|
22 |
#include <m_string.h> |
|
23 |
#include <m_ctype.h> |
|
24 |
#include "m_ctype.h" |
|
25 |
||
26 |
||
27 |
/*
  One row of the category-name lookup table: a short category
  abbreviation paired with the ctype flag mask stored for it.
*/
typedef struct my_ctype_name_st
{
  const char *name;   /* Category abbreviation, e.g. "Lu"; NULL ends a table */
  int val;            /* Bitwise OR of _MY_* ctype flags for this category   */
} MY_CTYPE_NAME_ST;
|
32 |
||
33 |
||
34 |
static MY_CTYPE_NAME_ST my_ctype_name[]= |
|
35 |
{
|
|
36 |
{"Lu", _MY_U}, /* Letter, Uppercase */ |
|
37 |
{"Ll", _MY_L}, /* Letter, Lowercase */ |
|
38 |
{"Lt", _MY_U}, /* Letter, Titlecase */ |
|
39 |
{"Lm", _MY_L}, /* Letter, Modifier */ |
|
40 |
{"Lo", _MY_L}, /* Letter, other */ |
|
41 |
||
42 |
{"Nd", _MY_NMR}, /* Number, Decimal Digit */ |
|
43 |
{"Nl", _MY_NMR|_MY_U|_MY_L}, /* Number, Letter */ |
|
44 |
{"No", _MY_NMR|_MY_PNT}, /* Number, Other */ |
|
45 |
||
46 |
{"Mn", _MY_L|_MY_PNT}, /* Mark, Nonspacing */ |
|
47 |
{"Mc", _MY_L|_MY_PNT}, /* Mark, Spacing Combining */ |
|
48 |
{"Me", _MY_L|_MY_PNT}, /* Mark, Enclosing */ |
|
49 |
||
50 |
{"Pc", _MY_PNT}, /* Punctuation, Connector */ |
|
51 |
{"Pd", _MY_PNT}, /* Punctuation, Dash */ |
|
52 |
{"Ps", _MY_PNT}, /* Punctuation, Open */ |
|
53 |
{"Pe", _MY_PNT}, /* Punctuation, Close */ |
|
54 |
{"Pi", _MY_PNT}, /* Punctuation, Initial quote */ |
|
55 |
{"Pf", _MY_PNT}, /* Punctuation, Final quote */ |
|
56 |
{"Po", _MY_PNT}, /* Punctuation, Other */ |
|
57 |
||
58 |
{"Sm", _MY_PNT}, /* Symbol, Math */ |
|
59 |
{"Sc", _MY_PNT}, /* Symbol, Currency */ |
|
60 |
{"Sk", _MY_PNT}, /* Symbol, Modifier */ |
|
61 |
{"So", _MY_PNT}, /* Symbol, Other */ |
|
62 |
||
63 |
{"Zs", _MY_SPC}, /* Separator, Space */ |
|
64 |
{"Zl", _MY_SPC}, /* Separator, Line */ |
|
65 |
{"Zp", _MY_SPC}, /* Separator, Paragraph */ |
|
66 |
||
67 |
{"Cc", _MY_CTR}, /* Other, Control */ |
|
68 |
{"Cf", _MY_CTR}, /* Other, Format */ |
|
69 |
{"Cs", _MY_CTR}, /* Other, Surrogate */ |
|
70 |
{"Co", _MY_CTR}, /* Other, Private Use */ |
|
71 |
{"Cn", _MY_CTR}, /* Other, Not Assigned */ |
|
72 |
{NULL, 0} |
|
73 |
};
|
|
74 |
||
75 |
||
76 |
static int |
|
77 |
ctypestr2num(const char *tok) |
|
78 |
{
|
|
79 |
MY_CTYPE_NAME_ST *p; |
|
80 |
for (p= my_ctype_name; p->name; p++) |
|
81 |
{
|
|
82 |
if (!strncasecmp(p->name, tok, 2)) |
|
83 |
return p->val; |
|
84 |
}
|
|
85 |
return 0; |
|
86 |
}
|
|
87 |
||
88 |
||
89 |
int main(int ac, char ** av) |
|
90 |
{
|
|
91 |
char str[1024]; |
|
92 |
unsigned char ctypea[64*1024]; |
|
93 |
size_t i; |
|
94 |
size_t plane; |
|
95 |
MY_UNI_CTYPE uctype[256]; |
|
96 |
FILE *f= stdin; |
|
97 |
||
98 |
if (ac > 1 && av[1] && !(f= fopen(av[1],"r"))) |
|
99 |
{
|
|
100 |
fprintf(stderr, "Can't open file %s\n", av[1]); |
|
101 |
exit(1); |
|
102 |
}
|
|
103 |
bzero(&ctypea,sizeof(ctypea)); |
|
104 |
bzero(&uctype, sizeof(uctype)); |
|
105 |
||
106 |
printf("/*\n"); |
|
107 |
printf(" Unicode ctype data\n"); |
|
108 |
printf(" Generated from %s\n", av[1] ? av[1] : "stdin"); |
|
109 |
printf("*/\n"); |
|
110 |
||
111 |
while(fgets(str, sizeof(str), f)) |
|
112 |
{
|
|
113 |
size_t n= 0, code= 0; |
|
114 |
char *s,*e; |
|
115 |
int ctype= 0; |
|
116 |
||
117 |
for(s= str; s; ) |
|
118 |
{
|
|
119 |
char *end; |
|
120 |
char tok[1024]=""; |
|
121 |
e=strchr(s,';'); |
|
122 |
if(e) |
|
123 |
{
|
|
124 |
strncpy(tok,s,(unsigned int)(e-s)); |
|
125 |
tok[e-s]=0; |
|
126 |
}
|
|
127 |
else
|
|
128 |
{
|
|
129 |
strcpy(tok,s); |
|
130 |
}
|
|
131 |
||
132 |
end=tok+strlen(tok); |
|
133 |
||
134 |
switch(n) |
|
135 |
{
|
|
136 |
case 0: code= strtol(tok,&end,16);break; |
|
137 |
case 2: ctype= ctypestr2num(tok);break; |
|
138 |
}
|
|
139 |
||
140 |
n++; |
|
141 |
if(e) s=e+1; |
|
142 |
else s=e; |
|
143 |
}
|
|
144 |
if(code<=0xFFFF) |
|
145 |
{
|
|
146 |
ctypea[code]= ctype; |
|
147 |
}
|
|
148 |
}
|
|
149 |
||
150 |
/* Fill digits */
|
|
151 |
for (i= '0'; i <= '9'; i++) |
|
152 |
ctypea[i]= _MY_NMR; |
|
153 |
||
154 |
for (i= 'a'; i <= 'z'; i++) |
|
155 |
ctypea[i]|= _MY_X; |
|
156 |
for (i= 'A'; i <= 'Z'; i++) |
|
157 |
ctypea[i]|= _MY_X; |
|
158 |
||
159 |
||
160 |
/* Fill ideographs */
|
|
161 |
||
162 |
/* CJK Ideographs Extension A (U+3400 - U+4DB5) */
|
|
163 |
for(i=0x3400;i<=0x4DB5;i++) |
|
164 |
{
|
|
165 |
ctypea[i]= _MY_L | _MY_U; |
|
166 |
}
|
|
167 |
||
168 |
/* CJK Ideographs (U+4E00 - U+9FA5) */
|
|
169 |
for(i=0x4E00;i<=0x9FA5;i++){ |
|
170 |
ctypea[i]= _MY_L | _MY_U; |
|
171 |
}
|
|
172 |
||
173 |
/* Hangul Syllables (U+AC00 - U+D7A3) */
|
|
174 |
for(i=0xAC00;i<=0xD7A3;i++) |
|
175 |
{
|
|
176 |
ctypea[i]= _MY_L | _MY_U; |
|
177 |
}
|
|
178 |
||
179 |
||
180 |
/* Calc plane parameters */
|
|
181 |
for(plane=0;plane<256;plane++) |
|
182 |
{
|
|
183 |
size_t character; |
|
184 |
uctype[plane].ctype= ctypea+plane*256; |
|
185 |
||
186 |
uctype[plane].pctype= uctype[plane].ctype[0]; |
|
187 |
for(character=1;character<256;character++) |
|
188 |
{
|
|
189 |
if (uctype[plane].ctype[character] != uctype[plane].pctype) |
|
190 |
{
|
|
191 |
uctype[plane].pctype= 0; /* Mixed plane */ |
|
192 |
break; |
|
193 |
}
|
|
194 |
}
|
|
195 |
if (character==256) /* All the same, no needs to dump whole plane */ |
|
196 |
uctype[plane].ctype= NULL; |
|
197 |
}
|
|
198 |
||
199 |
/* Dump mixed planes */
|
|
200 |
for(plane=0;plane<256;plane++) |
|
201 |
{
|
|
202 |
if(uctype[plane].ctype) |
|
203 |
{
|
|
204 |
int charnum=0; |
|
205 |
int num=0; |
|
206 |
||
207 |
printf("static unsigned char uctype_page%02X[256]=\n{\n",plane); |
|
208 |
||
209 |
for(charnum=0;charnum<256;charnum++) |
|
210 |
{
|
|
211 |
int cod; |
|
212 |
||
213 |
cod=(plane<<8)+charnum; |
|
214 |
printf(" %2d%s",uctype[plane].ctype[charnum],charnum<255?",":""); |
|
215 |
||
216 |
num++; |
|
217 |
if(num==16) |
|
218 |
{
|
|
219 |
printf("\n"); |
|
220 |
num=0; |
|
221 |
}
|
|
222 |
}
|
|
223 |
printf("};\n\n"); |
|
224 |
}
|
|
225 |
}
|
|
226 |
||
227 |
||
228 |
/* Dump plane index */
|
|
229 |
printf("MY_UNI_CTYPE my_uni_ctype[256]={\n"); |
|
230 |
for(plane=0;plane<256;plane++) |
|
231 |
{
|
|
232 |
char plane_name[128]="NULL"; |
|
233 |
if(uctype[plane].ctype){ |
|
234 |
sprintf(plane_name,"uctype_page%02X",plane); |
|
235 |
}
|
|
236 |
printf("\t{%d,%s}%s\n",uctype[plane].pctype,plane_name,plane<255?",":""); |
|
237 |
}
|
|
238 |
printf("};\n"); |
|
239 |
||
240 |
return 0; |
|
241 |
}
|