/* Copyright (C) 2004 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Short aliases for the integer types used by the weight tables */
typedef unsigned char  uchar;
typedef unsigned short uint16;

/*
  Collation data collected for one code point.

  num     number of collation elements stored for the code point
  weight  weight[level][element]: one row per weight level (the four
          dot-separated fields of an input collation element), one
          column per collation element; a row holds at most 9 elements
*/
struct uca_item_st
{
  uchar  num;
  uint16 weight[4][9];
};

/*
  Paging of the 16-bit code space for the generated tables.

  MY_UCA_NPAGES  number of pages
  MY_UCA_NCHARS  number of characters per page
  MY_UCA_CMASK   mask extracting the offset inside a page (NCHARS - 1)
  MY_UCA_PSHIFT  shift converting a code point into its page number

  In both layouts NPAGES * NCHARS covers 0x0000..0xFFFF.
  The first layout (1024 pages of 64 characters) is disabled;
  256 pages of 256 characters are used.
*/
#if 0
#define MY_UCA_NPAGES 1024
#define MY_UCA_NCHARS 64
#define MY_UCA_CMASK  63
#define MY_UCA_PSHIFT 6
#else
#define MY_UCA_NPAGES 256
#define MY_UCA_NCHARS 256
#define MY_UCA_CMASK  255
#define MY_UCA_PSHIFT 8
#endif

/*
  Suffix appended to the names of the generated tables for each dumped
  weight level: level 0 has no suffix, levels 1 and 2 get "2" and "3".
*/
static const char *pname[]= {"", "2", "3"};

int main(int ac, char **av) |
|
44 |
{
|
|
45 |
char str[256]; |
|
46 |
char *weights[64]; |
|
47 |
struct uca_item_st uca[64*1024]; |
|
48 |
size_t code, w; |
|
49 |
int pageloaded[MY_UCA_NPAGES]; |
|
50 |
||
51 |
bzero(uca, sizeof(uca)); |
|
52 |
bzero(pageloaded, sizeof(pageloaded)); |
|
53 |
||
54 |
while (fgets(str,sizeof(str),stdin)) |
|
55 |
{
|
|
56 |
char *comment; |
|
57 |
char *weight; |
|
58 |
char *s; |
|
59 |
size_t codenum; |
|
60 |
||
61 |
code= strtol(str,NULL,16); |
|
62 |
||
63 |
if (str[0]=='#' || (code > 0xFFFF)) |
|
64 |
continue; |
|
65 |
if ((comment=strchr(str,'#'))) |
|
66 |
{
|
|
67 |
*comment++= '\0'; |
|
68 |
for ( ; *comment==' ' ; comment++); |
|
69 |
}else |
|
70 |
continue; |
|
71 |
||
72 |
if ((weight=strchr(str,';'))) |
|
73 |
{
|
|
74 |
*weight++= '\0'; |
|
75 |
for ( ; *weight==' ' ; weight++); |
|
76 |
}
|
|
77 |
else
|
|
78 |
continue; |
|
79 |
||
80 |
codenum= 0; |
|
81 |
s= strtok(str, " \t"); |
|
82 |
while (s) |
|
83 |
{
|
|
84 |
s= strtok(NULL, " \t"); |
|
85 |
codenum++; |
|
86 |
}
|
|
87 |
||
88 |
if (codenum>1) |
|
89 |
{
|
|
90 |
/* Multi-character weight,
|
|
91 |
i.e. contraction.
|
|
92 |
Not supported yet.
|
|
93 |
*/
|
|
94 |
continue; |
|
95 |
}
|
|
96 |
||
97 |
uca[code].num= 0; |
|
98 |
s= strtok(weight, " []"); |
|
99 |
while (s) |
|
100 |
{
|
|
101 |
weights[uca[code].num]= s; |
|
102 |
s= strtok(NULL, " []"); |
|
103 |
uca[code].num++; |
|
104 |
}
|
|
105 |
||
106 |
for (w=0; w < uca[code].num; w++) |
|
107 |
{
|
|
108 |
size_t partnum; |
|
109 |
||
110 |
partnum= 0; |
|
111 |
s= weights[w]; |
|
112 |
while (*s) |
|
113 |
{
|
|
114 |
char *endptr; |
|
115 |
size_t part; |
|
116 |
part= strtol(s+1,&endptr,16); |
|
117 |
uca[code].weight[partnum][w]= part; |
|
118 |
s= endptr; |
|
119 |
partnum++; |
|
120 |
}
|
|
121 |
}
|
|
122 |
/* Mark that a character from this page was loaded */
|
|
123 |
pageloaded[code >> MY_UCA_PSHIFT]++; |
|
124 |
}
|
|
125 |
||
126 |
||
127 |
||
128 |
/* Now set implicit weights */
|
|
129 |
for (code=0; code <= 0xFFFF; code++) |
|
130 |
{
|
|
131 |
size_t base, aaaa, bbbb; |
|
132 |
||
133 |
if (uca[code].num) |
|
134 |
continue; |
|
135 |
||
136 |
/*
|
|
137 |
3400;<CJK Ideograph Extension A, First>
|
|
138 |
4DB5;<CJK Ideograph Extension A, Last>
|
|
139 |
4E00;<CJK Ideograph, First>
|
|
140 |
9FA5;<CJK Ideograph, Last>
|
|
141 |
*/
|
|
142 |
||
143 |
if (code >= 0x3400 && code <= 0x4DB5) |
|
144 |
base= 0xFB80; |
|
145 |
else if (code >= 0x4E00 && code <= 0x9FA5) |
|
146 |
base= 0xFB40; |
|
147 |
else
|
|
148 |
base= 0xFBC0; |
|
149 |
||
150 |
aaaa= base + (code >> 15); |
|
151 |
bbbb= (code & 0x7FFF) | 0x8000; |
|
152 |
uca[code].weight[0][0]= aaaa; |
|
153 |
uca[code].weight[0][1]= bbbb; |
|
154 |
||
155 |
uca[code].weight[1][0]= 0x0020; |
|
156 |
uca[code].weight[1][1]= 0x0000; |
|
157 |
||
158 |
uca[code].weight[2][0]= 0x0002; |
|
159 |
uca[code].weight[2][1]= 0x0000; |
|
160 |
||
161 |
uca[code].weight[3][0]= 0x0001; |
|
162 |
uca[code].weight[3][2]= 0x0000; |
|
163 |
||
164 |
uca[code].num= 2; |
|
165 |
}
|
|
166 |
||
167 |
printf("#include \"my_uca.h\"\n"); |
|
168 |
||
169 |
printf("#define MY_UCA_NPAGES %d\n",MY_UCA_NPAGES); |
|
170 |
printf("#define MY_UCA_NCHARS %d\n",MY_UCA_NCHARS); |
|
171 |
printf("#define MY_UCA_CMASK %d\n",MY_UCA_CMASK); |
|
172 |
printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT); |
|
173 |
||
174 |
for (w=0; w<3; w++) |
|
175 |
{
|
|
176 |
size_t page; |
|
177 |
int pagemaxlen[MY_UCA_NPAGES]; |
|
178 |
||
179 |
for (page=0; page < MY_UCA_NPAGES; page++) |
|
180 |
{
|
|
181 |
size_t offs; |
|
182 |
size_t maxnum= 0; |
|
183 |
size_t nchars= 0; |
|
184 |
size_t mchars; |
|
185 |
size_t ndefs= 0; |
|
186 |
||
187 |
pagemaxlen[page]= 0; |
|
188 |
||
189 |
/*
|
|
190 |
Skip this page if no weights were loaded
|
|
191 |
*/
|
|
192 |
||
193 |
if (!pageloaded[page]) |
|
194 |
continue; |
|
195 |
||
196 |
/*
|
|
197 |
Calculate maximum weight
|
|
198 |
length for this page
|
|
199 |
*/
|
|
200 |
||
201 |
for (offs=0; offs < MY_UCA_NCHARS; offs++) |
|
202 |
{
|
|
203 |
size_t i, num; |
|
204 |
||
205 |
code= page*MY_UCA_NCHARS+offs; |
|
206 |
||
207 |
/* Calculate only non-zero weights */
|
|
208 |
for (num=0, i=0; i < uca[code].num; i++) |
|
209 |
if (uca[code].weight[w][i]) |
|
210 |
num++; |
|
211 |
||
212 |
maxnum= maxnum < num ? num : maxnum; |
|
213 |
||
214 |
/* Check if default weight */
|
|
215 |
if (w == 1 && num == 1) |
|
216 |
{
|
|
217 |
/* 0020 0000 ... */
|
|
218 |
if (uca[code].weight[w][0] == 0x0020) |
|
219 |
ndefs++; |
|
220 |
}
|
|
221 |
else if (w == 2 && num == 1) |
|
222 |
{
|
|
223 |
/* 0002 0000 ... */
|
|
224 |
if (uca[code].weight[w][0] == 0x0002) |
|
225 |
ndefs++; |
|
226 |
}
|
|
227 |
}
|
|
228 |
maxnum++; |
|
229 |
||
230 |
/*
|
|
231 |
If the page have only default weights
|
|
232 |
then no needs to dump it, skip.
|
|
233 |
*/
|
|
234 |
if (ndefs == MY_UCA_NCHARS) |
|
235 |
{
|
|
236 |
continue; |
|
237 |
}
|
|
238 |
switch (maxnum) |
|
239 |
{
|
|
240 |
case 0: mchars= 8; break; |
|
241 |
case 1: mchars= 8; break; |
|
242 |
case 2: mchars= 8; break; |
|
243 |
case 3: mchars= 9; break; |
|
244 |
case 4: mchars= 8; break; |
|
245 |
default: mchars= uca[code].num; |
|
246 |
}
|
|
247 |
||
248 |
pagemaxlen[page]= maxnum; |
|
249 |
||
250 |
||
251 |
/*
|
|
252 |
Now print this page
|
|
253 |
*/
|
|
254 |
||
255 |
||
256 |
printf("uint16 page%03Xdata%s[]= { /* %04X (%d weights per char) */\n", |
|
257 |
page, pname[w], page*MY_UCA_NCHARS, maxnum); |
|
258 |
||
259 |
for (offs=0; offs < MY_UCA_NCHARS; offs++) |
|
260 |
{
|
|
261 |
uint16 weight[8]; |
|
262 |
size_t num, i; |
|
263 |
||
264 |
code= page*MY_UCA_NCHARS+offs; |
|
265 |
||
266 |
bzero(weight,sizeof(weight)); |
|
267 |
||
268 |
/* Copy non-zero weights */
|
|
269 |
for (num=0, i=0; i < uca[code].num; i++) |
|
270 |
{
|
|
271 |
if (uca[code].weight[w][i]) |
|
272 |
{
|
|
273 |
weight[num]= uca[code].weight[w][i]; |
|
274 |
num++; |
|
275 |
}
|
|
276 |
}
|
|
277 |
||
278 |
for (i=0; i < maxnum; i++) |
|
279 |
{
|
|
280 |
/*
|
|
281 |
Invert weights for secondary level to
|
|
282 |
sort upper case letters before their
|
|
283 |
lower case counter part.
|
|
284 |
*/
|
|
285 |
int tmp= weight[i]; |
|
286 |
if (w == 2 && tmp) |
|
287 |
tmp= (int)(0x20 - weight[i]); |
|
288 |
||
289 |
||
290 |
printf("0x%04X", tmp); |
|
291 |
if ((offs+1 != MY_UCA_NCHARS) || (i+1!=maxnum)) |
|
292 |
printf(","); |
|
293 |
nchars++; |
|
294 |
}
|
|
295 |
if (nchars >=mchars) |
|
296 |
{
|
|
297 |
printf("\n"); |
|
298 |
nchars=0; |
|
299 |
}
|
|
300 |
else
|
|
301 |
{
|
|
302 |
printf(" "); |
|
303 |
}
|
|
304 |
}
|
|
305 |
printf("};\n\n"); |
|
306 |
}
|
|
307 |
||
308 |
printf("uchar uca_length%s[%d]={\n", pname[w], MY_UCA_NPAGES); |
|
309 |
for (page=0; page < MY_UCA_NPAGES; page++) |
|
310 |
{
|
|
311 |
printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n"); |
|
312 |
}
|
|
313 |
printf("};\n"); |
|
314 |
||
315 |
||
316 |
printf("uint16 *uca_weight%s[%d]={\n", pname[w], MY_UCA_NPAGES); |
|
317 |
for (page=0; page < MY_UCA_NPAGES; page++) |
|
318 |
{
|
|
319 |
const char *comma= page < MY_UCA_NPAGES-1 ? "," : ""; |
|
320 |
const char *nline= (page+1) % 4 ? "" : "\n"; |
|
321 |
if (!pagemaxlen[page]) |
|
322 |
printf("NULL %s%s%s", w ? " ": "", comma , nline); |
|
323 |
else
|
|
324 |
printf("page%03Xdata%s%s%s", page, pname[w], comma, nline); |
|
325 |
}
|
|
326 |
printf("};\n"); |
|
327 |
}
|
|
328 |
||
329 |
||
330 |
printf("int main(void){ return 0;};\n"); |
|
331 |
return 0; |
|
332 |
}
|