/* ~drizzle-trunk/drizzle/development -- 1 by brian: clean slate */
/* Copyright (C) 2004 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Short aliases for the unsigned integer types used by the weight tables. */
typedef unsigned char uchar;
typedef unsigned short uint16;
struct uca_item_st
24
{
25
  uchar  num;
26
  uint16 weight[4][9];
27
};
28
29
/*
  Geometry of the pages the 16-bit code space is split into.

  MY_UCA_NPAGES  number of pages
  MY_UCA_NCHARS  number of characters per page
  MY_UCA_CMASK   mask selecting the offset inside a page (NCHARS - 1)
  MY_UCA_PSHIFT  right shift turning a code into its page number

  In both variants NPAGES * NCHARS is 65536.  The 1024 x 64 layout is
  compiled out; the 256 x 256 layout is the active one.
*/
#if 0
#define MY_UCA_NPAGES	1024
#define MY_UCA_NCHARS	64
#define MY_UCA_CMASK	63
#define MY_UCA_PSHIFT	6
#else
#define MY_UCA_NPAGES	256
#define MY_UCA_NCHARS	256
#define MY_UCA_CMASK	255
#define MY_UCA_PSHIFT	8
#endif
/*
  Suffix appended to the names of the generated tables, indexed by
  weight level: level 0 gets no suffix, levels 1 and 2 get "2" and "3".
  The elements are string literals, hence pointers to const.
*/
static const char *pname[]= {"", "2", "3"};
int main(int ac, char **av)
44
{
45
  char str[256];
46
  char *weights[64];
47
  struct uca_item_st uca[64*1024];
48
  size_t code, w;
49
  int pageloaded[MY_UCA_NPAGES];
50
  
51
  bzero(uca, sizeof(uca));
52
  bzero(pageloaded, sizeof(pageloaded));
53
  
54
  while (fgets(str,sizeof(str),stdin))
55
  {
56
    char *comment;
57
    char *weight;
58
    char *s;
59
    size_t codenum;
60
    
61
    code= strtol(str,NULL,16);
62
    
63
    if (str[0]=='#' || (code > 0xFFFF))
64
      continue;
65
    if ((comment=strchr(str,'#')))
66
    {
67
      *comment++= '\0';
68
      for ( ; *comment==' ' ; comment++);
69
    }else
70
      continue;
71
    
72
    if ((weight=strchr(str,';')))
73
    {
74
      *weight++= '\0';
75
      for ( ; *weight==' ' ; weight++);
76
    }
77
    else
78
      continue;
79
    
80
    codenum= 0;
81
    s= strtok(str, " \t");
82
    while (s)
83
    {
84
      s= strtok(NULL, " \t");
85
      codenum++;
86
    }
87
    
88
    if (codenum>1)
89
    {
90
      /* Multi-character weight, 
91
         i.e. contraction. 
92
         Not supported yet.
93
      */
94
      continue;
95
    }
96
    
97
    uca[code].num= 0;
98
    s= strtok(weight, " []");
99
    while (s)
100
    {
101
      weights[uca[code].num]= s;
102
      s= strtok(NULL, " []");
103
      uca[code].num++;
104
    }
105
    
106
    for (w=0; w < uca[code].num; w++)
107
    {
108
      size_t partnum;
109
      
110
      partnum= 0;
111
      s= weights[w];
112
      while (*s)
113
      {
114
        char *endptr;
115
        size_t part;
116
        part= strtol(s+1,&endptr,16);
117
        uca[code].weight[partnum][w]= part;
118
        s= endptr;
119
        partnum++;
120
      }
121
    }
122
    /* Mark that a character from this page was loaded */
123
    pageloaded[code >> MY_UCA_PSHIFT]++;
124
  }
125
  
126
  
127
  
128
  /* Now set implicit weights */
129
  for (code=0; code <= 0xFFFF; code++)
130
  {
131
    size_t base, aaaa, bbbb;
132
    
133
    if (uca[code].num)
134
      continue;
135
    
136
    /*
137
    3400;<CJK Ideograph Extension A, First>
138
    4DB5;<CJK Ideograph Extension A, Last>
139
    4E00;<CJK Ideograph, First>
140
    9FA5;<CJK Ideograph, Last>
141
    */
142
    
143
    if (code >= 0x3400 && code <= 0x4DB5)
144
      base= 0xFB80;
145
    else if (code >= 0x4E00 && code <= 0x9FA5)
146
      base= 0xFB40;
147
    else
148
      base= 0xFBC0;
149
    
150
    aaaa= base +  (code >> 15);
151
    bbbb= (code & 0x7FFF) | 0x8000;
152
    uca[code].weight[0][0]= aaaa;
153
    uca[code].weight[0][1]= bbbb;
154
    
155
    uca[code].weight[1][0]= 0x0020;
156
    uca[code].weight[1][1]= 0x0000;
157
    
158
    uca[code].weight[2][0]= 0x0002;
159
    uca[code].weight[2][1]= 0x0000;
160
    
161
    uca[code].weight[3][0]= 0x0001;
162
    uca[code].weight[3][2]= 0x0000;
163
    
164
    uca[code].num= 2;
165
  }
166
  
167
  printf("#include \"my_uca.h\"\n");
168
  
169
  printf("#define MY_UCA_NPAGES %d\n",MY_UCA_NPAGES);
170
  printf("#define MY_UCA_NCHARS %d\n",MY_UCA_NCHARS);
171
  printf("#define MY_UCA_CMASK  %d\n",MY_UCA_CMASK);
172
  printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT);
173
  
174
  for (w=0; w<3; w++)
175
  {
176
    size_t page;
177
    int pagemaxlen[MY_UCA_NPAGES];
178
179
    for (page=0; page < MY_UCA_NPAGES; page++)
180
    {
181
      size_t offs;
182
      size_t maxnum= 0;
183
      size_t nchars= 0;
184
      size_t mchars;
185
      size_t ndefs= 0;
186
      
187
      pagemaxlen[page]= 0;
188
      
189
      /*
190
        Skip this page if no weights were loaded
191
      */
192
      
193
      if (!pageloaded[page])
194
        continue;
195
      
196
      /* 
197
        Calculate maximum weight
198
        length for this page
199
      */
200
      
201
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
202
      {
203
        size_t i, num;
204
        
205
        code= page*MY_UCA_NCHARS+offs;
206
        
207
        /* Calculate only non-zero weights */
208
        for (num=0, i=0; i < uca[code].num; i++)
209
          if (uca[code].weight[w][i])
210
            num++;
211
        
212
        maxnum= maxnum < num ? num : maxnum;
213
        
214
        /* Check if default weight */
215
        if (w == 1 && num == 1)
216
        {
217
          /* 0020 0000 ... */
218
          if (uca[code].weight[w][0] == 0x0020)
219
            ndefs++;
220
        }
221
        else if (w == 2 && num == 1)
222
        {
223
          /* 0002 0000 ... */
224
          if (uca[code].weight[w][0] == 0x0002)
225
            ndefs++;
226
        }
227
      } 
228
      maxnum++;
229
      
230
      /*
231
        If the page have only default weights
232
        then no needs to dump it, skip.
233
      */
234
      if (ndefs == MY_UCA_NCHARS)
235
      {
236
        continue;
237
      }
238
      switch (maxnum)
239
      {
240
        case 0: mchars= 8; break;
241
        case 1: mchars= 8; break;
242
        case 2: mchars= 8; break;
243
        case 3: mchars= 9; break;
244
        case 4: mchars= 8; break;
245
        default: mchars= uca[code].num;
246
      }
247
      
248
      pagemaxlen[page]= maxnum;
249
250
251
      /*
252
        Now print this page
253
      */
254
      
255
      
256
      printf("uint16 page%03Xdata%s[]= { /* %04X (%d weights per char) */\n",
257
              page, pname[w], page*MY_UCA_NCHARS, maxnum);
258
      
259
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
260
      {
261
        uint16 weight[8];
262
        size_t num, i;
263
        
264
        code= page*MY_UCA_NCHARS+offs;
265
        
266
        bzero(weight,sizeof(weight));
267
        
268
        /* Copy non-zero weights */
269
        for (num=0, i=0; i < uca[code].num; i++)
270
        {
271
          if (uca[code].weight[w][i])
272
          {
273
            weight[num]= uca[code].weight[w][i];
274
            num++;
275
          }
276
        }
277
        
278
        for (i=0; i < maxnum; i++)
279
        {
280
          /* 
281
            Invert weights for secondary level to
282
            sort upper case letters before their
283
            lower case counter part.
284
          */
285
          int tmp= weight[i];
286
          if (w == 2 && tmp)
287
            tmp= (int)(0x20 - weight[i]);
288
          
289
          
290
          printf("0x%04X", tmp);
291
          if ((offs+1 != MY_UCA_NCHARS) || (i+1!=maxnum))
292
            printf(",");
293
          nchars++;
294
        }
295
        if (nchars >=mchars)
296
        {
297
          printf("\n");
298
          nchars=0;
299
        }
300
        else
301
        {
302
          printf(" ");
303
        }
304
      }
305
      printf("};\n\n");
306
    }
307
308
    printf("uchar uca_length%s[%d]={\n", pname[w], MY_UCA_NPAGES);
309
    for (page=0; page < MY_UCA_NPAGES; page++)
310
    {
311
      printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n");
312
    }
313
    printf("};\n");
314
315
316
    printf("uint16 *uca_weight%s[%d]={\n", pname[w], MY_UCA_NPAGES);
317
    for (page=0; page < MY_UCA_NPAGES; page++)
318
    {
319
      const char *comma= page < MY_UCA_NPAGES-1 ? "," : "";
320
      const char *nline= (page+1) % 4 ? "" : "\n";
321
      if (!pagemaxlen[page])
322
        printf("NULL       %s%s%s", w ? " ": "",  comma , nline);
323
      else
324
        printf("page%03Xdata%s%s%s", page, pname[w], comma, nline);
325
    }
326
    printf("};\n");
327
  }
328
329
  
330
  printf("int main(void){ return 0;};\n");
331
  return 0;
332
}