~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
15
16
/*
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <string.h>
20
*/
21
#include <my_global.h>
22
#include <m_string.h>
23
#include <m_ctype.h>
24
#include "m_ctype.h"
25
26
27
/*
  One entry of a name-to-flags lookup table: a two-letter category
  abbreviation together with the ctype bit mask stored for it.
*/
typedef struct my_ctype_name_st
{
  const char *name;   /* category abbreviation, e.g. "Lu"; NULL ends a table */
  int val;            /* ctype flag bits assigned to that category           */
} MY_CTYPE_NAME_ST;
32
33
34
static MY_CTYPE_NAME_ST my_ctype_name[]=
35
{
36
  {"Lu", _MY_U},                /* Letter, Uppercase          */
37
  {"Ll", _MY_L},                /* Letter, Lowercase          */
38
  {"Lt", _MY_U},                /* Letter, Titlecase          */
39
  {"Lm", _MY_L},                /* Letter, Modifier           */
40
  {"Lo", _MY_L},                /* Letter, other              */
41
  
42
  {"Nd", _MY_NMR},              /* Number, Decimal Digit      */
43
  {"Nl", _MY_NMR|_MY_U|_MY_L},  /* Number, Letter             */
44
  {"No", _MY_NMR|_MY_PNT},      /* Number, Other              */
45
  
46
  {"Mn", _MY_L|_MY_PNT},        /* Mark, Nonspacing           */
47
  {"Mc", _MY_L|_MY_PNT},        /* Mark, Spacing Combining    */
48
  {"Me", _MY_L|_MY_PNT},        /* Mark, Enclosing            */
49
  
50
  {"Pc", _MY_PNT},              /* Punctuation, Connector     */
51
  {"Pd", _MY_PNT},              /* Punctuation, Dash          */
52
  {"Ps", _MY_PNT},              /* Punctuation, Open          */
53
  {"Pe", _MY_PNT},              /* Punctuation, Close         */
54
  {"Pi", _MY_PNT},              /* Punctuation, Initial quote */
55
  {"Pf", _MY_PNT},              /* Punctuation, Final quote   */
56
  {"Po", _MY_PNT},              /* Punctuation, Other         */
57
  
58
  {"Sm", _MY_PNT},              /* Symbol, Math               */
59
  {"Sc", _MY_PNT},              /* Symbol, Currency           */
60
  {"Sk", _MY_PNT},              /* Symbol, Modifier           */
61
  {"So", _MY_PNT},              /* Symbol, Other              */
62
  
63
  {"Zs", _MY_SPC},              /* Separator, Space           */
64
  {"Zl", _MY_SPC},              /* Separator, Line            */
65
  {"Zp", _MY_SPC},              /* Separator, Paragraph       */
66
  
67
  {"Cc", _MY_CTR},              /* Other, Control             */
68
  {"Cf", _MY_CTR},              /* Other, Format              */
69
  {"Cs", _MY_CTR},              /* Other, Surrogate           */
70
  {"Co", _MY_CTR},              /* Other, Private Use         */
71
  {"Cn", _MY_CTR},              /* Other, Not Assigned        */
72
  {NULL, 0}
73
};
74
75
76
static int
77
ctypestr2num(const char *tok)
78
{
79
  MY_CTYPE_NAME_ST *p;
80
  for (p= my_ctype_name; p->name; p++)
81
  {
82
    if (!strncasecmp(p->name, tok, 2))
83
      return p->val;
84
  }
85
  return 0;
86
}
87
88
89
int main(int ac, char ** av)
90
{
91
  char str[1024];
92
  unsigned char ctypea[64*1024];
93
  size_t i;
94
  size_t plane;
95
  MY_UNI_CTYPE uctype[256];
96
  FILE *f= stdin;
97
98
  if (ac > 1 && av[1] && !(f= fopen(av[1],"r")))
99
  {
100
    fprintf(stderr, "Can't open file %s\n", av[1]);
101
    exit(1);
102
  }
103
  bzero(&ctypea,sizeof(ctypea));
104
  bzero(&uctype, sizeof(uctype));
105
  
106
  printf("/*\n");
107
  printf("  Unicode ctype data\n");
108
  printf("  Generated from %s\n", av[1] ? av[1] : "stdin");
109
  printf("*/\n");
110
  
111
  while(fgets(str, sizeof(str), f))
112
  {
113
    size_t n= 0, code= 0;
114
    char *s,*e;
115
    int ctype= 0;
116
    
117
    for(s= str; s; )
118
    {
119
      char *end;
120
      char tok[1024]="";
121
      e=strchr(s,';');
122
      if(e)
123
      {
124
        strncpy(tok,s,(unsigned int)(e-s));
125
        tok[e-s]=0;
126
      }
127
      else
128
      {
129
        strcpy(tok,s);
130
      }
131
      
132
      end=tok+strlen(tok);
133
      
134
      switch(n)
135
      {
136
        case 0: code= strtol(tok,&end,16);break;
137
        case 2: ctype= ctypestr2num(tok);break;
138
      }
139
      
140
      n++;
141
      if(e)  s=e+1;
142
      else  s=e;
143
    }
144
    if(code<=0xFFFF)
145
    {
146
      ctypea[code]= ctype;
147
    }
148
  }
149
  
150
  /* Fill digits */
151
  for (i= '0'; i <= '9'; i++)
152
    ctypea[i]= _MY_NMR;
153
    
154
  for (i= 'a'; i <= 'z'; i++)
155
    ctypea[i]|= _MY_X;
156
  for (i= 'A'; i <= 'Z'; i++)
157
    ctypea[i]|= _MY_X;
158
  
159
  
160
  /* Fill ideographs  */
161
  
162
  /* CJK Ideographs Extension A (U+3400 - U+4DB5) */
163
  for(i=0x3400;i<=0x4DB5;i++)
164
  {
165
    ctypea[i]= _MY_L | _MY_U;
166
  }
167
  
168
  /* CJK Ideographs (U+4E00 - U+9FA5) */
169
  for(i=0x4E00;i<=0x9FA5;i++){
170
    ctypea[i]= _MY_L | _MY_U;
171
  }
172
  
173
  /* Hangul Syllables (U+AC00 - U+D7A3)  */
174
  for(i=0xAC00;i<=0xD7A3;i++)
175
  {
176
    ctypea[i]= _MY_L | _MY_U;
177
  }
178
  
179
  
180
  /* Calc plane parameters */
181
  for(plane=0;plane<256;plane++)
182
  {
183
    size_t character;
184
    uctype[plane].ctype= ctypea+plane*256;
185
    
186
    uctype[plane].pctype= uctype[plane].ctype[0];
187
    for(character=1;character<256;character++)
188
    {
189
      if (uctype[plane].ctype[character] != uctype[plane].pctype)
190
      {
191
        uctype[plane].pctype= 0; /* Mixed plane */
192
        break;
193
      }
194
    }
195
    if (character==256)	/* All the same, no needs to dump whole plane */
196
      uctype[plane].ctype= NULL; 
197
  }
198
  
199
  /* Dump mixed planes */
200
  for(plane=0;plane<256;plane++)
201
  {
202
    if(uctype[plane].ctype)
203
    {
204
      int charnum=0;
205
      int num=0;
206
      
207
      printf("static unsigned char uctype_page%02X[256]=\n{\n",plane);
208
      
209
      for(charnum=0;charnum<256;charnum++)
210
      {
211
        int cod;
212
        
213
        cod=(plane<<8)+charnum;
214
        printf(" %2d%s",uctype[plane].ctype[charnum],charnum<255?",":"");
215
      
216
        num++;
217
        if(num==16)
218
        {
219
          printf("\n");
220
          num=0;
221
        }
222
      }
223
      printf("};\n\n");
224
    }
225
  }
226
  
227
  
228
  /* Dump plane index */
229
  printf("MY_UNI_CTYPE my_uni_ctype[256]={\n");
230
  for(plane=0;plane<256;plane++)
231
  {
232
    char plane_name[128]="NULL";
233
    if(uctype[plane].ctype){
234
      sprintf(plane_name,"uctype_page%02X",plane);
235
    }
236
    printf("\t{%d,%s}%s\n",uctype[plane].pctype,plane_name,plane<255?",":"");
237
  }
238
  printf("};\n");
239
  
240
  return 0;
241
}