~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include "my_global.h"
17
#include "m_string.h"
18
#include "my_xml.h"
19
20
21
/*
  Token codes returned by the lexer.

  Punctuation tokens use the punctuation character itself as the code,
  because the lexer returns the matched byte directly for them; the
  remaining tokens use mnemonic letters.
*/
#define MY_XML_UNKNOWN   'U'  /* byte that starts no recognised token   */
#define MY_XML_EOF       'E'  /* end of the input buffer                */
#define MY_XML_STRING    'S'  /* quoted string                          */
#define MY_XML_IDENT     'I'  /* identifier (tag or attribute name)     */
#define MY_XML_EQ        '='
#define MY_XML_LT        '<'
#define MY_XML_GT        '>'
#define MY_XML_SLASH     '/'
#define MY_XML_COMMENT   'C'  /* <!-- ... -->                           */
#define MY_XML_TEXT      'T'
#define MY_XML_QUESTION  '?'
#define MY_XML_EXCLAM    '!'
#define MY_XML_CDATA     'D'  /* <![CDATA[ ... ]]>                      */
34
35
/*
  A span of bytes inside the buffer being parsed, expressed as the
  half-open range [beg, end): its length is (end - beg).
*/
typedef struct xml_attr_st
{
  const char *beg;    /* first byte of the span        */
  const char *end;    /* one past the span's last byte */
} MY_XML_ATTR;
40
41
42
/*
  XML ctype: per-byte classification flags stored in my_xml_ctype[].
*/
#define	MY_XML_ID0  0x01 /* Identifier initial character */
#define	MY_XML_ID1  0x02 /* Identifier medial  character */
#define	MY_XML_SPC  0x08 /* Spacing character */


/*
 http://www.w3.org/TR/REC-xml/
 [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
                  CombiningChar | Extender
 [5] Name ::= (Letter | '_' | ':') (NameChar)*

 Table values: 3 = MY_XML_ID0|MY_XML_ID1, 2 = MY_XML_ID1 only,
 8 = MY_XML_SPC.  Every byte >= 0x80 is flagged as both an initial
 and a medial identifier character.

 The table is read-only, hence const.
*/

static const char my_xml_ctype[256]=
{
/*00*/  0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/  8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,  /*  !"#$%&'()*+,-./ */
/*30*/  2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,  /* 0123456789:;<=>? */
/*40*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* @ABCDEFGHIJKLMNO */
/*50*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,  /* PQRSTUVWXYZ[\]^_ */
/*60*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* `abcdefghijklmno */
/*70*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,  /* pqrstuvwxyz{|}~  */
/*80*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};

/*
  Classification predicates.  Each yields a non-zero value (the flag
  bit, not necessarily 1) when the byte belongs to the class.  The
  argument is converted to unsigned char so that bytes >= 0x80 index
  the table correctly even where plain char is signed.
*/
#define my_xml_is_space(c)  (my_xml_ctype[(unsigned char) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID1)
80
81
82
static const char *lex2str(int lex)
83
{
84
  switch(lex)
85
  {
86
    case MY_XML_EOF:      return "END-OF-INPUT";
87
    case MY_XML_STRING:   return "STRING";
88
    case MY_XML_IDENT:    return "IDENT";
89
    case MY_XML_CDATA:    return "CDATA";
90
    case MY_XML_EQ:       return "'='";
91
    case MY_XML_LT:       return "'<'";
92
    case MY_XML_GT:       return "'>'";
93
    case MY_XML_SLASH:    return "'/'";
94
    case MY_XML_COMMENT:  return "COMMENT";
95
    case MY_XML_TEXT:     return "TEXT";
96
    case MY_XML_QUESTION: return "'?'";
97
    case MY_XML_EXCLAM:   return "'!'";
98
  }
99
  return "unknown token";
100
}
101
102
/*
  Trim the span in place: advance a->beg past leading spacing
  characters and pull a->end back over trailing ones.  A span made
  only of spacing characters collapses to an empty span.
*/
static void my_xml_norm_text(MY_XML_ATTR *a)
{
  while (a->beg < a->end && my_xml_is_space(*a->beg))
    a->beg++;

  while (a->beg < a->end && my_xml_is_space(*(a->end - 1)))
    a->end--;
}
107
108
109
/*
  Lexer: read the next token starting at p->cur.

  Leading spacing characters are skipped.  On return *a describes the
  token's bytes as [a->beg, a->end) and p->cur points just past the
  token.  For MY_XML_STRING the span excludes the quotes and, unless
  MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is set, surrounding whitespace.
  For MY_XML_COMMENT and MY_XML_CDATA the span includes the delimiters.

  No byte at or beyond p->end is ever read.

  RETURN
    One of the MY_XML_xxx token codes.  MY_XML_UNKNOWN is returned
    without consuming the offending byte.  A CDATA section that is not
    closed before the end of input consumes the rest of the input and
    is reported as MY_XML_EOF.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  int lex;
  size_t left;

  while (p->cur < p->end && my_xml_is_space(p->cur[0]))
    p->cur++;

  if (p->cur >= p->end)
  {
    a->beg= p->end;
    a->end= p->end;
    return MY_XML_EOF;
  }

  a->beg= p->cur;
  a->end= p->cur;
  left= (size_t) (p->end - p->cur);       /* bytes available, >= 1 */

  if (left >= 4 && !memcmp(p->cur, "<!--", 4))
  {
    /*
      Comment: search for the terminator, comparing only while at
      least three bytes remain.  An unterminated comment swallows the
      rest of the input.
    */
    for ( ; p->cur < p->end; p->cur++)
    {
      if (p->end - p->cur >= 3 && !memcmp(p->cur, "-->", 3))
      {
        p->cur+= 3;
        break;
      }
    }
    a->end= p->cur;
    lex= MY_XML_COMMENT;
  }
  else if (left >= 9 && !memcmp(p->cur, "<![CDATA[", 9))
  {
    const char *s;

    /* Assume unterminated until "]]>" is found */
    lex= MY_XML_EOF;
    for (s= p->cur + 9; p->end - s >= 3; s++)
    {
      if (s[0] == ']' && s[1] == ']' && s[2] == '>')
      {
        p->cur= s + 3;
        a->end= p->cur;
        lex= MY_XML_CDATA;
        break;
      }
    }
    if (lex != MY_XML_CDATA)
    {
      /*
        Unterminated section: returning a CDATA token with an empty
        span would make the caller compute a negative length, so
        report end of input instead.
      */
      p->cur= p->end;
      a->beg= p->end;
      a->end= p->end;
    }
  }
  else if (p->cur[0] != '\0' && strchr("?=/<>!", p->cur[0]))
  {
    /*
      Single-character punctuation: the token code is the character.
      The explicit NUL test is needed because strchr() also matches
      the terminator of the search string.
    */
    p->cur++;
    a->end= p->cur;
    lex= a->beg[0];
  }
  else if ((p->cur[0] == '"') || (p->cur[0] == '\''))
  {
    const char quote= p->cur[0];

    p->cur++;
    while (p->cur < p->end && p->cur[0] != quote)
      p->cur++;
    a->end= p->cur;
    if (p->cur < p->end)                  /* closing quote was found */
      p->cur++;
    a->beg++;                             /* drop the opening quote */
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
      my_xml_norm_text(a);
    lex= MY_XML_STRING;
  }
  else if (my_xml_is_id0(p->cur[0]))
  {
    p->cur++;
    while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
      p->cur++;
    a->end= p->cur;
    my_xml_norm_text(a);
    lex= MY_XML_IDENT;
  }
  else
    lex= MY_XML_UNKNOWN;

  return lex;
}
186
187
188
/*
  Deliver a value (text, CDATA or attribute value) to the user's value
  handler.  Returns the handler's result, or MY_XML_OK when no handler
  is installed.
*/
static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
{
  if (!st->value)
    return MY_XML_OK;

  return st->value(st, str, len);
}
192
193
194
/*
  Push a tag or attribute name onto the current path kept in st->attr
  (components separated by '/') and notify the enter handler.

  The handler receives just the new name when
  MY_XML_FLAG_RELATIVE_NAMES is set, otherwise the whole path.

  RETURN
    MY_XML_ERROR (with st->errstr set) when the path buffer cannot hold
    the new component; otherwise the handler's result, or MY_XML_OK
    when no handler is installed.
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
{
  const size_t used= (size_t) (st->attrend - st->attr);
  /*
    Bytes needed besides the name itself: the terminating NUL, plus a
    '/' separator when the path is not empty.  The separator must be
    counted, otherwise the NUL can land one byte past the buffer.
  */
  const size_t extra= (used ? 1 : 0) + 1;

  if (used + extra > sizeof(st->attr) ||
      len > sizeof(st->attr) - used - extra)
  {
    sprintf(st->errstr,"To deep XML");
    return MY_XML_ERROR;
  }

  if (used)
  {
    st->attrend[0]= '/';
    st->attrend++;
  }
  memcpy(st->attrend, str, len);
  st->attrend+= len;
  st->attrend[0]= '\0';

  if (!st->enter)
    return MY_XML_OK;

  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
    return st->enter(st, str, len);

  return st->enter(st, st->attr, (size_t) (st->attrend - st->attr));
}
214
215
216
/*
  Copy the first min(l1, l2) bytes of src into s and NUL-terminate.
  The destination must have room for min(l1, l2) + 1 bytes.
*/
static void mstr(char *s,const char *src,size_t l1, size_t l2)
{
  size_t count= l2;

  if (l1 < l2)
    count= l1;

  memcpy(s, src, count);
  s[count]= '\0';
}
222
223
224
/*
  Pop the innermost component from the current path kept in p->attr
  and notify the leave handler.

  str/slen is the name found in the closing tag, or NULL/0 when the
  element is closed implicitly (no name to verify).  When a name is
  given it must equal the innermost open name, in both length and
  content; otherwise p->errstr is set and MY_XML_ERROR is returned
  without calling the handler.

  The handler receives str/slen when MY_XML_FLAG_RELATIVE_NAMES is set,
  otherwise the whole path as it was before popping.

  RETURN
    MY_XML_ERROR on a name mismatch; otherwise the handler's result,
    or MY_XML_OK when no handler is installed.
*/
static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
{
  char *e;
  const char *tag;
  size_t glen;
  char s[32];
  char g[32];
  int  rc= MY_XML_OK;

  /* Find previous '/' or beginning */
  for (e= p->attrend; (e > p->attr) && (e[0] != '/'); e--)
  {}

  /*
    The innermost name starts after the '/', or at e itself when the
    path has a single component (no separator).
  */
  tag= (e[0] == '/') ? e + 1 : e;
  glen= (size_t) (p->attrend - tag);

  if (str && (slen != glen || memcmp(str, tag, glen) != 0))
  {
    /* Names are truncated to fit the local buffers and p->errstr */
    mstr(s, str, sizeof(s)-1, slen);
    if (glen)
    {
      mstr(g, tag, sizeof(g)-1, glen);
      sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
    }
    else
      sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
    return MY_XML_ERROR;
  }

  if (p->leave_xml)
  {
    if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
      rc= p->leave_xml(p, str, slen);
    else
      rc= p->leave_xml(p, p->attr, (size_t) (p->attrend - p->attr));
  }

  /* Truncate the path at the separator (or at the start) */
  *e= '\0';
  p->attrend= e;

  return rc;
}
260
261
262
/*
  Parse an XML document held in memory, invoking the enter, value and
  leave handlers installed on the parser.

  SYNOPSIS
    p    - parser object
    str  - start of the document (need not be NUL-terminated as far as
           this function is concerned; only len bytes are addressed)
    len  - length of the document in bytes

  NOTES
    Comments are skipped.  CDATA content and text between tags are
    passed to the value handler.  Tags starting with '<?' or '<!' are
    entered and left like elements without content.

  RETURN
    MY_XML_OK on success; MY_XML_ERROR on a syntax error or when a
    handler reports an error through my_xml_enter/my_xml_leave or for
    an attribute value.  On error p->errstr describes the problem.
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{
  /*
    Start with an empty path.  attr[0] is cleared explicitly so that a
    parser object reused after a failed parse does not trip the
    "unexpected END-OF-INPUT" check at the end.
  */
  p->attr[0]= '\0';
  p->attrend= p->attr;
  p->beg= str;
  p->cur= str;
  p->end= str + len;

  while (p->cur < p->end)
  {
    MY_XML_ATTR a;
    if (p->cur[0] == '<')
    {
      int lex;
      int question= 0;
      int exclam= 0;

      lex= my_xml_scan(p, &a);

      if (MY_XML_COMMENT == lex)
        continue;

      if (lex == MY_XML_CDATA)
      {
        /*
          The span includes "<![CDATA[" (9 bytes) and "]]>" (3 bytes).
          A shorter span means the section was not terminated;
          stripping the delimiters would then yield a negative length.
        */
        if (a.end - a.beg < 12)
        {
          sprintf(p->errstr, "unexpected END-OF-INPUT (']]>' wanted)");
          return MY_XML_ERROR;
        }
        a.beg+= 9;
        a.end-= 3;
        /*
          NOTE(review): the handler's result is ignored for CDATA and
          plain text, unlike for attribute values - confirm this is
          intentional.
        */
        my_xml_value(p, a.beg, (size_t) (a.end - a.beg));
        continue;
      }

      /* The first token was '<'; look at what follows it */
      lex= my_xml_scan(p, &a);

      if (MY_XML_SLASH == lex)
      {
        /* Closing tag: </ident> */
        if (MY_XML_IDENT != (lex= my_xml_scan(p, &a)))
        {
          sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, a.beg, (size_t) (a.end - a.beg)))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
        goto gt;
      }

      if (MY_XML_EXCLAM == lex)
      {
        lex= my_xml_scan(p, &a);
        exclam= 1;
      }
      else if (MY_XML_QUESTION == lex)
      {
        lex= my_xml_scan(p, &a);
        question= 1;
      }

      if (MY_XML_IDENT == lex)
      {
        p->current_node_type= MY_XML_NODE_TAG;
        if (MY_XML_OK != my_xml_enter(p, a.beg, (size_t) (a.end - a.beg)))
          return MY_XML_ERROR;
      }
      else
      {
        sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
                lex2str(lex));
        return MY_XML_ERROR;
      }

      /* Attributes: name=value pairs, or bare names/strings */
      while ((MY_XML_IDENT == (lex= my_xml_scan(p, &a))) ||
             (MY_XML_STRING == lex))
      {
        MY_XML_ATTR b;
        if (MY_XML_EQ == (lex= my_xml_scan(p, &b)))
        {
          lex= my_xml_scan(p, &b);
          if ((lex == MY_XML_IDENT) || (lex == MY_XML_STRING))
          {
            p->current_node_type= MY_XML_NODE_ATTR;
            if ((MY_XML_OK != my_xml_enter(p, a.beg,
                                           (size_t) (a.end - a.beg))) ||
                (MY_XML_OK != my_xml_value(p, b.beg,
                                           (size_t) (b.end - b.beg))) ||
                (MY_XML_OK != my_xml_leave(p, a.beg,
                                           (size_t) (a.end - a.beg))))
              return MY_XML_ERROR;
          }
          else
          {
            sprintf(p->errstr,"%s unexpected (ident or string wanted)",
                    lex2str(lex));
            return MY_XML_ERROR;
          }
        }
        else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex))
        {
          /*
            Value-less attribute followed by another name or string.
            The token scanned into 'b' has already been consumed and
            is not reported.
          */
          p->current_node_type= MY_XML_NODE_ATTR;
          if ((MY_XML_OK != my_xml_enter(p, a.beg,
                                         (size_t) (a.end - a.beg))) ||
              (MY_XML_OK != my_xml_leave(p, a.beg,
                                         (size_t) (a.end - a.beg))))
            return MY_XML_ERROR;
        }
        else
          break;
      }

      if (lex == MY_XML_SLASH)
      {
        /* Self-closing tag: <ident ... /> */
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
      }

gt:
      if (question)
      {
        if (lex != MY_XML_QUESTION)
        {
          sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
        lex= my_xml_scan(p, &a);
      }

      if (exclam)
      {
        if (MY_XML_OK != my_xml_leave(p, NULL, 0))
          return MY_XML_ERROR;
      }

      if (lex != MY_XML_GT)
      {
        sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
    }
    else
    {
      /* Text up to the next '<' or the end of input */
      a.beg= p->cur;
      while ((p->cur < p->end) && (p->cur[0] != '<'))
        p->cur++;
      a.end= p->cur;

      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
        my_xml_norm_text(&a);
      if (a.beg != a.end)
      {
        my_xml_value(p, a.beg, (size_t) (a.end - a.beg));
      }
    }
  }

  /* A non-empty path here means some element was never closed */
  if (p->attr[0])
  {
    sprintf(p->errstr,"unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}
417
418
419
/*
  Initialise a parser object by zero-filling it: no handlers, no user
  data, no flags.  Uses the standard memset() rather than the legacy
  bzero().
*/
void my_xml_parser_create(MY_XML_PARSER *p)
{
  memset(p, 0, sizeof(*p));
}
423
424
425
/*
  Counterpart of my_xml_parser_create().  Intentionally empty: this
  function performs no work.
*/
void my_xml_parser_free(MY_XML_PARSER *p  __attribute__((unused)))
{
  return;
}
428
429
430
/*
  Install the callback that receives values (text, CDATA content and
  attribute values).  Passing NULL removes the handler.
*/
void my_xml_set_value_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *parser,
                                            const char *text,
                                            size_t length))
{
  p->value= action;
}
436
437
/*
  Install the callback invoked when a tag or attribute is entered.
  Passing NULL removes the handler.
*/
void my_xml_set_enter_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *parser,
                                            const char *name,
                                            size_t length))
{
  p->enter= action;
}
443
444
445
/*
  Install the callback invoked when a tag or attribute is left.
  Passing NULL removes the handler.
*/
void my_xml_set_leave_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *parser,
                                            const char *name,
                                            size_t length))
{
  p->leave_xml= action;
}
451
452
453
/*
  Store an opaque pointer in the parser object.  The parser code in
  this file never dereferences it.
*/
void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data)
{
  p->user_data= user_data;
}
457
458
459
/*
  Return the parser's error message buffer, which holds the text
  written by the most recent failure.
*/
const char *my_xml_error_string(MY_XML_PARSER *p)
{
  const char *message= p->errstr;
  return message;
}
463
464
465
/*
  Return the offset of the current parse position within its line.

  The offset is measured from the last '\n' byte that precedes p->cur
  (counting from the newline byte itself), or from the start of the
  buffer when no newline precedes it.
*/
size_t my_xml_error_pos(MY_XML_PARSER *p)
{
  const char *anchor= p->beg;
  const char *scan= p->beg;

  while (scan < p->cur)
  {
    if (*scan == '\n')
      anchor= scan;
    scan++;
  }
  return (size_t) (p->cur - anchor);
}
476
477
/*
  Return the zero-based line number of the current parse position,
  i.e. the count of '\n' bytes between the start of the buffer and
  p->cur.
*/
uint my_xml_error_lineno(MY_XML_PARSER *p)
{
  uint newlines= 0;
  const char *scan= p->beg;

  while (scan < p->cur)
  {
    if (*scan == '\n')
      newlines++;
    scan++;
  }
  return newlines;
}