~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include "m_string.h"
17
#include "my_xml.h"
18
19
20
/*
  Token codes returned by the lexer.
  Punctuation tokens use the punctuation character itself as their code;
  the other tokens use a mnemonic letter.
*/
#define MY_XML_UNKNOWN   'U'   /* input that matches no other token */
#define MY_XML_EOF       'E'   /* end of input reached              */
#define MY_XML_STRING    'S'   /* quoted string                     */
#define MY_XML_IDENT     'I'   /* identifier (tag/attribute name)   */
#define MY_XML_EQ        '='
#define MY_XML_LT        '<'
#define MY_XML_GT        '>'
#define MY_XML_SLASH     '/'
#define MY_XML_COMMENT   'C'   /* comment section                   */
#define MY_XML_TEXT      'T'   /* character data                    */
#define MY_XML_QUESTION  '?'
#define MY_XML_EXCLAM    '!'
#define MY_XML_CDATA     'D'   /* CDATA section                     */
33
34
/*
  A fragment of the text being parsed, described by a pair of pointers:
  the fragment occupies the bytes from beg up to, but not including, end.
*/
typedef struct xml_attr_st
{
  const char *beg;   /* first byte of the fragment            */
  const char *end;   /* one past the last byte of the fragment */
} MY_XML_ATTR;
39
40
41
/*
  XML ctype: bit flags stored in my_xml_ctype[] for every byte value.
*/
#define	MY_XML_ID0  0x01 /* Identifier initial character */
#define	MY_XML_ID1  0x02 /* Identifier medial  character */
#define	MY_XML_SPC  0x08 /* Spacing character */

/*
 http://www.w3.org/TR/REC-xml/
 [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
                  CombiningChar | Extender
 [5] Name ::= (Letter | '_' | ':') (NameChar)*

 The table below approximates these rules per byte: every byte >= 0x80
 is accepted as both an initial and a medial identifier character.
 The table is only ever read, hence const.
*/
static const char my_xml_ctype[256]=
{
/*00*/  0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/  8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,  /*  !"#$%&'()*+,-./ */
/*30*/  2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,  /* 0123456789:;<=>? */
/*40*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* @ABCDEFGHIJKLMNO */
/*50*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,  /* PQRSTUVWXYZ[\]^_ */
/*60*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* `abcdefghijklmno */
/*70*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,  /* pqrstuvwxyz{|}~  */
/*80*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};

/*
  Classification helpers. The argument is cast to unsigned char so that
  bytes >= 0x80 index the upper half of the table instead of producing a
  negative index where plain char is signed.
*/
#define my_xml_is_space(c)  (my_xml_ctype[(unsigned char) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID1)
79
80
81
static const char *lex2str(int lex)
82
{
83
  switch(lex)
84
  {
85
    case MY_XML_EOF:      return "END-OF-INPUT";
86
    case MY_XML_STRING:   return "STRING";
87
    case MY_XML_IDENT:    return "IDENT";
88
    case MY_XML_CDATA:    return "CDATA";
89
    case MY_XML_EQ:       return "'='";
90
    case MY_XML_LT:       return "'<'";
91
    case MY_XML_GT:       return "'>'";
92
    case MY_XML_SLASH:    return "'/'";
93
    case MY_XML_COMMENT:  return "COMMENT";
94
    case MY_XML_TEXT:     return "TEXT";
95
    case MY_XML_QUESTION: return "'?'";
96
    case MY_XML_EXCLAM:   return "'!'";
97
  }
98
  return "unknown token";
99
}
100
101
/*
  Trim spacing characters from both ends of the fragment by moving
  a->beg forward and a->end backward. The text itself is not modified.
*/
static void my_xml_norm_text(MY_XML_ATTR *a)
{
  /* Strip leading spaces */
  while ((a->beg < a->end) && my_xml_is_space(a->beg[0]))
    a->beg++;

  /* Strip trailing spaces */
  while ((a->beg < a->end) && my_xml_is_space(a->end[-1]))
    a->end--;
}
106
107
108
/*
  Read the next token starting at p->cur.

  Leading spacing characters are skipped first. On return p->cur points
  just past the consumed token and [a->beg, a->end) describes it:
    - COMMENT and CDATA: the whole section including its delimiters;
    - STRING: the text between the quotes (trimmed unless
      MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is set);
    - IDENT: the identifier;
    - punctuation: the single character, whose value is also the token code;
    - EOF: an empty range at p->end.

  No byte at or beyond p->end is ever read.

  An unterminated CDATA section consumes the rest of the input and is
  reported as MY_XML_UNKNOWN rather than as a CDATA token, because callers
  strip the CDATA delimiters from both ends of the returned range.

  @return one of the MY_XML_* token codes.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
  int lex;

  for (; (p->cur < p->end) && my_xml_is_space(p->cur[0]); p->cur++);

  if (p->cur >= p->end)
  {
    a->beg= p->end;
    a->end= p->end;
    return MY_XML_EOF;
  }

  a->beg= p->cur;
  a->end= p->cur;

  if ((p->end - p->cur > 3) && !memcmp(p->cur, "<!--", 4))
  {
    /* Look for the terminator, comparing only while 3 bytes remain */
    for (; p->cur < p->end; p->cur++)
    {
      if ((p->end - p->cur > 2) && !memcmp(p->cur, "-->", 3))
      {
        p->cur+= 3;
        break;
      }
    }
    a->end= p->cur;
    lex= MY_XML_COMMENT;
  }
  else if ((p->end - p->cur > 8) && !memcmp(p->cur, "<![CDATA[", 9))
  {
    int closed= 0;

    p->cur+= 9;
    for (; p->end - p->cur > 2; p->cur++)
    {
      if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
      {
        p->cur+= 3;
        a->end= p->cur;
        closed= 1;
        break;
      }
    }
    if (closed)
      lex= MY_XML_CDATA;
    else
    {
      /* No "]]>" before the end of input */
      p->cur= p->end;
      a->end= p->end;
      lex= MY_XML_UNKNOWN;
    }
  }
  /* strchr() also matches the terminating NUL, so exclude a NUL byte */
  else if (p->cur[0] != '\0' && strchr("?=/<>!", p->cur[0]))
  {
    p->cur++;
    a->end= p->cur;
    lex= a->beg[0];
  }
  else if ((p->cur[0] == '"') || (p->cur[0] == '\''))
  {
    /* Scan up to the matching quote character or the end of input */
    p->cur++;
    for (; (p->cur < p->end) && (p->cur[0] != a->beg[0]); p->cur++)
    {}
    a->end= p->cur;
    if (p->cur < p->end)                /* Closing quote was found */
      p->cur++;
    a->beg++;                           /* Skip the opening quote */
    if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
      my_xml_norm_text(a);
    lex= MY_XML_STRING;
  }
  else if (my_xml_is_id0(p->cur[0]))
  {
    p->cur++;
    while ((p->cur < p->end) && my_xml_is_id1(p->cur[0]))
      p->cur++;
    a->end= p->cur;
    my_xml_norm_text(a);
    lex= MY_XML_IDENT;
  }
  else
    lex= MY_XML_UNKNOWN;

  return lex;
}
185
186
187
/*
  Pass a value of len bytes starting at str to the value handler.

  @return the handler's result, or MY_XML_OK when no handler is set.
*/
static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
{
  if (!st->value)
    return MY_XML_OK;
  return (st->value)(st, str, len);
}
191
192
193
/*
  Append a name to the current path kept in st->attr and call the enter
  handler.

  Path components are separated by '/', and the path is kept
  NUL-terminated. With MY_XML_FLAG_RELATIVE_NAMES the handler receives
  only the new name, otherwise it receives the whole path.

  @return MY_XML_ERROR (with st->errstr set) when the path would not fit
          into st->attr; otherwise the handler's result, or MY_XML_OK when
          no handler is set.
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
{
  size_t used= (size_t) (st->attrend - st->attr);
  size_t avail= sizeof(st->attr) - used;
  /* Bytes needed besides the name: optional '/' separator plus the NUL */
  size_t extra= (used ? 1 : 0) + 1;

  if (avail < extra || len > avail - extra)
  {
    sprintf(st->errstr,"To deep XML");
    return MY_XML_ERROR;
  }

  if (used)
  {
    st->attrend[0]= '/';
    st->attrend++;
  }
  memcpy(st->attrend, str, len);
  st->attrend+= len;
  st->attrend[0]= '\0';

  if (!st->enter)
    return MY_XML_OK;
  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
    return st->enter(st, str, len);
  return st->enter(st, st->attr, (size_t) (st->attrend - st->attr));
}
213
214
215
/*
  Copy min(l1, l2) bytes from src to s and NUL-terminate the result.
  The destination must have room for min(l1, l2) + 1 bytes.
*/
static void mstr(char *s,const char *src,size_t l1, size_t l2)
{
  size_t n= l2;

  if (l1 < l2)
    n= l1;
  memcpy(s, src, n);
  s[n]= '\0';
}
221
222
223
/*
  Leave the innermost element of the current path and call the leave
  handler.

  When str is not NULL it is the name found in a closing tag (or the
  attribute name just processed) and must match the last path component
  in both length and content; otherwise MY_XML_ERROR is returned with
  p->errstr set. When str is NULL no name check is done.

  With MY_XML_FLAG_RELATIVE_NAMES the handler receives str/slen as given,
  otherwise it receives the whole path. The last component is removed
  from the path after the handler has been called.

  @return MY_XML_ERROR on a name mismatch; otherwise the handler's result,
          or MY_XML_OK when no handler is set.
*/
static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
{
  char *e;
  const char *tag;
  size_t glen;
  char s[32];
  char g[32];
  int  rc;

  /* Find previous '/' or beginning */
  for (e= p->attrend; (e > p->attr) && (e[0] != '/'); e--);
  /* The last component starts after the '/', or at the path start */
  tag= (e[0] == '/') ? e + 1 : e;
  glen= (size_t) (p->attrend - tag);

  if (str && (slen != glen || memcmp(str, tag, slen)))
  {
    /* Names are truncated to fit the local buffers for the message */
    mstr(s, str, sizeof(s)-1, slen);
    if (glen)
    {
      mstr(g, tag, sizeof(g)-1, glen);
      sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
    }
    else
      sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
    return MY_XML_ERROR;
  }

  if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
    rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
  else
    rc= (p->leave_xml ?
         p->leave_xml(p, p->attr, (size_t) (p->attrend - p->attr)) :
         MY_XML_OK);

  /* Cut the last component (and its separator) off the path */
  *e= '\0';
  p->attrend= e;

  return rc;
}
259
260
261
/*
  Parse len bytes of XML text starting at str, reporting tags, attributes
  and text through the enter / value / leave handlers set on the parser.

  Comments are skipped. CDATA content and text between tags are passed to
  the value handler; the handler's return value is ignored for these two
  cases, while for tags and attributes a handler result other than
  MY_XML_OK aborts parsing.

  @return MY_XML_OK on success, MY_XML_ERROR otherwise. On a syntax error
          a message is stored in p->errstr and p->cur is left near the
          offending input.
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{
  /*
    Start from an empty path. The first byte is cleared explicitly because
    the end-of-input check below tests p->attr[0], which may hold leftovers
    from an earlier parse that ended with an error.
  */
  p->attrend=p->attr;
  p->attr[0]= '\0';
  p->beg=str;
  p->cur=str;
  p->end=str+len;

  while ( p->cur < p->end )
  {
    MY_XML_ATTR a;
    if (p->cur[0] == '<')
    {
      int lex;
      int question=0;
      int exclam=0;

      /* Either a whole comment / CDATA section or the '<' itself */
      lex=my_xml_scan(p,&a);

      if (MY_XML_COMMENT == lex)
        continue;

      if (lex == MY_XML_CDATA)
      {
        /*
          The token must at least hold "<![CDATA[" (9 bytes) and "]]>"
          (3 bytes); anything shorter means the section was not terminated.
        */
        if (a.end - a.beg < 12)
        {
          sprintf(p->errstr,"unexpected END-OF-INPUT");
          return MY_XML_ERROR;
        }
        a.beg+= 9;
        a.end-= 3;
        my_xml_value(p, a.beg, (size_t) (a.end-a.beg));
        continue;
      }

      /* Token following '<' */
      lex=my_xml_scan(p,&a);

      if (MY_XML_SLASH == lex)
      {
        /* Closing tag: "</" IDENT ">" */
        if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
        {
          sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg)))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
        goto gt;
      }

      if (MY_XML_EXCLAM == lex)
      {
        /* "<!" IDENT ... ">" */
        lex=my_xml_scan(p,&a);
        exclam=1;
      }
      else if (MY_XML_QUESTION == lex)
      {
        /* "<?" IDENT ... "?>" */
        lex=my_xml_scan(p,&a);
        question=1;
      }

      if (MY_XML_IDENT == lex)
      {
        p->current_node_type= MY_XML_NODE_TAG;
        if (MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))
          return MY_XML_ERROR;
      }
      else
      {
        sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
		lex2str(lex));
        return MY_XML_ERROR;
      }

      /* Attributes: name=value pairs, or a bare name followed by another */
      while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
             (MY_XML_STRING == lex))
      {
        MY_XML_ATTR b;
        if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
        {
          lex=my_xml_scan(p,&b);
          if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) )
          {
            p->current_node_type= MY_XML_NODE_ATTR;
            if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))  ||
                (MY_XML_OK != my_xml_value(p,b.beg,(size_t) (b.end-b.beg)))  ||
                (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
              return MY_XML_ERROR;
          }
          else
          {
            sprintf(p->errstr,"%s unexpected (ident or string wanted)",
		    lex2str(lex));
            return MY_XML_ERROR;
          }
        }
        else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex))
        {
          /* Name without a value: reported as an empty attribute */
          p->current_node_type= MY_XML_NODE_ATTR;
          if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
              (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
           return MY_XML_ERROR;
        }
        else
          break;
      }

      if (lex == MY_XML_SLASH)
      {
        /* Self-closing tag "<name ... />" */
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
      }

gt:
      if (question)
      {
        if (lex != MY_XML_QUESTION)
        {
          sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
        lex=my_xml_scan(p,&a);
      }

      if (exclam)
      {
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
          return MY_XML_ERROR;
      }

      if (lex != MY_XML_GT)
      {
        sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
    }
    else
    {
      /* Text up to the next '<' or the end of input */
      a.beg=p->cur;
      for ( ; (p->cur < p->end) && (p->cur[0] != '<')  ; p->cur++);
      a.end=p->cur;

      if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
        my_xml_norm_text(&a);
      if (a.beg != a.end)
      {
        my_xml_value(p,a.beg,(size_t) (a.end-a.beg));
      }
    }
  }

  /* A non-empty path here means some element was never closed */
  if (p->attr[0])
  {
    sprintf(p->errstr,"unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}
416
417
418
/*
  Initialise a parser object by setting every byte of it to zero.
*/
void my_xml_parser_create(MY_XML_PARSER *p)
{
  memset(p, 0, sizeof *p);
}
422
423
424
/*
  Counterpart of my_xml_parser_create(). This function does nothing and
  does not use its argument.
*/
void my_xml_parser_free(MY_XML_PARSER *p  __attribute__((unused)))
{
  /* Intentionally empty */
}
427
428
429
/*
  Set the value handler of the parser. The parser passes the handler a
  pointer to the value text and its length in bytes.
*/
void my_xml_set_value_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p,
                                            const char *s,
                                            size_t l))
{
  p->value= action;
}
435
436
/*
  Set the enter handler of the parser. The parser passes the handler a
  pointer to a name or path and its length in bytes.
*/
void my_xml_set_enter_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p,
                                            const char *s,
                                            size_t l))
{
  p->enter= action;
}
442
443
444
/*
  Set the leave handler of the parser (stored in p->leave_xml). The parser
  passes the handler a pointer to a name or path and its length in bytes.
*/
void my_xml_set_leave_handler(MY_XML_PARSER *p,
                              int (*action)(MY_XML_PARSER *p,
                                            const char *s,
                                            size_t l))
{
  p->leave_xml= action;
}
450
451
452
/*
  Store a caller-supplied pointer in the parser object. It is only stored
  here, not dereferenced.
*/
void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data)
{
  p->user_data= user_data;
}
456
457
458
/*
  Return the parser's error message buffer (p->errstr).
*/
const char *my_xml_error_string(MY_XML_PARSER *p)
{
  const char *message= p->errstr;
  return message;
}
462
463
464
/*
  Return the position of the cursor within its line: the distance in bytes
  from the last newline character before p->cur to p->cur, or from the
  start of the input when no newline precedes the cursor.
*/
size_t my_xml_error_pos(MY_XML_PARSER *p)
{
  const char *line_start= p->beg;
  const char *scan= p->beg;

  while (scan < p->cur)
  {
    /* Remember the most recent newline seen so far */
    if (*scan == '\n')
      line_start= scan;
    scan++;
  }
  return (size_t) (p->cur - line_start);
}
475
476
/*
  Return the number of newline characters between the start of the input
  and p->cur, i.e. the zero-based line number of the cursor.
*/
uint my_xml_error_lineno(MY_XML_PARSER *p)
{
  uint newlines= 0;
  const char *scan= p->beg;

  while (scan < p->cur)
  {
    if (*scan == '\n')
      newlines++;
    scan++;
  }
  return newlines;
}