1
/* Copyright (C) 2000 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include "my_global.h"
21
/*
  Lexeme codes. my_xml_parse() compares the result of my_xml_scan() against
  these values and lex2str() maps them to printable names. Every code is a
  character constant; the punctuation lexemes visible here ('/', '?', '!')
  use the punctuation character itself.

  NOTE(review): MY_XML_EQ, MY_XML_LT and MY_XML_GT are referenced by
  lex2str() and my_xml_parse() but their definitions are not visible in
  this listing - confirm they are defined alongside these.
*/
#define MY_XML_UNKNOWN 'U'
22
#define MY_XML_EOF 'E'
23
#define MY_XML_STRING 'S'
24
#define MY_XML_IDENT 'I'
28
#define MY_XML_SLASH '/'
29
#define MY_XML_COMMENT 'C'
30
#define MY_XML_TEXT 'T'
31
#define MY_XML_QUESTION '?'
32
#define MY_XML_EXCLAM '!'
33
#define MY_XML_CDATA 'D'
35
/*
  Token span record. The scanner and the parser in this file read and write
  the members beg and end through MY_XML_ATTR objects; the member list is
  not visible in this listing.
  NOTE(review): presumably this struct is what MY_XML_ATTR names and holds
  two const char pointers delimiting a half-open range - confirm.
*/
typedef struct xml_attr_st
45
/*
  Character class bits. They are tested against the entries of
  my_xml_ctype[] by the my_xml_is_id0 / my_xml_is_id1 / my_xml_is_space
  macros.
*/
#define MY_XML_ID0 0x01 /* Identifier initial character */
46
#define MY_XML_ID1 0x02 /* Identifier medial character */
47
#define MY_XML_SPC 0x08 /* Spacing character */
51
http://www.w3.org/TR/REC-xml/
52
[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
53
CombiningChar | Extender
54
[5] Name ::= (Letter | '_' | ':') (NameChar)*
57
/*
  Per-byte character class table with one entry for each of the 256 byte
  values. Entries combine MY_XML_ID0 (0x01), MY_XML_ID1 (0x02) and
  MY_XML_SPC (0x08):
    8 - tab (0x09), LF (0x0A), CR (0x0D) and space (0x20);
    2 - '-', '.' and the digits: allowed inside a name but not as its
        first character;
    3 - letters, '_', ':' and every byte from 0x80 to 0xFF: allowed
        anywhere in a name;
    0 - everything else.
*/
static char my_xml_ctype[256]=
59
/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
60
/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
61
/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
62
/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
63
/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
64
/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
65
/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
66
/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
67
/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
68
/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
69
/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
70
/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
71
/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
72
/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
73
/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
74
/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
77
/*
  Character class tests. Each macro looks the character up in
  my_xml_ctype[] and yields a non-zero value when the corresponding class
  bit is set. The argument is cast to uchar before it is used as an index
  (uchar is presumably an unsigned char typedef, which keeps bytes with the
  high bit set from becoming negative indexes - confirm).
*/
#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
78
#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
79
#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
82
/*
  Translate a lexeme code into a printable name. my_xml_parse() inserts the
  result into its "... unexpected (... wanted)" error messages.

  @param lex  one of the MY_XML_* lexeme codes
  @return     a string literal; codes without a case label below
              (MY_XML_UNKNOWN has none) end at the final
              "unknown token" return

  NOTE(review): the case labels imply a switch on lex whose header is not
  visible in this listing.
*/
static const char *lex2str(int lex)
86
case MY_XML_EOF: return "END-OF-INPUT";
87
case MY_XML_STRING: return "STRING";
88
case MY_XML_IDENT: return "IDENT";
89
case MY_XML_CDATA: return "CDATA";
90
case MY_XML_EQ: return "'='";
91
case MY_XML_LT: return "'<'";
92
case MY_XML_GT: return "'>'";
93
case MY_XML_SLASH: return "'/'";
94
case MY_XML_COMMENT: return "COMMENT";
95
case MY_XML_TEXT: return "TEXT";
96
case MY_XML_QUESTION: return "'?'";
97
case MY_XML_EXCLAM: return "'!'";
99
/* Fallback for any code not listed above. */
return "unknown token";
102
/*
  Trim spacing characters from both ends of the span [a->beg, a->end).
  Only the two pointers are moved towards each other; the text they point
  into is not modified. A span consisting entirely of spacing characters
  collapses to an empty span at its original end.
*/
static void my_xml_norm_text(MY_XML_ATTR *a)
{
  /* Step the start forward over leading spacing characters. */
  while ((a->beg < a->end) && my_xml_is_space(a->beg[0]))
    a->beg++;

  /* Step the end backward over trailing spacing characters. */
  while ((a->beg < a->end) && my_xml_is_space(a->end[-1]))
    a->end--;
}
109
/*
  Read the next lexeme starting at p->cur, advancing p->cur past it.

  @param p  parser state; p->cur is the read position, p->end the end of
            the input, p->flags is consulted for text normalization
  @param a  receives the lexeme's span (a->beg / a->end)
  @return   presumably the MY_XML_* lexeme code - the assignments to the
            local `lex` and the return statement are not visible in this
            listing; callers compare the result against MY_XML_* codes

  The visible branches recognise, in this order: a comment, a CDATA
  section, a single punctuation character from "?=/<>!", a quoted string,
  and an identifier.
*/
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
113
/* Skip spacing characters in front of the lexeme. */
for (; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
115
/* Nothing but spacing was left in the input. */
if (p->cur >= p->end)
126
/* Comment: the length test guarantees the four bytes compared exist. */
if ((p->end - p->cur > 3) && !bcmp(p->cur,"<!--",4))
128
/*
  Look for the closing "-->".
  NOTE(review): the loop guard only ensures p->cur < p->end, yet bcmp()
  examines three bytes, so the last two positions can read beyond p->end.
*/
for (; (p->cur < p->end) && bcmp(p->cur, "-->", 3); p->cur++)
130
/*
  NOTE(review): when the loop above stopped because p->cur reached p->end
  (unterminated comment), this compares three bytes past the input.
*/
if (!bcmp(p->cur, "-->", 3))
135
/*
  CDATA section.
  NOTE(review): no remaining-length test is visible before this nine byte
  comparison; the "> 3" guard belongs to the comment branch only.
*/
else if (!bcmp(p->cur, "<![CDATA[",9))
138
/* Look for the closing "]]>"; the bound keeps p->cur[2] inside the input. */
for (; p->cur < p->end - 2 ; p->cur++)
140
if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
149
/*
  Single-character punctuation lexeme.
  NOTE(review): strchr() also matches the terminating NUL of its first
  argument, so a 0 byte in the input takes this branch as well.
*/
else if (strchr("?=/<>!",p->cur[0]))
155
/* Quoted string, delimited by either quote character. */
else if ( (p->cur[0] == '"') || (p->cur[0] == '\'') )
158
/* Advance to the matching quote; a->beg[0] is the opening quote. */
for (; ( p->cur < p->end ) && (p->cur[0] != a->beg[0]); p->cur++)
161
/*
  Step over the closing quote.
  NOTE(review): for an unterminated string p->cur equals p->end here, so
  p->cur[0] is read one byte past the input.
*/
if (a->beg[0] == p->cur[0])p->cur++;
163
/* String content is trimmed unless the caller asked to skip that. */
if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
167
/* Identifier: one initial character followed by medial characters. */
else if (my_xml_is_id0(p->cur[0]))
170
while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
180
/*
  NOTE(review): presumably debug-only output (any enclosing conditional is
  not visible). "%d" is paired with a pointer difference, which is not an
  int on every platform.
*/
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
188
/*
  Pass a value to the value handler installed in the parser.

  @param st   parser state; st->value is the handler (may be unset)
  @param str  start of the value
  @param len  length of the value in bytes
  @return     the handler's result, or MY_XML_OK when no handler is set
*/
static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
{
  if (!st->value)
    return MY_XML_OK;

  return (st->value)(st, str, len);
}
194
/*
  Record entry into a tag or attribute named str[0..len) and notify the
  enter handler.

  @param st   parser state; st->attr is the path buffer and st->attrend
              the current end of the path stored in it
  @param str  name being entered
  @param len  length of the name in bytes
  @return     the enter handler's result, or MY_XML_OK when none is set;
              presumably an error code when the path does not fit (that
              return is not visible in this listing)
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
196
/*
  Refuse names that would not fit into the path buffer.
  NOTE(review): the check reserves len + 1 bytes. If both a '/' separator
  and a terminating NUL are written below (those lines are not visible),
  len + 2 bytes are needed and the check is one byte short - confirm.
*/
if ((size_t) (st->attrend-st->attr+len+1) > sizeof(st->attr))
198
/* NOTE(review): message typo - "To deep" should read "Too deep". */
sprintf(st->errstr,"To deep XML");
201
/*
  The path already holds at least one name. Presumably a '/' separator is
  appended here; my_xml_leave() searches backwards for '/'.
*/
if (st->attrend > st->attr)
206
/* Append the new name at the end of the path. */
memcpy(st->attrend,str,len);
209
/* With relative names the handler sees just this name ... */
if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
210
return st->enter ? st->enter(st, str, len) : MY_XML_OK;
212
/* ... otherwise it sees the whole accumulated path. */
return st->enter ? st->enter(st,st->attr,st->attrend-st->attr) : MY_XML_OK;
216
/*
  Helper used by my_xml_leave() to prepare names for error messages; it is
  called with the destination capacity minus one as l1 and the source
  length as l2.
  NOTE(review): only the length clamp is visible in this listing.
  Presumably the function then copies l1 bytes from src to s and
  NUL-terminates s - confirm.
*/
static void mstr(char *s,const char *src,size_t l1, size_t l2)
218
/* Use the smaller of the two lengths. */
l1 = l1<l2 ? l1 : l2;
224
/*
  Record leaving the innermost open tag or attribute and notify the leave
  handler.

  @param p     parser state; p->attr .. p->attrend holds the current path
  @param str   name found in the closing tag, or NULL when the caller has
               no name to check (my_xml_parse() passes NULL, 0 in places)
  @param slen  length of str in bytes
  @return      presumably the leave handler's result or an error code -
               the return statements are not visible in this listing
*/
static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
232
/* Find previous '/' or beginning */
233
for (e=p->attrend; (e>p->attr) && (e[0] != '/') ; e--);
234
/* Length of the last path component, excluding the '/' if one was found. */
glen = (size_t) ((e[0] == '/') ? (p->attrend-e-1) : p->attrend-e);
236
/*
  NOTE(review): the visible test compares only the lengths of the closing
  name and the open name, not their bytes.
*/
if (str && (slen != glen))
238
mstr(s,str,sizeof(s)-1,slen);
241
/*
  NOTE(review): the statement below ends in ',' rather than ';', chaining
  it to the following sprintf through the comma operator. Also, when no
  '/' was found e points at the first character of the name, so e+1 skips
  that character - confirm both.
*/
mstr(g,e+1,sizeof(g)-1,glen),
242
sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
245
sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
249
/* With relative names the handler sees just the closing name ... */
if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
250
rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
252
/* ... otherwise it sees the whole path that is being left. */
rc= (p->leave_xml ? p->leave_xml(p,p->attr,p->attrend-p->attr) :
262
/*
  Parse an XML document and report its structure through my_xml_enter(),
  my_xml_value() and my_xml_leave(), which forward to the handlers
  installed in the parser.

  @param p    parser state
  @param str  input text
  @param len  length of the input in bytes
  @return     presumably MY_XML_OK on success and an error code after an
              error message has been written to p->errstr - the return
              statements are not visible in this listing

  NOTE(review): the set-up that points p->cur / p->end at str and len is
  not visible in this listing.
*/
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
269
/* Consume the input until the read position reaches its end. */
while ( p->cur < p->end )
272
/* Markup starts with '<'; anything else is handled as text further down. */
if (p->cur[0] == '<')
278
lex=my_xml_scan(p,&a);
280
if (MY_XML_COMMENT == lex)
283
if (lex == MY_XML_CDATA)
287
/*
  Report the CDATA section as a value.
  NOTE(review): the handler's result is discarded here, whereas the
  attribute path below checks it against MY_XML_OK.
*/
my_xml_value(p, a.beg, (size_t) (a.end-a.beg));
291
lex=my_xml_scan(p,&a);
293
/* Closing tag: a '/' must be followed by an identifier. */
if (MY_XML_SLASH == lex)
295
if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
297
sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
300
if (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg)))
302
lex=my_xml_scan(p,&a);
306
/* A '!' or '?' directly after '<' is consumed before the tag name. */
if (MY_XML_EXCLAM == lex)
308
lex=my_xml_scan(p,&a);
311
else if (MY_XML_QUESTION == lex)
313
lex=my_xml_scan(p,&a);
317
/* Tag name: enter the tag. */
if (MY_XML_IDENT == lex)
319
p->current_node_type= MY_XML_NODE_TAG;
320
if (MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))
325
sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
330
/* Attribute list: each item starts with an identifier or a string. */
while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
331
(MY_XML_STRING == lex))
334
/* name '=' value: reported as enter(name), value(value), leave(name). */
if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
336
lex=my_xml_scan(p,&b);
337
if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) )
339
p->current_node_type= MY_XML_NODE_ATTR;
340
if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
341
(MY_XML_OK != my_xml_value(p,b.beg,(size_t) (b.end-b.beg))) ||
342
(MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
347
sprintf(p->errstr,"%s unexpected (ident or string wanted)",
352
/* Item without '=': reported as enter(name) followed by leave(name). */
else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex))
354
p->current_node_type= MY_XML_NODE_ATTR;
355
if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
356
(MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
363
/* '/' after the attributes: leave the tag that was just entered. */
if (lex == MY_XML_SLASH)
365
if (MY_XML_OK != my_xml_leave(p,NULL,0))
367
lex=my_xml_scan(p,&a);
373
/* A closing '?' is required at this point (the guarding test is not visible). */
if (lex != MY_XML_QUESTION)
375
sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
378
if (MY_XML_OK != my_xml_leave(p,NULL,0))
380
lex=my_xml_scan(p,&a);
385
if (MY_XML_OK != my_xml_leave(p,NULL,0))
389
/* Every piece of markup must end with '>'. */
if (lex != MY_XML_GT)
391
sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
398
/* Text: advance to the next '<' or to the end of the input. */
for ( ; (p->cur < p->end) && (p->cur[0] != '<') ; p->cur++);
401
/* Text is trimmed unless the caller asked to skip that. */
if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
402
my_xml_norm_text(&a);
405
/* NOTE(review): the handler's result is discarded here as well. */
my_xml_value(p,a.beg,(size_t) (a.end-a.beg));
412
sprintf(p->errstr,"unexpected END-OF-INPUT");
419
/*
  Initialise a parser object by setting every byte of *p to zero.

  @param p  parser object to initialise
*/
void my_xml_parser_create(MY_XML_PARSER *p)
{
  bzero((void *) p, sizeof(*p));
}
425
/*
  Counterpart of my_xml_parser_create(). The parameter is marked unused.
  NOTE(review): the body is not visible in this listing; presumably it is
  empty - confirm.
*/
void my_xml_parser_free(MY_XML_PARSER *p __attribute__((unused)))
430
/*
  Install the value handler.
  NOTE(review): the body is not visible in this listing; presumably it
  stores action in p->value, which my_xml_value() invokes - confirm.
*/
void my_xml_set_value_handler(MY_XML_PARSER *p,
431
int (*action)(MY_XML_PARSER *p, const char *s,
437
/*
  Install the enter handler.
  NOTE(review): the body is not visible in this listing; presumably it
  stores action in p->enter, which my_xml_enter() invokes - confirm.
*/
void my_xml_set_enter_handler(MY_XML_PARSER *p,
438
int (*action)(MY_XML_PARSER *p, const char *s,
445
/*
  Install the leave handler.
  NOTE(review): the body is not visible in this listing; presumably it
  stores action in p->leave_xml, which my_xml_leave() invokes - confirm.
*/
void my_xml_set_leave_handler(MY_XML_PARSER *p,
446
int (*action)(MY_XML_PARSER *p, const char *s,
453
/*
  Store an application-supplied pointer in the parser object.

  @param p          parser object
  @param user_data  pointer to keep in p->user_data
*/
void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data)
{
  p->user_data= user_data;
}
459
/*
  Error message accessor.
  NOTE(review): the body is not visible in this listing; presumably it
  returns p->errstr, the buffer the parser formats its messages into -
  confirm.
*/
const char *my_xml_error_string(MY_XML_PARSER *p)
465
/*
  Report where parsing stopped as the distance from `beg` to p->cur.

  @param p  parser state; p->beg is the start of the input and p->cur the
            current read position
  @return   p->cur - beg, in bytes

  NOTE(review): the loop body is not visible in this listing. Presumably it
  moves `beg` forward at each newline so that the result is an offset
  within the current line - confirm.
*/
size_t my_xml_error_pos(MY_XML_PARSER *p)
467
const char *beg=p->beg;
469
/* Walk over everything consumed so far. */
for ( s=p->beg ; s<p->cur; s++)
474
return (size_t) (p->cur-beg);
477
uint my_xml_error_lineno(MY_XML_PARSER *p)
481
for (s=p->beg ; s<p->cur; s++)