1548.2.11
by Barry.Leslie at PrimeBase
Removed libxml reqirement by using a home grown xml parser. |
1 |
/* Copyright (c) 2010 PrimeBase Technologies GmbH, Germany
 *
 * PrimeBase Media Stream for MySQL
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Paul McCullagh (H&G2JCtL)
 *
 * 2010-01-12
 *
 * CORE SYSTEM:
 * XML Parsing
 *
 */
|
|
27 |
||
28 |
#include "CSConfig.h" |
|
29 |
#include <inttypes.h> |
|
30 |
||
31 |
||
1643.1.8
by Monty Taylor
Fixed a couple of solaris build issues. Callback functions passed to |
32 |
#include <string.h> |
33 |
#include <stdlib.h> |
|
34 |
#include <ctype.h> |
|
35 |
#include <stdio.h> |
|
36 |
#include <errno.h> |
|
37 |
||
1644.3.1
by Barry.Leslie at PrimeBase
Merged in changes from PBMS project. |
38 |
#ifdef DRIZZLED
|
1643.1.8
by Monty Taylor
Fixed a couple of solaris build issues. Callback functions passed to |
39 |
#include <boost/algorithm/string.hpp> |
1644.3.1
by Barry.Leslie at PrimeBase
Merged in changes from PBMS project. |
40 |
#define STRCASESTR(s1, s2) boost::ifind_first(s1, s2)
|
41 |
#else
|
|
42 |
#define STRCASESTR(s1, s2) strcasestr(s1, s2)
|
|
43 |
#endif
|
|
1548.2.11
by Barry.Leslie at PrimeBase
Removed libxml reqirement by using a home grown xml parser. |
44 |
|
45 |
#include "CSXML.h" |
|
46 |
||
47 |
/*
 * Character-class and parse-buffer helper macros.
 *
 * All macro parameters are parenthesized so that expression arguments
 * (for example a conditional expression) are evaluated as a unit.
 * Note that every macro evaluates its arguments more than once, so
 * arguments must not have side effects.
 */

/* True if ch is a blank, tab, newline or carriage return. */
#define ISSPACE(ch)			((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')

/* True if ch is one of the punctuation characters: * + ( ) , | [ ] ? / */
#define ISSINGLE(ch)		((ch) == '*' || (ch) == '+' || (ch) == '(' || (ch) == ')' || (ch) == ',' || (ch) == '|' || (ch) == '[' || (ch) == ']' || (ch) == '?' || (ch) == '/')

/* Replace the contents of x->buffer with the single character ch. */
#define SET_CHAR(x, ch)		{ (x)->buffer[0] = (ch); (x)->count = 1; }

/* Append ch to x->buffer. When the buffer already holds
 * PARSE_BUFFER_SIZE characters the last slot is overwritten instead
 * and the count is left unchanged.
 */
#define ADD_CHAR(x, ch)		{ if ((x)->count < PARSE_BUFFER_SIZE) { (x)->buffer[(x)->count] = (ch); (x)->count++; } else (x)->buffer[PARSE_BUFFER_SIZE-1] = (ch); }
|
|
52 |
||
53 |
bool CSXMLParser::match_string(const char *ch) |
|
54 |
{
|
|
55 |
int32_t i; |
|
56 |
||
57 |
for (i=0; i<this->count; i++) { |
|
58 |
if (this->buffer[i] != *ch) |
|
59 |
return false; |
|
60 |
ch++; |
|
61 |
}
|
|
62 |
if (*ch) |
|
63 |
return false; |
|
64 |
return(i == this->count); |
|
65 |
}
|
|
66 |
||
67 |
void CSXMLParser::increment_nesting(wchar_t ch) |
|
68 |
{
|
|
69 |
if (this->nesting < PARSE_STACK_SIZE) { |
|
70 |
switch (ch) { |
|
71 |
case '/': |
|
72 |
this->end_type[this->nesting] = XML_OP_1_END_CLOSE_TAG; |
|
73 |
break; |
|
74 |
case '?': |
|
75 |
this->end_type[this->nesting] = XML_OP_1_END_PI_TAG; |
|
76 |
break; |
|
77 |
case '!': |
|
78 |
this->end_type[this->nesting] = XML_OP_1_END_ENTITY_TAG; |
|
79 |
break; |
|
80 |
case '[': |
|
81 |
this->end_type[this->nesting] = XML_OP_1_END_BRACKET_TAG; |
|
82 |
break; |
|
83 |
default: |
|
84 |
if (ISSPACE(ch)) |
|
85 |
this->end_type[this->nesting] = XML_OP_1_END_UNKNOWN_TAG; |
|
86 |
else
|
|
87 |
this->end_type[this->nesting] = XML_OP_1_END_TAG; |
|
88 |
break; |
|
89 |
}
|
|
90 |
}
|
|
91 |
this->nesting++; |
|
92 |
}
|
|
93 |
||
94 |
int32_t CSXMLParser::parseChar(wchar_t ch) |
|
95 |
/* This function does the actual work of parsing. It is expects
|
|
96 |
* "complete" characters as input. This could be 4 byte characters
|
|
97 |
* as long as it is able to recognize the characters that are
|
|
98 |
* relevant to parsing.
|
|
99 |
* The function outputs processing instructions, and indicates
|
|
100 |
* how the output data is to be understood.
|
|
101 |
*/
|
|
102 |
{
|
|
103 |
switch (this->state) { |
|
104 |
case XML_BEFORE_CDATA: |
|
105 |
this->nesting = 0; |
|
106 |
/* This is the initial state! */
|
|
107 |
if (ch == '<') { |
|
108 |
this->state = XML_LT; |
|
109 |
this->type = XML_noop; |
|
110 |
}
|
|
111 |
else { |
|
112 |
this->state = XML_IN_CDATA; |
|
113 |
this->type = XML_CDATA_CH; |
|
114 |
}
|
|
115 |
SET_CHAR(this, ch); |
|
116 |
break; |
|
117 |
case XML_IN_CDATA: |
|
118 |
if (ch == '<') { |
|
119 |
this->state = XML_LT; |
|
120 |
this->type = XML_noop; |
|
121 |
}
|
|
122 |
else
|
|
123 |
this->type = XML_CDATA_CH; |
|
124 |
SET_CHAR(this, ch); |
|
125 |
break; |
|
126 |
case XML_LT: |
|
127 |
if (ISSPACE(ch)) { |
|
128 |
if (this->nesting) { |
|
129 |
this->state = XML_BEFORE_ATTR; |
|
130 |
if (this->step == XML_STEP_TAG) |
|
131 |
this->type = XML_start_tag_TAG_CH; |
|
132 |
else if (this->step == XML_STEP_NESTED) |
|
133 |
this->type = XML_TAG_CH; |
|
134 |
else if (this->step == XML_STEP_NONE) |
|
135 |
this->type = XML_end_cdata_TAG_CH; |
|
136 |
else
|
|
137 |
this->type = XML_add_attr_TAG_CH; |
|
138 |
this->step = XML_STEP_TAG; |
|
139 |
increment_nesting(ch); |
|
140 |
this->count = 0; |
|
141 |
}
|
|
142 |
else { |
|
143 |
this->state = XML_IN_CDATA; |
|
144 |
this->type = XML_CDATA_CH; |
|
145 |
ADD_CHAR(this, ch); |
|
146 |
}
|
|
147 |
}
|
|
148 |
else if (ch == '!') { |
|
149 |
this->state = XML_LT_BANG; |
|
150 |
this->type = XML_noop; |
|
151 |
ADD_CHAR(this, ch); |
|
152 |
}
|
|
153 |
else { |
|
154 |
this->state = XML_IN_TAG_NAME; |
|
155 |
if (this->step == XML_STEP_TAG) |
|
156 |
this->type = XML_start_tag_TAG_CH; |
|
157 |
else if (this->step == XML_STEP_NESTED) |
|
158 |
this->type = XML_TAG_CH; |
|
159 |
else if (this->step == XML_STEP_NONE) |
|
160 |
this->type = XML_end_cdata_TAG_CH; |
|
161 |
else
|
|
162 |
this->type = XML_add_attr_TAG_CH; |
|
163 |
this->step = XML_STEP_TAG; |
|
164 |
increment_nesting(ch); |
|
165 |
SET_CHAR(this, ch); |
|
166 |
}
|
|
167 |
break; |
|
168 |
case XML_LT_BANG: |
|
169 |
if (ch == '-') { |
|
170 |
this->state = XML_LT_BANG_DASH; |
|
171 |
this->type = XML_noop; |
|
172 |
}
|
|
173 |
else if (ch == '[') { |
|
174 |
this->state = XML_LT_BANG_SQR; |
|
175 |
this->type = XML_noop; |
|
176 |
}
|
|
177 |
else { |
|
178 |
this->state = XML_IN_TAG_NAME; |
|
179 |
if (this->step == XML_STEP_TAG) |
|
180 |
this->type = XML_start_tag_TAG_CH; |
|
181 |
else if (this->step == XML_STEP_NESTED) |
|
182 |
this->type = XML_TAG_CH; |
|
183 |
else if (this->step == XML_STEP_NONE) |
|
184 |
this->type = XML_end_cdata_TAG_CH; |
|
185 |
else
|
|
186 |
this->type = XML_add_attr_TAG_CH; |
|
187 |
this->step = XML_STEP_TAG; |
|
188 |
increment_nesting('!'); |
|
189 |
SET_CHAR(this, '!'); |
|
190 |
}
|
|
191 |
ADD_CHAR(this, ch); |
|
192 |
break; |
|
193 |
case XML_LT_BANG_DASH: |
|
194 |
if (ch == '-') { |
|
195 |
this->state = XML_IN_COMMENT; |
|
196 |
if (this->step == XML_STEP_TAG) |
|
197 |
this->type = XML_start_tag_start_comment; |
|
198 |
else if (this->step == XML_STEP_NESTED) |
|
199 |
this->type = XML_start_comment; |
|
200 |
else if (this->step == XML_STEP_NONE) |
|
201 |
this->type = XML_end_cdata_start_comment; |
|
202 |
else
|
|
203 |
this->type = XML_add_attr_start_comment; |
|
204 |
increment_nesting(' '); |
|
205 |
}
|
|
206 |
else { |
|
207 |
this->state = XML_IN_CDATA; |
|
208 |
this->type = XML_CDATA_CH; |
|
209 |
ADD_CHAR(this, ch); |
|
210 |
}
|
|
211 |
break; |
|
212 |
case XML_LT_BANG_SQR: |
|
213 |
if (ISSPACE(ch)) |
|
214 |
this->type = XML_noop; |
|
215 |
else if (ch == '[') { |
|
216 |
this->state = XML_BEFORE_ATTR; |
|
217 |
if (this->step == XML_STEP_TAG) |
|
218 |
this->type = XML_start_tag_TAG_CH; |
|
219 |
else if (this->step == XML_STEP_NESTED) |
|
220 |
this->type = XML_TAG_CH; |
|
221 |
else if (this->step == XML_STEP_NONE) |
|
222 |
this->type = XML_end_cdata_TAG_CH; |
|
223 |
else
|
|
224 |
this->type = XML_add_attr_TAG_CH; |
|
225 |
this->step = XML_STEP_TAG; |
|
226 |
increment_nesting('['); |
|
227 |
SET_CHAR(this, '!'); |
|
228 |
ADD_CHAR(this, '['); |
|
229 |
}
|
|
230 |
else { |
|
231 |
this->state = XML_LT_BANG_SQR_IN_NAME; |
|
232 |
this->type = XML_noop; |
|
233 |
SET_CHAR(this, '!'); |
|
234 |
ADD_CHAR(this, '['); |
|
235 |
ADD_CHAR(this, ch); |
|
236 |
}
|
|
237 |
break; |
|
238 |
case XML_LT_BANG_SQR_IN_NAME: |
|
239 |
if (ISSPACE(ch)) { |
|
240 |
this->state = XML_LT_BANG_SQR_AFTER_NAME; |
|
241 |
this->type = XML_noop; |
|
242 |
}
|
|
243 |
else if (ch == '[') { |
|
244 |
if (match_string("![CDATA")) { |
|
245 |
this->state = XML_IN_CDATA_TAG; |
|
246 |
if (this->step == XML_STEP_TAG) |
|
247 |
this->type = XML_start_tag_start_cdata_tag; |
|
248 |
else if (this->step == XML_STEP_NESTED) |
|
249 |
this->type = XML_start_cdata_tag; |
|
250 |
else if (this->step == XML_STEP_NONE) |
|
251 |
this->type = XML_end_cdata_start_cdata_tag; |
|
252 |
else
|
|
253 |
this->type = XML_add_attr_start_cdata_tag; |
|
254 |
this->step = XML_STEP_TAG; |
|
255 |
increment_nesting('['); |
|
256 |
}
|
|
257 |
else { |
|
258 |
this->state = XML_BEFORE_ATTR; |
|
259 |
if (this->step == XML_STEP_TAG) |
|
260 |
this->type = XML_start_tag_TAG_CH; |
|
261 |
else if (this->step == XML_STEP_NESTED) |
|
262 |
this->type = XML_TAG_CH; |
|
263 |
else if (this->step == XML_STEP_NONE) |
|
264 |
this->type = XML_end_cdata_TAG_CH; |
|
265 |
else
|
|
266 |
this->type = XML_add_attr_TAG_CH; |
|
267 |
this->step = XML_STEP_TAG; |
|
268 |
increment_nesting('['); |
|
269 |
}
|
|
270 |
}
|
|
271 |
else { |
|
272 |
this->type = XML_noop; |
|
273 |
ADD_CHAR(this, ch); |
|
274 |
}
|
|
275 |
break; |
|
276 |
case XML_LT_BANG_SQR_AFTER_NAME: |
|
277 |
if (ch == '[') { |
|
278 |
if (match_string("![CDATA")) { |
|
279 |
this->state = XML_IN_CDATA_TAG; |
|
280 |
if (this->step == XML_STEP_TAG) |
|
281 |
this->type = XML_start_tag_start_cdata_tag; |
|
282 |
else if (this->step == XML_STEP_NESTED) |
|
283 |
this->type = XML_start_cdata_tag; |
|
284 |
else if (this->step == XML_STEP_NONE) |
|
285 |
this->type = XML_end_cdata_start_cdata_tag; |
|
286 |
else
|
|
287 |
this->type = XML_add_attr_start_cdata_tag; |
|
288 |
increment_nesting('['); |
|
289 |
}
|
|
290 |
else { |
|
291 |
this->state = XML_BEFORE_ATTR; |
|
292 |
if (this->step == XML_STEP_TAG) |
|
293 |
this->type = XML_start_tag_TAG_CH; |
|
294 |
else if (this->step == XML_STEP_NESTED) |
|
295 |
this->type = XML_TAG_CH; |
|
296 |
else if (this->step == XML_STEP_NONE) |
|
297 |
this->type = XML_end_cdata_TAG_CH; |
|
298 |
else
|
|
299 |
this->type = XML_add_attr_TAG_CH; |
|
300 |
this->step = XML_STEP_TAG; |
|
301 |
increment_nesting('['); |
|
302 |
}
|
|
303 |
}
|
|
304 |
else
|
|
305 |
/* Ignore data until the '['!!! */
|
|
306 |
this->type = XML_noop; |
|
307 |
break; |
|
308 |
case XML_IN_TAG_NAME: |
|
309 |
if (ISSPACE(ch)) { |
|
310 |
this->state = XML_BEFORE_ATTR; |
|
311 |
this->type = XML_noop; |
|
312 |
}
|
|
313 |
else if (ch == '<') { |
|
314 |
this->state = XML_LT; |
|
315 |
this->type = XML_noop; |
|
316 |
}
|
|
317 |
else if (ch == '>') { |
|
318 |
if (this->step == XML_STEP_TAG) |
|
319 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
320 |
else if (this->step == XML_STEP_NESTED) |
|
321 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
322 |
else
|
|
323 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
324 |
this->nesting--; |
|
325 |
if (this->nesting) { |
|
326 |
this->step = XML_STEP_NESTED; |
|
327 |
this->state = XML_BEFORE_ATTR; |
|
328 |
}
|
|
329 |
else { |
|
330 |
this->step = XML_STEP_NONE; |
|
331 |
this->state = XML_IN_CDATA; |
|
332 |
}
|
|
333 |
}
|
|
334 |
else if (ch == '"' || ch == '\'') { |
|
335 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
336 |
this->quote = ch; |
|
337 |
this->type = XML_noop; |
|
338 |
}
|
|
339 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
340 |
this->state = XML_SLASH; |
|
341 |
this->type = XML_noop; |
|
342 |
}
|
|
343 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
344 |
this->state = XML_QMARK; |
|
345 |
this->type = XML_noop; |
|
346 |
}
|
|
347 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
348 |
this->state = XML_SQR; |
|
349 |
this->type = XML_noop; |
|
350 |
}
|
|
351 |
else if (ISSINGLE(ch)) { |
|
352 |
this->state = XML_BEFORE_ATTR; |
|
353 |
if (this->step == XML_STEP_TAG) |
|
354 |
this->type = XML_start_tag_ATTR_CH; |
|
355 |
else if (this->step == XML_STEP_NESTED) |
|
356 |
this->type = XML_ATTR_CH; |
|
357 |
else
|
|
358 |
this->type = XML_add_attr_ATTR_CH; |
|
359 |
this->step = XML_STEP_ATTR; |
|
360 |
SET_CHAR(this, ch); |
|
361 |
}
|
|
362 |
else { |
|
363 |
this->type = XML_TAG_CH; |
|
364 |
SET_CHAR(this, ch); |
|
365 |
}
|
|
366 |
break; |
|
367 |
case XML_BEFORE_ATTR: |
|
368 |
if (ISSPACE(ch)) |
|
369 |
this->type = XML_noop; |
|
370 |
else if (ch == '<') { |
|
371 |
this->state = XML_LT; |
|
372 |
this->type = XML_noop; |
|
373 |
}
|
|
374 |
else if (ch == '>') { |
|
375 |
if (this->step == XML_STEP_TAG) |
|
376 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
377 |
else if (this->step == XML_STEP_NESTED) |
|
378 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
379 |
else
|
|
380 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
381 |
this->nesting--; |
|
382 |
if (this->nesting) { |
|
383 |
this->step = XML_STEP_NESTED; |
|
384 |
this->state = XML_BEFORE_ATTR; |
|
385 |
}
|
|
386 |
else { |
|
387 |
this->step = XML_STEP_NONE; |
|
388 |
this->state = XML_IN_CDATA; |
|
389 |
}
|
|
390 |
}
|
|
391 |
else if (ch == '"' || ch == '\'') { |
|
392 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
393 |
this->quote = ch; |
|
394 |
this->type = XML_noop; |
|
395 |
}
|
|
396 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
397 |
this->state = XML_SLASH; |
|
398 |
this->type = XML_noop; |
|
399 |
}
|
|
400 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
401 |
this->state = XML_QMARK; |
|
402 |
this->type = XML_noop; |
|
403 |
}
|
|
404 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
405 |
this->state = XML_SQR; |
|
406 |
this->type = XML_noop; |
|
407 |
}
|
|
408 |
else if (ISSINGLE(ch)) { |
|
409 |
if (this->step == XML_STEP_TAG) |
|
410 |
this->type = XML_start_tag_ATTR_CH; |
|
411 |
else if (this->step == XML_STEP_NESTED) |
|
412 |
this->type = XML_ATTR_CH; |
|
413 |
else
|
|
414 |
this->type = XML_add_attr_ATTR_CH; |
|
415 |
this->step = XML_STEP_ATTR; |
|
416 |
SET_CHAR(this, ch); |
|
417 |
}
|
|
418 |
else { |
|
419 |
this->state = XML_IN_ATTR; |
|
420 |
if (this->step == XML_STEP_TAG) |
|
421 |
this->type = XML_start_tag_ATTR_CH; |
|
422 |
else if (this->step == XML_STEP_NESTED) |
|
423 |
this->type = XML_ATTR_CH; |
|
424 |
else
|
|
425 |
this->type = XML_add_attr_ATTR_CH; |
|
426 |
this->step = XML_STEP_ATTR; |
|
427 |
SET_CHAR(this, ch); |
|
428 |
}
|
|
429 |
break; |
|
430 |
case XML_IN_ATTR: |
|
431 |
if (ISSPACE(ch)) { |
|
432 |
this->state = XML_BEFORE_EQUAL; |
|
433 |
this->type = XML_noop; |
|
434 |
}
|
|
435 |
else if (ch == '<') { |
|
436 |
this->state = XML_LT; |
|
437 |
this->type = XML_noop; |
|
438 |
}
|
|
439 |
else if (ch == '>') { |
|
440 |
if (this->step == XML_STEP_TAG) |
|
441 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
442 |
else if (this->step == XML_STEP_NESTED) |
|
443 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
444 |
else
|
|
445 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
446 |
this->nesting--; |
|
447 |
if (this->nesting) { |
|
448 |
this->step = XML_STEP_NESTED; |
|
449 |
this->state = XML_BEFORE_ATTR; |
|
450 |
}
|
|
451 |
else { |
|
452 |
this->step = XML_STEP_NONE; |
|
453 |
this->state = XML_IN_CDATA; |
|
454 |
}
|
|
455 |
}
|
|
456 |
else if (ch == '"' || ch == '\'') { |
|
457 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
458 |
this->quote = ch; |
|
459 |
this->type = XML_noop; |
|
460 |
}
|
|
461 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
462 |
this->state = XML_SLASH; |
|
463 |
this->type = XML_noop; |
|
464 |
}
|
|
465 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
466 |
this->state = XML_QMARK; |
|
467 |
this->type = XML_noop; |
|
468 |
}
|
|
469 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
470 |
this->state = XML_SQR; |
|
471 |
this->type = XML_noop; |
|
472 |
}
|
|
473 |
else if (ISSINGLE(ch)) { |
|
474 |
this->state = XML_BEFORE_ATTR; |
|
475 |
if (this->step == XML_STEP_TAG) |
|
476 |
this->type = XML_start_tag_ATTR_CH; |
|
477 |
else if (this->step == XML_STEP_NESTED) |
|
478 |
this->type = XML_ATTR_CH; |
|
479 |
else
|
|
480 |
this->type = XML_add_attr_ATTR_CH; |
|
481 |
this->step = XML_STEP_ATTR; |
|
482 |
SET_CHAR(this, ch); |
|
483 |
}
|
|
484 |
else if (ch == '=') { |
|
485 |
this->state = XML_AFTER_EQUAL; |
|
486 |
this->type = XML_noop; |
|
487 |
}
|
|
488 |
else { |
|
489 |
this->type = XML_ATTR_CH; |
|
490 |
SET_CHAR(this, ch); |
|
491 |
}
|
|
492 |
break; |
|
493 |
case XML_BEFORE_EQUAL: |
|
494 |
if (ISSPACE(ch)) |
|
495 |
this->type = XML_noop; |
|
496 |
else if (ch == '<') { |
|
497 |
this->state = XML_LT; |
|
498 |
this->type = XML_noop; |
|
499 |
}
|
|
500 |
else if (ch == '>') { |
|
501 |
if (this->step == XML_STEP_TAG) |
|
502 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
503 |
else if (this->step == XML_STEP_NESTED) |
|
504 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
505 |
else
|
|
506 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
507 |
this->nesting--; |
|
508 |
if (this->nesting) { |
|
509 |
this->step = XML_STEP_NESTED; |
|
510 |
this->state = XML_BEFORE_ATTR; |
|
511 |
}
|
|
512 |
else { |
|
513 |
this->step = XML_STEP_NONE; |
|
514 |
this->state = XML_IN_CDATA; |
|
515 |
}
|
|
516 |
}
|
|
517 |
else if (ch == '"' || ch == '\'') { |
|
518 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
519 |
this->quote = ch; |
|
520 |
this->type = XML_noop; |
|
521 |
}
|
|
522 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
523 |
this->state = XML_SLASH; |
|
524 |
this->type = XML_noop; |
|
525 |
}
|
|
526 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
527 |
this->state = XML_QMARK; |
|
528 |
this->type = XML_noop; |
|
529 |
}
|
|
530 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
531 |
this->state = XML_SQR; |
|
532 |
this->type = XML_noop; |
|
533 |
}
|
|
534 |
else if (ISSINGLE(ch)) { |
|
535 |
this->state = XML_BEFORE_ATTR; |
|
536 |
if (this->step == XML_STEP_TAG) |
|
537 |
this->type = XML_start_tag_ATTR_CH; |
|
538 |
else if (this->step == XML_STEP_NESTED) |
|
539 |
this->type = XML_ATTR_CH; |
|
540 |
else
|
|
541 |
this->type = XML_add_attr_ATTR_CH; |
|
542 |
this->step = XML_STEP_ATTR; |
|
543 |
SET_CHAR(this, ch); |
|
544 |
}
|
|
545 |
else if (ch == '=') { |
|
546 |
this->state = XML_AFTER_EQUAL; |
|
547 |
this->type = XML_noop; |
|
548 |
}
|
|
549 |
else { |
|
550 |
this->state = XML_IN_ATTR; |
|
551 |
if (this->step == XML_STEP_TAG) |
|
552 |
this->type = XML_start_tag_ATTR_CH; |
|
553 |
else if (this->step == XML_STEP_NESTED) |
|
554 |
this->type = XML_ATTR_CH; |
|
555 |
else
|
|
556 |
this->type = XML_add_attr_ATTR_CH; |
|
557 |
this->step = XML_STEP_ATTR; |
|
558 |
SET_CHAR(this, ch); |
|
559 |
}
|
|
560 |
break; |
|
561 |
case XML_AFTER_EQUAL: |
|
562 |
if (ISSPACE(ch)) { |
|
563 |
this->state = XML_AFTER_EQUAL; |
|
564 |
this->type = XML_noop; |
|
565 |
}
|
|
566 |
else if (ch == '<') { |
|
567 |
this->state = XML_LT; |
|
568 |
this->type = XML_noop; |
|
569 |
}
|
|
570 |
else if (ch == '>') { |
|
571 |
if (this->step == XML_STEP_TAG) |
|
572 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
573 |
else if (this->step == XML_STEP_NESTED) |
|
574 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
575 |
else
|
|
576 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
577 |
this->nesting--; |
|
578 |
if (this->nesting) { |
|
579 |
this->step = XML_STEP_NESTED; |
|
580 |
this->state = XML_BEFORE_ATTR; |
|
581 |
}
|
|
582 |
else { |
|
583 |
this->step = XML_STEP_NONE; |
|
584 |
this->state = XML_IN_CDATA; |
|
585 |
}
|
|
586 |
}
|
|
587 |
else if (ch == '"' || ch == '\'') { |
|
588 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
589 |
this->quote = ch; |
|
590 |
this->type = XML_noop; |
|
591 |
}
|
|
592 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
593 |
this->state = XML_SLASH; |
|
594 |
this->type = XML_noop; |
|
595 |
}
|
|
596 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
597 |
this->state = XML_QMARK; |
|
598 |
this->type = XML_noop; |
|
599 |
}
|
|
600 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
601 |
this->state = XML_SQR; |
|
602 |
this->type = XML_noop; |
|
603 |
}
|
|
604 |
else if (ISSINGLE(ch)) { |
|
605 |
this->state = XML_BEFORE_ATTR; |
|
606 |
if (this->step == XML_STEP_TAG) |
|
607 |
this->type = XML_start_tag_ATTR_CH; |
|
608 |
else if (this->step == XML_STEP_NESTED) |
|
609 |
this->type = XML_ATTR_CH; |
|
610 |
else
|
|
611 |
this->type = XML_add_attr_ATTR_CH; |
|
612 |
this->step = XML_STEP_ATTR; |
|
613 |
SET_CHAR(this, ch); |
|
614 |
}
|
|
615 |
else { |
|
616 |
this->state = XML_IN_VALUE; |
|
617 |
this->quote = 0; |
|
618 |
if (this->step == XML_STEP_TAG) |
|
619 |
this->type = XML_start_tag_VALUE_CH; |
|
620 |
else if (this->step == XML_STEP_VALUE) |
|
621 |
this->type = XML_add_attr_VALUE_CH; |
|
622 |
else
|
|
623 |
this->type = XML_VALUE_CH; |
|
624 |
this->step = XML_STEP_VALUE; |
|
625 |
SET_CHAR(this, ch); |
|
626 |
}
|
|
627 |
break; |
|
628 |
case XML_QUOTE_BEFORE_VALUE: |
|
629 |
if (ch == this->quote) { |
|
630 |
this->state = XML_QUOTE_AFTER_VALUE; |
|
631 |
// Empty string:
|
|
632 |
if (this->step == XML_STEP_TAG) |
|
633 |
this->type = XML_start_tag_VALUE_CH; |
|
634 |
else if (this->step == XML_STEP_VALUE) |
|
635 |
this->type = XML_add_attr_VALUE_CH; |
|
636 |
else
|
|
637 |
this->type = XML_VALUE_CH; |
|
638 |
this->step = XML_STEP_VALUE; |
|
639 |
this->count = 0; |
|
640 |
}
|
|
641 |
else { |
|
642 |
this->state = XML_IN_VALUE; |
|
643 |
if (this->step == XML_STEP_TAG) |
|
644 |
this->type = XML_start_tag_VALUE_CH; |
|
645 |
else if (this->step == XML_STEP_VALUE) |
|
646 |
this->type = XML_add_attr_VALUE_CH; |
|
647 |
else
|
|
648 |
this->type = XML_VALUE_CH; |
|
649 |
this->step = XML_STEP_VALUE; |
|
650 |
SET_CHAR(this, ch); |
|
651 |
}
|
|
652 |
break; |
|
653 |
case XML_IN_VALUE: |
|
654 |
if (this->quote) { |
|
655 |
if (ch == this->quote) { |
|
656 |
this->state = XML_QUOTE_AFTER_VALUE; |
|
657 |
this->type = XML_noop; |
|
658 |
}
|
|
659 |
else { |
|
660 |
this->type = XML_VALUE_CH; |
|
661 |
SET_CHAR(this, ch); |
|
662 |
}
|
|
663 |
}
|
|
664 |
else { |
|
665 |
/* A value without quotes (for HTML!) */
|
|
666 |
if (ISSPACE(ch)) { |
|
667 |
this->state = XML_BEFORE_ATTR; |
|
668 |
this->type = XML_noop; |
|
669 |
}
|
|
670 |
else if (ch == '<') { |
|
671 |
this->state = XML_LT; |
|
672 |
this->type = XML_noop; |
|
673 |
}
|
|
674 |
else if (ch == '>') { |
|
675 |
if (this->step == XML_STEP_TAG) |
|
676 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
677 |
else if (this->step == XML_STEP_NESTED) |
|
678 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
679 |
else
|
|
680 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
681 |
this->nesting--; |
|
682 |
if (this->nesting) { |
|
683 |
this->step = XML_STEP_NESTED; |
|
684 |
this->state = XML_BEFORE_ATTR; |
|
685 |
}
|
|
686 |
else { |
|
687 |
this->step = XML_STEP_NONE; |
|
688 |
this->state = XML_IN_CDATA; |
|
689 |
}
|
|
690 |
}
|
|
691 |
else if (ch == '"' || ch == '\'') { |
|
692 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
693 |
this->quote = ch; |
|
694 |
this->type = XML_noop; |
|
695 |
}
|
|
696 |
else { |
|
697 |
this->type = XML_VALUE_CH; |
|
698 |
SET_CHAR(this, ch); |
|
699 |
}
|
|
700 |
}
|
|
701 |
break; |
|
702 |
case XML_QUOTE_AFTER_VALUE: |
|
703 |
if (ISSPACE(ch)) { |
|
704 |
this->state = XML_BEFORE_ATTR; |
|
705 |
this->type = XML_noop; |
|
706 |
}
|
|
707 |
else if (ch == '<') { |
|
708 |
this->state = XML_LT; |
|
709 |
this->type = XML_noop; |
|
710 |
}
|
|
711 |
else if (ch == '>') { |
|
712 |
if (this->step == XML_STEP_TAG) |
|
713 |
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this)); |
|
714 |
else if (this->step == XML_STEP_NESTED) |
|
715 |
this->type = XML_end_tag(END_TAG_TYPE(this)); |
|
716 |
else
|
|
717 |
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this)); |
|
718 |
this->nesting--; |
|
719 |
if (this->nesting) { |
|
720 |
this->step = XML_STEP_NESTED; |
|
721 |
this->state = XML_BEFORE_ATTR; |
|
722 |
}
|
|
723 |
else { |
|
724 |
this->step = XML_STEP_NONE; |
|
725 |
this->state = XML_IN_CDATA; |
|
726 |
}
|
|
727 |
}
|
|
728 |
else if (ch == '"' || ch == '\'') { |
|
729 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
730 |
this->quote = ch; |
|
731 |
this->type = XML_noop; |
|
732 |
}
|
|
733 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
734 |
this->state = XML_SLASH; |
|
735 |
this->type = XML_noop; |
|
736 |
}
|
|
737 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
738 |
this->state = XML_QMARK; |
|
739 |
this->type = XML_noop; |
|
740 |
}
|
|
741 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
742 |
this->state = XML_SQR; |
|
743 |
this->type = XML_noop; |
|
744 |
}
|
|
745 |
else if (ISSINGLE(ch)) { |
|
746 |
this->state = XML_BEFORE_ATTR; |
|
747 |
if (this->step == XML_STEP_TAG) |
|
748 |
this->type = XML_start_tag_ATTR_CH; |
|
749 |
else if (this->step == XML_STEP_NESTED) |
|
750 |
this->type = XML_ATTR_CH; |
|
751 |
else
|
|
752 |
this->type = XML_add_attr_ATTR_CH; |
|
753 |
this->step = XML_STEP_ATTR; |
|
754 |
SET_CHAR(this, ch); |
|
755 |
}
|
|
756 |
else { |
|
757 |
this->state = XML_IN_ATTR; |
|
758 |
if (this->step == XML_STEP_TAG) |
|
759 |
this->type = XML_start_tag_ATTR_CH; |
|
760 |
else if (this->step == XML_STEP_NESTED) |
|
761 |
this->type = XML_ATTR_CH; |
|
762 |
else
|
|
763 |
this->type = XML_add_attr_ATTR_CH; |
|
764 |
this->step = XML_STEP_ATTR; |
|
765 |
SET_CHAR(this, ch); |
|
766 |
}
|
|
767 |
break; |
|
768 |
case XML_SQR: |
|
769 |
SET_CHAR(this, ']'); |
|
770 |
goto cont; |
|
771 |
case XML_SLASH: |
|
772 |
SET_CHAR(this, '/'); |
|
773 |
goto cont; |
|
774 |
case XML_QMARK: |
|
775 |
SET_CHAR(this, '?'); |
|
776 |
cont: |
|
777 |
if (ISSPACE(ch)) { |
|
778 |
this->state = XML_BEFORE_ATTR; |
|
779 |
if (this->step == XML_STEP_TAG) |
|
780 |
this->type = XML_start_tag_TAG_CH; |
|
781 |
else if (this->step == XML_STEP_NESTED) |
|
782 |
this->type = XML_TAG_CH; |
|
783 |
else if (this->step == XML_STEP_NONE) |
|
784 |
this->type = XML_end_cdata_TAG_CH; |
|
785 |
else
|
|
786 |
this->type = XML_add_attr_TAG_CH; |
|
787 |
this->step = XML_STEP_ATTR; |
|
788 |
}
|
|
789 |
else if (ch == '<') { |
|
790 |
this->state = XML_LT; |
|
791 |
if (this->step == XML_STEP_TAG) |
|
792 |
this->type = XML_start_tag_TAG_CH; |
|
793 |
else if (this->step == XML_STEP_NESTED) |
|
794 |
this->type = XML_TAG_CH; |
|
795 |
else if (this->step == XML_STEP_NONE) |
|
796 |
this->type = XML_end_cdata_TAG_CH; |
|
797 |
else
|
|
798 |
this->type = XML_add_attr_TAG_CH; |
|
799 |
this->step = XML_STEP_TAG; |
|
800 |
}
|
|
801 |
else if (ch == '>') { |
|
802 |
if (this->state == XML_SLASH) { |
|
803 |
if (this->step == XML_STEP_TAG) |
|
804 |
this->type = XML_start_tag_end_empty_tag; |
|
805 |
else if (this->step == XML_STEP_NESTED) |
|
806 |
this->type = XML_end_empty_tag; |
|
807 |
else
|
|
808 |
this->type = XML_add_attr_end_empty_tag; |
|
809 |
}
|
|
810 |
else if (this->state == XML_SQR) { |
|
811 |
if (this->step == XML_STEP_TAG) |
|
812 |
this->type = XML_start_tag_end_tag(XML_OP_1_END_BRACKET_TAG); |
|
813 |
else if (this->step == XML_STEP_NESTED) |
|
814 |
this->type = XML_end_tag(XML_OP_1_END_BRACKET_TAG); |
|
815 |
else
|
|
816 |
this->type = XML_add_attr_end_tag(XML_OP_1_END_BRACKET_TAG); |
|
817 |
}
|
|
818 |
else { |
|
819 |
if (this->step == XML_STEP_TAG) |
|
820 |
this->type = XML_start_tag_end_pi_tag; |
|
821 |
else if (this->step == XML_STEP_NESTED) |
|
822 |
this->type = XML_end_pi_tag; |
|
823 |
else
|
|
824 |
this->type = XML_add_attr_end_pi_tag; |
|
825 |
}
|
|
826 |
this->nesting--; |
|
827 |
if (this->nesting) { |
|
828 |
this->step = XML_STEP_NESTED; |
|
829 |
this->state = XML_BEFORE_ATTR; |
|
830 |
}
|
|
831 |
else { |
|
832 |
this->step = XML_STEP_NONE; |
|
833 |
this->state = XML_IN_CDATA; |
|
834 |
}
|
|
835 |
}
|
|
836 |
else if (ch == '"' || ch == '\'') { |
|
837 |
this->state = XML_QUOTE_BEFORE_VALUE; |
|
838 |
this->quote = ch; |
|
839 |
if (this->step == XML_STEP_TAG) |
|
840 |
this->type = XML_start_tag_TAG_CH; |
|
841 |
else if (this->step == XML_STEP_NESTED) |
|
842 |
this->type = XML_TAG_CH; |
|
843 |
else if (this->step == XML_STEP_NONE) |
|
844 |
this->type = XML_end_cdata_TAG_CH; |
|
845 |
else
|
|
846 |
this->type = XML_add_attr_TAG_CH; |
|
847 |
this->step = XML_STEP_ATTR; |
|
848 |
}
|
|
849 |
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) { |
|
850 |
this->state = XML_SLASH; |
|
851 |
if (this->step == XML_STEP_TAG) |
|
852 |
this->type = XML_start_tag_TAG_CH; |
|
853 |
else if (this->step == XML_STEP_NESTED) |
|
854 |
this->type = XML_TAG_CH; |
|
855 |
else if (this->step == XML_STEP_NONE) |
|
856 |
this->type = XML_end_cdata_TAG_CH; |
|
857 |
else
|
|
858 |
this->type = XML_add_attr_TAG_CH; |
|
859 |
this->step = XML_STEP_ATTR; |
|
860 |
}
|
|
861 |
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) { |
|
862 |
this->state = XML_QMARK; |
|
863 |
if (this->step == XML_STEP_TAG) |
|
864 |
this->type = XML_start_tag_TAG_CH; |
|
865 |
else if (this->step == XML_STEP_NESTED) |
|
866 |
this->type = XML_TAG_CH; |
|
867 |
else if (this->step == XML_STEP_NONE) |
|
868 |
this->type = XML_end_cdata_TAG_CH; |
|
869 |
else
|
|
870 |
this->type = XML_add_attr_TAG_CH; |
|
871 |
this->step = XML_STEP_ATTR; |
|
872 |
}
|
|
873 |
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) { |
|
874 |
this->state = XML_SQR; |
|
875 |
if (this->step == XML_STEP_TAG) |
|
876 |
this->type = XML_start_tag_TAG_CH; |
|
877 |
else if (this->step == XML_STEP_NESTED) |
|
878 |
this->type = XML_TAG_CH; |
|
879 |
else if (this->step == XML_STEP_NONE) |
|
880 |
this->type = XML_end_cdata_TAG_CH; |
|
881 |
else
|
|
882 |
this->type = XML_add_attr_TAG_CH; |
|
883 |
this->step = XML_STEP_ATTR; |
|
884 |
}
|
|
885 |
else if (ISSINGLE(ch)) { |
|
886 |
this->state = XML_BEFORE_ATTR; |
|
887 |
if (this->step == XML_STEP_TAG) |
|
888 |
this->type = XML_start_tag_TAG_CH; |
|
889 |
else if (this->step == XML_STEP_NESTED) |
|
890 |
this->type = XML_TAG_CH; |
|
891 |
else if (this->step == XML_STEP_NONE) |
|
892 |
this->type = XML_end_cdata_TAG_CH; |
|
893 |
else
|
|
894 |
this->type = XML_add_attr_TAG_CH; |
|
895 |
this->step = XML_STEP_ATTR; |
|
896 |
ADD_CHAR(this, ch); |
|
897 |
}
|
|
898 |
else { |
|
899 |
this->state = XML_IN_ATTR; |
|
900 |
if (this->step == XML_STEP_TAG) |
|
901 |
this->type = XML_start_tag_TAG_CH; |
|
902 |
else if (this->step == XML_STEP_NESTED) |
|
903 |
this->type = XML_TAG_CH; |
|
904 |
else if (this->step == XML_STEP_NONE) |
|
905 |
this->type = XML_end_cdata_TAG_CH; |
|
906 |
else
|
|
907 |
this->type = XML_add_attr_TAG_CH; |
|
908 |
this->step = XML_STEP_ATTR; |
|
909 |
ADD_CHAR(this, ch); |
|
910 |
}
|
|
911 |
break; |
|
912 |
case XML_IN_COMMENT: |
|
913 |
if (ch == '-') { |
|
914 |
this->state = XML_IN_COMMENT_DASH; |
|
915 |
this->type = XML_noop; |
|
916 |
}
|
|
917 |
else
|
|
918 |
this->type = XML_COMMENT_CH; |
|
919 |
SET_CHAR(this, ch); |
|
920 |
break; |
|
921 |
case XML_IN_COMMENT_DASH: |
|
922 |
if (ch == '-') { |
|
923 |
this->state = XML_IN_COMMENT_DASH_DASH; |
|
924 |
this->type = XML_noop; |
|
925 |
}
|
|
926 |
else { |
|
927 |
this->state = XML_IN_COMMENT; |
|
928 |
this->type = XML_COMMENT_CH; |
|
929 |
}
|
|
930 |
ADD_CHAR(this, ch); |
|
931 |
break; |
|
932 |
case XML_IN_COMMENT_DASH_DASH: |
|
933 |
if (ch == '-') { |
|
934 |
this->state = XML_IN_COMMENT_3_DASH; |
|
935 |
this->type = XML_COMMENT_CH; |
|
936 |
SET_CHAR(this, ch); |
|
937 |
}
|
|
938 |
else if (ch == '>') { |
|
939 |
this->type = XML_end_comment; |
|
940 |
this->nesting--; |
|
941 |
if (this->nesting) { |
|
942 |
this->step = XML_STEP_NESTED; |
|
943 |
this->state = XML_BEFORE_ATTR; |
|
944 |
}
|
|
945 |
else { |
|
946 |
this->step = XML_STEP_NONE; |
|
947 |
this->state = XML_IN_CDATA; |
|
948 |
}
|
|
949 |
}
|
|
950 |
else { |
|
951 |
this->state = XML_IN_COMMENT; |
|
952 |
this->type = XML_COMMENT_CH; |
|
953 |
ADD_CHAR(this, ch); |
|
954 |
}
|
|
955 |
break; |
|
956 |
case XML_IN_COMMENT_3_DASH: |
|
957 |
if (ch == '-') { |
|
958 |
this->type = XML_COMMENT_CH; |
|
959 |
SET_CHAR(this, ch); |
|
960 |
}
|
|
961 |
else if (ch == '>') { |
|
962 |
this->type = XML_end_comment; |
|
963 |
this->nesting--; |
|
964 |
if (this->nesting) { |
|
965 |
this->step = XML_STEP_NESTED; |
|
966 |
this->state = XML_BEFORE_ATTR; |
|
967 |
}
|
|
968 |
else { |
|
969 |
this->step = XML_STEP_NONE; |
|
970 |
this->state = XML_IN_CDATA; |
|
971 |
}
|
|
972 |
}
|
|
973 |
else { |
|
974 |
this->state = XML_IN_COMMENT; |
|
975 |
this->type = XML_COMMENT_CH; |
|
976 |
SET_CHAR(this, '-'); |
|
977 |
ADD_CHAR(this, '-'); |
|
978 |
ADD_CHAR(this, ch); |
|
979 |
}
|
|
980 |
break; |
|
981 |
case XML_IN_CDATA_TAG: |
|
982 |
if (ch == ']') { |
|
983 |
this->state = XML_IN_CDATA_TAG_SQR; |
|
984 |
this->type = XML_noop; |
|
985 |
}
|
|
986 |
else
|
|
987 |
this->type = XML_CDATA_TAG_CH; |
|
988 |
SET_CHAR(this, ch); |
|
989 |
break; |
|
990 |
case XML_IN_CDATA_TAG_SQR: |
|
991 |
if (ch == ']') { |
|
992 |
this->state = XML_IN_CDATA_TAG_SQR_SQR; |
|
993 |
this->type = XML_noop; |
|
994 |
}
|
|
995 |
else { |
|
996 |
this->state = XML_IN_CDATA_TAG; |
|
997 |
this->type = XML_CDATA_TAG_CH; |
|
998 |
}
|
|
999 |
ADD_CHAR(this, ch); |
|
1000 |
break; |
|
1001 |
case XML_IN_CDATA_TAG_SQR_SQR: |
|
1002 |
if (ch == ']') { |
|
1003 |
this->state = XML_IN_CDATA_TAG_3_SQR; |
|
1004 |
this->type = XML_CDATA_TAG_CH; |
|
1005 |
SET_CHAR(this, ch); |
|
1006 |
}
|
|
1007 |
else if (ch == '>') { |
|
1008 |
this->type = XML_end_cdata_tag; |
|
1009 |
this->nesting--; |
|
1010 |
if (this->nesting) { |
|
1011 |
this->step = XML_STEP_NESTED; |
|
1012 |
this->state = XML_BEFORE_ATTR; |
|
1013 |
}
|
|
1014 |
else { |
|
1015 |
this->step = XML_STEP_NONE; |
|
1016 |
this->state = XML_IN_CDATA; |
|
1017 |
}
|
|
1018 |
}
|
|
1019 |
else { |
|
1020 |
this->state = XML_IN_CDATA_TAG; |
|
1021 |
this->type = XML_CDATA_TAG_CH; |
|
1022 |
ADD_CHAR(this, ch); |
|
1023 |
}
|
|
1024 |
break; |
|
1025 |
case XML_IN_CDATA_TAG_3_SQR: |
|
1026 |
if (ch == ']') { |
|
1027 |
this->type = XML_CDATA_TAG_CH; |
|
1028 |
SET_CHAR(this, ch); |
|
1029 |
}
|
|
1030 |
else if (ch == '>') { |
|
1031 |
this->type = XML_end_cdata_tag; |
|
1032 |
this->nesting--; |
|
1033 |
if (this->nesting) { |
|
1034 |
this->step = XML_STEP_NESTED; |
|
1035 |
this->state = XML_BEFORE_ATTR; |
|
1036 |
}
|
|
1037 |
else { |
|
1038 |
this->step = XML_STEP_NONE; |
|
1039 |
this->state = XML_IN_CDATA; |
|
1040 |
}
|
|
1041 |
}
|
|
1042 |
else { |
|
1043 |
this->state = XML_IN_CDATA_TAG; |
|
1044 |
this->type = XML_CDATA_TAG_CH; |
|
1045 |
SET_CHAR(this, ']'); |
|
1046 |
ADD_CHAR(this, ']'); |
|
1047 |
ADD_CHAR(this, ch); |
|
1048 |
}
|
|
1049 |
break; |
|
1050 |
}
|
|
1051 |
return(this->type); |
|
1052 |
}
|
|
1053 |
||
1054 |
/* ------------------------------------------------------------------- */
|
|
1055 |
/* CSXMLProcessor */
|
|
1056 |
||
1057 |
bool CSXMLProcessor::buildConversionTable() |
|
1058 |
{
|
|
1059 |
int32_t i; |
|
1060 |
||
1061 |
/* By default we don't know how to convert any charset
|
|
1062 |
* other tha ISO-1 to unicode!
|
|
1063 |
*/
|
|
1064 |
if (strcasecmp(charset, "ISO-8859-1") == 0) { |
|
1065 |
for (i=0; i<128; i++) |
|
1066 |
conversion_table[i] = (wchar_t) (i + 128); |
|
1067 |
}
|
|
1068 |
else { |
|
1069 |
for (i=0; i<128; i++) |
|
1070 |
conversion_table[i] = '?'; |
|
1071 |
}
|
|
1072 |
return true; |
|
1073 |
}
|
|
1074 |
||
1075 |
// Private use area: E000 - F8FF
|
|
1076 |
||
1077 |
int32_t CSXMLProcessor::capture_initializer(wchar_t ch) |
|
1078 |
/* We capture tag and attribute data for the parsing purposes.
|
|
1079 |
* The buffers are initialized here (at the lowest level)
|
|
1080 |
* of processing after parsing.
|
|
1081 |
*/
|
|
1082 |
{
|
|
1083 |
int32_t op; |
|
1084 |
||
1085 |
op = parseChar(ch); |
|
1086 |
switch (op & XML_OP_1_MASK) { |
|
1087 |
case XML_OP_1_START_TAG: |
|
1088 |
this->tlength = 0; |
|
1089 |
break; |
|
1090 |
case XML_OP_1_ADD_ATTR: |
|
1091 |
this->nlength = 0; |
|
1092 |
this->vlength = 0; |
|
1093 |
break; |
|
1094 |
}
|
|
1095 |
return(op); |
|
1096 |
}
|
|
1097 |
||
1098 |
int32_t CSXMLProcessor::entity_translator(wchar_t ch) |
|
1099 |
/* This function handles entities.
|
|
1100 |
* Certain entities are translated into UNICODE characters.
|
|
1101 |
* Strictly speaking, these enties are only recognised by HTML.
|
|
1102 |
* The few entities that are recognised by XML are first translated
|
|
1103 |
* into some reserved characters for the parser. This is to ensure
|
|
1104 |
* that the parser does not recognize them as characters with special
|
|
1105 |
* meaning! This includes '&', '<' and '>'.
|
|
1106 |
*/
|
|
1107 |
{
|
|
1108 |
int32_t op; |
|
1109 |
||
1110 |
op = capture_initializer(ch); |
|
1111 |
return(op); |
|
1112 |
}
|
|
1113 |
||
1114 |
/*
|
|
1115 |
* This function translates the input character stream into UNICODE.
|
|
1116 |
*/
|
|
1117 |
int32_t CSXMLProcessor::charset_transformer(wchar_t ch) |
|
1118 |
{
|
|
1119 |
int32_t op; |
|
1120 |
||
1121 |
// Do transformation according to the charset.
|
|
1122 |
switch (this->charset_type) { |
|
1123 |
case CHARSET_UTF_8: |
|
1124 |
if (ch > 127 && ch < 256) { |
|
1125 |
uint32_t utf_value; |
|
1126 |
uint8_t utf_ch = (uint8_t)ch; |
|
1127 |
||
1128 |
if ((utf_ch & 0xC0) != 0x80) |
|
1129 |
this->utf8_count = 0; |
|
1130 |
if ((utf_ch & 0x80) == 0x00) |
|
1131 |
this->utf8_length = 1; |
|
1132 |
else if ((utf_ch & 0xE0) == 0xC0) |
|
1133 |
this->utf8_length = 2; |
|
1134 |
else if ((utf_ch & 0xF0) == 0xE0) |
|
1135 |
this->utf8_length = 3; |
|
1136 |
else if ((utf_ch & 0xF8) == 0xF0) |
|
1137 |
this->utf8_length = 4; |
|
1138 |
else if ((utf_ch & 0xFC) == 0xF8) |
|
1139 |
this->utf8_length = 5; |
|
1140 |
else if ((utf_ch & 0xFE) == 0xFC) |
|
1141 |
this->utf8_length = 6; |
|
1142 |
this->utf8_buffer[this->utf8_count] = (uint32_t) utf_ch; |
|
1143 |
this->utf8_count++; |
|
1144 |
if (this->utf8_count < this->utf8_length) { |
|
1145 |
// I need more bytes!
|
|
1146 |
setDataType(XML_noop); |
|
1147 |
return(XML_noop); |
|
1148 |
}
|
|
1149 |
utf_value = 0; |
|
1150 |
switch (this->utf8_length) { |
|
1151 |
case 1: |
|
1152 |
utf_value = this->utf8_buffer[0] & 0x0000007F; |
|
1153 |
break; |
|
1154 |
case 2: |
|
1155 |
utf_value = ((this->utf8_buffer[0] & 0x0000001F) << 6) | |
|
1156 |
(this->utf8_buffer[1] & 0x0000003F); |
|
1157 |
if (utf_value < 0x00000080) |
|
1158 |
utf_value = '?'; |
|
1159 |
break; |
|
1160 |
case 3: |
|
1161 |
utf_value = ((this->utf8_buffer[0] & 0x0000000F) << 12) | |
|
1162 |
((this->utf8_buffer[1] & 0x0000003F) << 6) | |
|
1163 |
(this->utf8_buffer[2] & 0x0000003F); |
|
1164 |
if (utf_value < 0x000000800) |
|
1165 |
utf_value = '?'; |
|
1166 |
break; |
|
1167 |
case 4: |
|
1168 |
utf_value = ((this->utf8_buffer[0] & 0x00000007) << 18) | |
|
1169 |
((this->utf8_buffer[1] & 0x0000003F) << 12) | |
|
1170 |
((this->utf8_buffer[2] & 0x0000003F) << 6) | |
|
1171 |
(this->utf8_buffer[3] & 0x0000003F); |
|
1172 |
if (utf_value < 0x00010000) |
|
1173 |
utf_value = '?'; |
|
1174 |
break; |
|
1175 |
case 5: |
|
1176 |
utf_value = ((this->utf8_buffer[0] & 0x00000003) << 24) | |
|
1177 |
((this->utf8_buffer[1] & 0x0000003F) << 18) | |
|
1178 |
((this->utf8_buffer[2] & 0x0000003F) << 12) | |
|
1179 |
((this->utf8_buffer[3] & 0x0000003F) << 6) | |
|
1180 |
(this->utf8_buffer[4] & 0x0000003F); |
|
1181 |
if (utf_value < 0x00200000) |
|
1182 |
utf_value = '?'; |
|
1183 |
break; |
|
1184 |
case 6: |
|
1185 |
utf_value = ((this->utf8_buffer[0] & 0x00000001) << 30) | |
|
1186 |
((this->utf8_buffer[1] & 0x0000003F) << 24) | |
|
1187 |
((this->utf8_buffer[2] & 0x0000003F) << 18) | |
|
1188 |
((this->utf8_buffer[3] & 0x0000003F) << 12) | |
|
1189 |
((this->utf8_buffer[4] & 0x0000003F) << 6) | |
|
1190 |
(this->utf8_buffer[5] & 0x0000003F); |
|
1191 |
if (utf_value < 0x04000000) |
|
1192 |
utf_value = '?'; |
|
1193 |
break; |
|
1194 |
}
|
|
1195 |
if (utf_value > 0x0000FFFF) |
|
1196 |
ch = '?'; |
|
1197 |
else
|
|
1198 |
ch = utf_value; |
|
1199 |
}
|
|
1200 |
break; |
|
1201 |
case CHARSET_TO_CONVERT_8_BIT: |
|
1202 |
if (ch > 127 && ch < 256) |
|
1203 |
ch = this->conversion_table[((unsigned char) ch) - 128]; |
|
1204 |
break; |
|
1205 |
}
|
|
1206 |
||
1207 |
op = entity_translator(ch); |
|
1208 |
||
1209 |
// Determine the characters set:
|
|
1210 |
switch (op & XML_OP_1_MASK) { |
|
1211 |
case XML_OP_1_START_TAG: |
|
1212 |
if (strcmp(this->pr_tag, "?xml") == 0) |
|
1213 |
this->ip = true; |
|
1214 |
else
|
|
1215 |
this->ip = false; |
|
1216 |
break; |
|
1217 |
case XML_OP_1_ADD_ATTR: |
|
1218 |
if (this->ip) { |
|
1219 |
if (strcasecmp(this->pr_name, "encoding") == 0) { |
|
1220 |
strcpy(this->charset, this->pr_value); |
|
1644.3.1
by Barry.Leslie at PrimeBase
Merged in changes from PBMS project. |
1221 |
if (STRCASESTR(this->charset, "utf-8")) |
1548.2.11
by Barry.Leslie at PrimeBase
Removed libxml reqirement by using a home grown xml parser. |
1222 |
this->charset_type = CHARSET_UTF_8; |
1644.3.1
by Barry.Leslie at PrimeBase
Merged in changes from PBMS project. |
1223 |
else if (STRCASESTR(this->charset, "ucs-2") || |
1224 |
STRCASESTR(this->charset, "ucs-4") || |
|
1225 |
STRCASESTR(this->charset, "unicode")) |
|
1548.2.11
by Barry.Leslie at PrimeBase
Removed libxml reqirement by using a home grown xml parser. |
1226 |
this->charset_type = CHARSET_STANDARD; |
1227 |
else { |
|
1228 |
this->charset_type = CHARSET_TO_CONVERT_8_BIT; |
|
1229 |
buildConversionTable(); |
|
1230 |
}
|
|
1231 |
}
|
|
1232 |
}
|
|
1233 |
break; |
|
1234 |
}
|
|
1235 |
return(op); |
|
1236 |
}
|
|
1237 |
||
1238 |
/*
 * Append up to slen wide characters from schars to the NUL-terminated
 * byte string dstr.
 *
 *   dstr   destination buffer
 *   dlen   in/out: current length of the string in dstr (without the
 *          terminator); advanced for every character appended
 *   dsize  total size of dstr in bytes, including room for the terminator
 *   schars source characters
 *   slen   number of source characters
 *
 * Characters outside the range 0..127 are stored as '~'. Input that does
 * not fit is silently dropped; the string is always left terminated
 * after an append.
 */
void CSXMLProcessor::appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen)
{
	/* With dsize == 0 the expression "dsize - 1" would wrap around and
	 * allow writes into a buffer that has no room at all.
	 */
	if (dsize == 0)
		return;

	for (size_t i = 0; i < slen; i++) {
		if (*dlen >= dsize - 1)
			break;	/* Buffer full, the remaining input is dropped. */

		/* Compare as unsigned so that negative values of a signed
		 * wchar_t are also treated as non-ASCII.
		 */
		if (((unsigned long) schars[i]) > 127)
			dstr[*dlen] = '~';
		else
			dstr[*dlen] = (char) schars[i];
		(*dlen)++;
		dstr[*dlen] = 0;
	}
}
|
|
1252 |
||
1253 |
int32_t CSXMLProcessor::processChar(wchar_t ch) |
|
1254 |
{
|
|
1255 |
int32_t op; |
|
1256 |
||
1257 |
op = charset_transformer(ch); |
|
1258 |
||
1259 |
/*
|
|
1260 |
* Capture output tag and attribute data.
|
|
1261 |
* This must be done at the highest level, after
|
|
1262 |
* parsing.
|
|
1263 |
*/
|
|
1264 |
switch (op & XML_DATA_MASK) { |
|
1265 |
case XML_DATA_TAG: |
|
1266 |
appendWCharToString(this->pr_tag, &this->tlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen()); |
|
1267 |
break; |
|
1268 |
case XML_DATA_ATTR: |
|
1269 |
appendWCharToString(this->pr_name, &this->nlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen()); |
|
1270 |
break; |
|
1271 |
case XML_DATA_VALUE: |
|
1272 |
appendWCharToString(this->pr_value, &this->vlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen()); |
|
1273 |
break; |
|
1274 |
}
|
|
1275 |
return(op); |
|
1276 |
}
|
|
1277 |
||
1278 |
bool CSXMLProcessor::getError(int32_t *err, char **msg) |
|
1279 |
{
|
|
1280 |
*err = err_no; |
|
1281 |
*msg = err_message; |
|
1282 |
return err_no != 0; |
|
1283 |
}
|
|
1284 |
||
1285 |
void CSXMLProcessor::setError(int32_t err, char *msg) |
|
1286 |
{
|
|
1287 |
err_no = err; |
|
1288 |
if (msg) { |
|
1289 |
strncpy(err_message, msg, CS_XML_ERR_MSG_SIZE); |
|
1290 |
err_message[CS_XML_ERR_MSG_SIZE-1] = 0; |
|
1291 |
return; |
|
1292 |
}
|
|
1293 |
||
1294 |
switch (err) { |
|
1295 |
case CS_XML_ERR_OUT_OF_MEMORY: |
|
1296 |
sprintf(err_message, "AES parse error- insufficient memory"); |
|
1297 |
break; |
|
1298 |
case CS_XML_ERR_CHAR_TOO_LARGE: |
|
1299 |
sprintf(err_message, "AES parse error- UNICODE character too large to be encoded as UTF-8"); |
|
1300 |
break; |
|
1301 |
default: |
|
1302 |
sprintf(err_message, "AES parse error- %s", strerror(err)); |
|
1303 |
break; |
|
1304 |
}
|
|
1305 |
}
|
|
1306 |
||
1307 |
void CSXMLProcessor::printError(char *prefix) |
|
1308 |
{
|
|
1309 |
printf("%s%s", prefix, err_message); |
|
1310 |
}
|
|
1311 |
||
1312 |
/* ------------------------------------------------------------------- */
|
|
1313 |
/* CSXMLString */
|
|
1314 |
||
1315 |
/*
 * Number of spare bytes added each time a CSXMLString buffer grows.
 * The value is deliberately tiny when DEBUG_ALL is defined.
 */
#if defined(DEBUG_ALL)
#define EXTRA_SIZE			2
#else
#define EXTRA_SIZE			100
#endif
|
|
1320 |
||
1321 |
bool CSXMLString::addChar(char ch, CSXMLProcessor *xml) |
|
1322 |
{
|
|
1323 |
char *ptr; |
|
1324 |
||
1325 |
if (stringLen + 2 > stringSize) { |
|
1326 |
if (!(ptr = (char *) realloc(stringPtr, stringLen + 2 + EXTRA_SIZE))) { |
|
1327 |
xml->setError(CS_XML_ERR_OUT_OF_MEMORY, NULL); |
|
1328 |
return false; |
|
1329 |
}
|
|
1330 |
stringPtr = ptr; |
|
1331 |
stringSize = stringLen + 2 + EXTRA_SIZE; |
|
1332 |
}
|
|
1333 |
stringPtr[stringLen] = ch; |
|
1334 |
stringPtr[stringLen+1] = 0; |
|
1335 |
stringLen++; |
|
1336 |
return true; |
|
1337 |
}
|
|
1338 |
||
1339 |
bool CSXMLString::addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml) |
|
1340 |
{
|
|
1341 |
size_t i; |
|
1342 |
uint32_t uni_char; |
|
1343 |
int32_t shift; |
|
1344 |
||
1345 |
for (i=0; i<size; i++) { |
|
1346 |
uni_char = (uint32_t) buffer[i]; |
|
1347 |
||
1348 |
/* Convertion to lower only done for ASCII! */
|
|
1349 |
if (to_lower && uni_char <= 127) |
|
1350 |
uni_char = (uint32_t) tolower((int32_t) uni_char); |
|
1351 |
||
1352 |
// Convert to UTF-8!
|
|
1353 |
if (uni_char <= 0x0000007F) { |
|
1354 |
if (!addChar((char) uni_char, xml)) |
|
1355 |
return false; |
|
1356 |
shift = -6; |
|
1357 |
}
|
|
1358 |
else if (uni_char <= 0x000007FF) { |
|
1359 |
if (!addChar((char) ((0x000000C0) | ((uni_char >> 6) & 0x0000001F)), xml)) |
|
1360 |
return false; |
|
1361 |
shift = 0; |
|
1362 |
}
|
|
1363 |
else if (uni_char <= 0x00000FFFF) { |
|
1364 |
if (!addChar((char) ((0x000000E0) | ((uni_char >> 12) & 0x0000000F)), xml)) |
|
1365 |
return false; |
|
1366 |
shift = 6; |
|
1367 |
}
|
|
1368 |
else if (uni_char <= 0x001FFFFF) { |
|
1369 |
if (!addChar((char) ((0x000000F0) | ((uni_char >> 18) & 0x00000007)), xml)) |
|
1370 |
return false; |
|
1371 |
shift = 12; |
|
1372 |
}
|
|
1373 |
else if (uni_char <= 0x003FFFFFF) { |
|
1374 |
if (!addChar((char) ((0x000000F0) | ((uni_char >> 24) & 0x00000003)), xml)) |
|
1375 |
return false; |
|
1376 |
shift = 18; |
|
1377 |
}
|
|
1378 |
else if (uni_char <= 0x07FFFFFFF) { |
|
1379 |
if (!addChar((char) ((0x000000F0) | ((uni_char >> 30) & 0x00000001)), xml)) |
|
1380 |
return false; |
|
1381 |
shift = 24; |
|
1382 |
}
|
|
1383 |
else { |
|
1384 |
xml->setError(CS_XML_ERR_CHAR_TOO_LARGE, NULL); |
|
1385 |
return false; |
|
1386 |
}
|
|
1387 |
||
1388 |
while (shift >= 0) { |
|
1389 |
if (!addChar((char) ((0x00000080) | ((uni_char >> shift) & 0x0000003F)), xml)) |
|
1390 |
return false; |
|
1391 |
shift -= 6; |
|
1392 |
}
|
|
1393 |
}
|
|
1394 |
return true; |
|
1395 |
}
|
|
1396 |
||
1397 |
bool CSXMLString::addString(const char *string, CSXMLProcessor *xml) |
|
1398 |
{
|
|
1399 |
bool ok = true; |
|
1400 |
||
1401 |
while (*string && ok) { |
|
1402 |
ok = addChar(*string, xml); |
|
1403 |
string++; |
|
1404 |
}
|
|
1405 |
return ok; |
|
1406 |
}
|
|
1407 |
||
1408 |
void CSXMLString::setEmpty() |
|
1409 |
{
|
|
1410 |
stringLen = 0; |
|
1411 |
if (stringPtr) |
|
1412 |
*stringPtr = 0; |
|
1413 |
}
|
|
1414 |
||
1415 |
void CSXMLString::setNull() |
|
1416 |
{
|
|
1417 |
if (stringPtr) |
|
1418 |
free(stringPtr); |
|
1419 |
stringPtr = NULL; |
|
1420 |
stringLen = 0; |
|
1421 |
stringSize = 0; |
|
1422 |
}
|
|
1423 |
||
1424 |
char *CSXMLString::lastComponent() |
|
1425 |
{
|
|
1426 |
char *ptr; |
|
1427 |
||
1428 |
if (stringLen == 0) |
|
1429 |
return NULL; |
|
1430 |
||
1431 |
ptr = stringPtr + stringLen - 1; |
|
1432 |
while (ptr > stringPtr && *ptr != '/') |
|
1433 |
ptr--; |
|
1434 |
return ptr; |
|
1435 |
}
|
|
1436 |
||
1437 |
/* We assume comp begins with a '/' */
|
|
1438 |
char *CSXMLString::findTrailingComponent(const char *comp) |
|
1439 |
{
|
|
1440 |
char *ptr, *last_slash; |
|
1441 |
||
1442 |
if (stringLen == 0) |
|
1443 |
return NULL; |
|
1444 |
||
1445 |
ptr = stringPtr + stringLen - 1; |
|
1446 |
last_slash = NULL; |
|
1447 |
||
1448 |
do { |
|
1449 |
/* Find the next '/' */
|
|
1450 |
while (ptr > stringPtr && *ptr != '/') |
|
1451 |
ptr--; |
|
1452 |
if (last_slash) |
|
1453 |
*last_slash = 0; |
|
1454 |
if (strcmp(ptr, comp) == 0) { |
|
1455 |
if (last_slash) |
|
1456 |
*last_slash = '/'; |
|
1457 |
return ptr; |
|
1458 |
}
|
|
1459 |
if (last_slash) |
|
1460 |
*last_slash = '/'; |
|
1461 |
last_slash = ptr; |
|
1462 |
ptr--; |
|
1463 |
}
|
|
1464 |
while (ptr > stringPtr); |
|
1465 |
return NULL; |
|
1466 |
}
|
|
1467 |
||
1468 |
void CSXMLString::truncate(char *ptr) |
|
1469 |
{
|
|
1470 |
*ptr = 0; |
|
1471 |
stringLen = ptr - stringPtr; |
|
1472 |
}
|
|
1473 |
||
1474 |
/* ------------------------------------------------------------------- */
|
|
1475 |
/* CSXML */
|
|
1476 |
||
1477 |
/* Node classifications returned by CSXML::nodeType(). */
#define IS_XML_CDATA				0	/* No name or an empty name */
#define IS_XML_CDATA_TAG			1	/* "![CDATA[" */
#define IS_XML_TAG					2	/* Any other name */
#define IS_XML_CLOSE_TAG			3	/* Name starts with '/' */
#define IS_XML_COMMENT				4	/* "!--" */
#define IS_XML_DTD					5	/* Other names starting with '!' */
#define IS_XML_PI					6	/* Names starting with '?' */
#define IS_XML_PI_XML				7	/* "?xml" */
#define IS_XML_IN_EX				8	/* Other names starting with "![" */
#define IS_XML_OPEN_BRACKET			9	/* "[" */
#define IS_XML_CLOSE_BRACKET		10	/* "]" */
|
|
1488 |
||
1489 |
int32_t CSXML::nodeType(char *name) |
|
1490 |
{
|
|
1491 |
if (name) { |
|
1492 |
switch (*name) { |
|
1493 |
case 0: |
|
1494 |
return IS_XML_CDATA; |
|
1495 |
case '[': |
|
1496 |
if (strlen(name) == 1) |
|
1497 |
return IS_XML_OPEN_BRACKET; |
|
1498 |
break; |
|
1499 |
case ']': |
|
1500 |
if (strlen(name) == 1) |
|
1501 |
return IS_XML_CLOSE_BRACKET; |
|
1502 |
break; |
|
1503 |
case '/': |
|
1504 |
return IS_XML_CLOSE_TAG; |
|
1505 |
case '!': |
|
1506 |
if (strlen(name) > 1) { |
|
1507 |
if (strcasecmp(name, "!--") == 0) |
|
1508 |
return IS_XML_COMMENT; |
|
1509 |
if (name[1] == '[') { |
|
1510 |
if (strcasecmp(name, "![CDATA[") == 0) |
|
1511 |
return IS_XML_CDATA_TAG; |
|
1512 |
return IS_XML_IN_EX; |
|
1513 |
}
|
|
1514 |
}
|
|
1515 |
return IS_XML_DTD; |
|
1516 |
case '?': |
|
1517 |
if (strcasecmp(name, "?xml") == 0) |
|
1518 |
return IS_XML_PI_XML; |
|
1519 |
return IS_XML_PI; |
|
1520 |
}
|
|
1521 |
return IS_XML_TAG; |
|
1522 |
}
|
|
1523 |
return IS_XML_CDATA; |
|
1524 |
}
|
|
1525 |
||
1526 |
bool CSXML::internalCloseNode(const char *name, bool single) |
|
1527 |
{
|
|
1528 |
bool ok = true; |
|
1529 |
char *ptr; |
|
1530 |
||
1531 |
if (single) { |
|
1532 |
if ((ptr = xml_path.lastComponent())) { |
|
1533 |
ok = closeNode(xml_path.stringPtr); |
|
1534 |
xml_path.truncate(ptr); |
|
1535 |
}
|
|
1536 |
}
|
|
1537 |
else if ((ptr = xml_path.findTrailingComponent(name))) { |
|
1538 |
/* Close the node that is named above. If the XML is
|
|
1539 |
* correct, then the node should be at the top of the
|
|
1540 |
* node stack (last element of the path).
|
|
1541 |
*
|
|
1542 |
* If not found, "ignore" the close.
|
|
1543 |
*
|
|
1544 |
* If not found on the top of the node stack, then
|
|
1545 |
* we close serveral nodes.
|
|
1546 |
*/
|
|
1547 |
for (;;) { |
|
1548 |
if (!(ptr = xml_path.lastComponent())) |
|
1549 |
break; |
|
1550 |
if (!(ok = closeNode(xml_path.stringPtr))) |
|
1551 |
break; |
|
1552 |
if (strcmp(ptr, name) == 0) { |
|
1553 |
xml_path.truncate(ptr); |
|
1554 |
break; |
|
1555 |
}
|
|
1556 |
xml_path.truncate(ptr); |
|
1557 |
}
|
|
1558 |
}
|
|
1559 |
return ok; |
|
1560 |
}
|
|
1561 |
||
1562 |
bool CSXML::internalOpenNode(const char *name) |
|
1563 |
{
|
|
1564 |
bool ok; |
|
1565 |
||
1566 |
ok = xml_path.addString("/", this); |
|
1567 |
if (!ok) |
|
1568 |
return ok; |
|
1569 |
ok = xml_path.addString(name, this); |
|
1570 |
if (!ok) |
|
1571 |
return ok; |
|
1572 |
return openNode(this->xml_path.stringPtr, this->xml_value.stringPtr); |
|
1573 |
}
|
|
1574 |
||
1575 |
bool CSXML::parseXML(int32_t my_flags) |
|
1576 |
{
|
|
1577 |
wchar_t ch; |
|
1578 |
bool ok = true; |
|
1579 |
int32_t op; |
|
1580 |
int32_t tagtype; |
|
1581 |
||
1582 |
this->flags = my_flags; |
|
1583 |
ok = xml_path.addChars(0, NULL, false, this); |
|
1584 |
if (!ok) |
|
1585 |
goto exit; |
|
1586 |
ok = xml_name.addChars(0, NULL, false, this); |
|
1587 |
if (!ok) |
|
1588 |
goto exit; |
|
1589 |
ok = xml_value.addChars(0, NULL, false, this); |
|
1590 |
if (!ok) |
|
1591 |
goto exit; |
|
1592 |
||
1593 |
ok = getChar(&ch); |
|
1594 |
while (ch != CS_XML_EOF_CHAR && ok) { |
|
1595 |
op = processChar(ch); |
|
1596 |
switch (op & XML_OP_1_MASK) { |
|
1597 |
case XML_OP_1_NOOP: |
|
1598 |
break; |
|
1599 |
case XML_OP_1_END_TAG: |
|
1600 |
break; |
|
1601 |
case XML_OP_1_END_CLOSE_TAG: |
|
1602 |
break; |
|
1603 |
case XML_OP_1_END_EMPTY_TAG: |
|
1604 |
ok = internalCloseNode("/>", true); |
|
1605 |
break; |
|
1606 |
case XML_OP_1_END_PI_TAG: |
|
1607 |
ok = internalCloseNode("?>", true); |
|
1608 |
break; |
|
1609 |
case XML_OP_1_END_ENTITY_TAG: |
|
1610 |
ok = internalCloseNode(">", true); |
|
1611 |
break; |
|
1612 |
case XML_OP_1_END_BRACKET_TAG: |
|
1613 |
ok = internalCloseNode("]>", true); |
|
1614 |
break; |
|
1615 |
case XML_OP_1_END_UNKNOWN_TAG: |
|
1616 |
ok = internalCloseNode(">", true); |
|
1617 |
break; |
|
1618 |
case XML_OP_1_START_CDATA_TAG: |
|
1619 |
break; |
|
1620 |
case XML_OP_1_START_COMMENT: |
|
1621 |
break; |
|
1622 |
case XML_OP_1_START_TAG: |
|
1623 |
if (nodeType(xml_name.stringPtr) == IS_XML_CLOSE_TAG) |
|
1624 |
ok = internalCloseNode(xml_name.stringPtr, false); |
|
1625 |
else
|
|
1626 |
ok = internalOpenNode(xml_name.stringPtr); |
|
1627 |
xml_name.setEmpty(); |
|
1628 |
xml_value.setEmpty(); |
|
1629 |
break; |
|
1630 |
case XML_OP_1_ADD_ATTR: |
|
1631 |
tagtype = nodeType(xml_name.stringPtr); |
|
1632 |
if (tagtype != IS_XML_OPEN_BRACKET && tagtype != IS_XML_CLOSE_BRACKET) |
|
1633 |
ok = addAttribute(xml_path.stringPtr, xml_name.stringPtr, xml_value.stringPtr); |
|
1634 |
xml_name.setEmpty(); |
|
1635 |
xml_value.setEmpty(); |
|
1636 |
break; |
|
1637 |
case XML_OP_1_END_CDATA: |
|
1638 |
if (xml_value.stringLen || (my_flags & XML_KEEP_EMPTY_CDATA)) { |
|
1639 |
ok = internalOpenNode(""); |
|
1640 |
xml_name.setEmpty(); |
|
1641 |
xml_value.setEmpty(); |
|
1642 |
ok = internalCloseNode("", true); |
|
1643 |
}
|
|
1644 |
break; |
|
1645 |
case XML_OP_1_END_CDATA_TAG: |
|
1646 |
ok = internalOpenNode("![CDATA["); |
|
1647 |
xml_name.setEmpty(); |
|
1648 |
xml_value.setEmpty(); |
|
1649 |
if (ok) |
|
1650 |
ok = internalCloseNode("]]>", true); |
|
1651 |
break; |
|
1652 |
case XML_OP_1_END_COMMENT: |
|
1653 |
ok = internalOpenNode("!--"); |
|
1654 |
xml_name.setEmpty(); |
|
1655 |
xml_value.setEmpty(); |
|
1656 |
if (ok) |
|
1657 |
ok = internalCloseNode("-->", true); |
|
1658 |
break; |
|
1659 |
}
|
|
1660 |
if (!ok) |
|
1661 |
break; |
|
1662 |
switch (op & XML_DATA_MASK) { |
|
1663 |
case XML_DATA_TAG: |
|
1664 |
case XML_DATA_ATTR: |
|
1665 |
ok = xml_name.addChars(getDataLen(), getDataPtr(), true, this); |
|
1666 |
break; |
|
1667 |
case XML_DATA_CDATA: |
|
1668 |
case XML_DATA_CDATA_TAG: |
|
1669 |
case XML_COMMENT: |
|
1670 |
case XML_DATA_VALUE: |
|
1671 |
ok = xml_value.addChars(getDataLen(), getDataPtr(), false, this); |
|
1672 |
break; |
|
1673 |
}
|
|
1674 |
if (!ok) |
|
1675 |
break; |
|
1676 |
switch (op & XML_OP_2_MASK) { |
|
1677 |
case XML_OP_2_NOOP: |
|
1678 |
break; |
|
1679 |
case XML_OP_2_END_TAG: |
|
1680 |
break; |
|
1681 |
case XML_OP_2_END_CLOSE_TAG: |
|
1682 |
break; |
|
1683 |
case XML_OP_2_END_EMPTY_TAG: |
|
1684 |
ok = internalCloseNode("/>", true); |
|
1685 |
break; |
|
1686 |
case XML_OP_2_END_PI_TAG: |
|
1687 |
ok = internalCloseNode("?>", true); |
|
1688 |
break; |
|
1689 |
case XML_OP_2_END_ENTITY_TAG: |
|
1690 |
ok = internalCloseNode(">", true); |
|
1691 |
break; |
|
1692 |
case XML_OP_2_END_BRACKET_TAG: |
|
1693 |
ok = internalCloseNode("]>", true); |
|
1694 |
break; |
|
1695 |
case XML_OP_2_END_UNKNOWN_TAG: |
|
1696 |
ok = internalCloseNode(">", true); |
|
1697 |
break; |
|
1698 |
case XML_OP_2_START_CDATA_TAG: |
|
1699 |
break; |
|
1700 |
case XML_OP_2_START_COMMENT: |
|
1701 |
break; |
|
1702 |
}
|
|
1703 |
ok = getChar(&ch); |
|
1704 |
}
|
|
1705 |
||
1706 |
exit: |
|
1707 |
xml_path.setNull(); |
|
1708 |
xml_name.setNull(); |
|
1709 |
xml_value.setNull(); |
|
1710 |
return ok; |
|
1711 |
}
|
|
1712 |
||
1713 |
/* ------------------------------------------------------------------- */
|
|
1714 |
/* CSXMLPrint */
|
|
1715 |
||
1716 |
/*
 * Print the path of a node that is opened and, on a line of its own,
 * its value when there is one. Always returns true.
 */
bool CSXMLPrint::openNode(char *path, char *value)
{
	const bool	has_value = (value != NULL && value[0] != 0);

	printf("OPEN %s\n", path);
	if (has_value)
		printf(" %s\n", value);
	return true;
}
|
|
1723 |
||
1724 |
/*
 * Print the path of a node that is closed. Always returns true.
 */
bool CSXMLPrint::closeNode(char *path)
{
	fprintf(stdout, "close %s\n", path);
	return true;
}
|
|
1729 |
||
1730 |
/*
 * Print an attribute of the node at path, as "name=value" or just
 * "name" when there is no value. Always returns true.
 */
bool CSXMLPrint::addAttribute(char *path, char *name, char *value)
{
	if (value == NULL)
		printf("attr %s %s\n", path, name);
	else
		printf("attr %s %s=%s\n", path, name, value);
	return true;
}
|
|
1738 |
||
1739 |
/* ------------------------------------------------------------------- */
|
|
1740 |
/* CSXMLBuffer */
|
|
1741 |
||
1742 |
bool CSXMLBuffer::parseString(const char *data, int32_t my_flags) |
|
1743 |
{
|
|
1744 |
charData = data; |
|
1745 |
dataLen = strlen(data); |
|
1746 |
dataPos = 0; |
|
1747 |
return parseXML(my_flags); |
|
1748 |
}
|
|
1749 |
||
1750 |
bool CSXMLBuffer::parseData(const char *data, size_t len, int32_t my_flags) |
|
1751 |
{
|
|
1752 |
charData = data; |
|
1753 |
dataLen = len; |
|
1754 |
dataPos = 0; |
|
1755 |
return parseXML(my_flags); |
|
1756 |
}
|
|
1757 |
||
1758 |
bool CSXMLBuffer::getChar(wchar_t *ch) |
|
1759 |
{
|
|
1760 |
if (dataPos == dataLen) |
|
1761 |
*ch = CS_XML_EOF_CHAR; |
|
1762 |
else { |
|
1763 |
*ch = (wchar_t) (unsigned char) charData[dataPos]; |
|
1764 |
dataPos++; |
|
1765 |
}
|
|
1766 |
return true; |
|
1767 |
}
|
|
1768 |
||
1769 |
/* ------------------------------------------------------------------- */
|
|
1770 |
/* CSXMLFile */
|
|
1771 |
||
1772 |
bool CSXMLFile::parseFile(char *file_name, int32_t my_flags) |
|
1773 |
{
|
|
1774 |
bool ok; |
|
1775 |
||
1776 |
if (!(this->file = fopen(file_name, "r"))) { |
|
1777 |
setError(errno, NULL); |
|
1778 |
return false; |
|
1779 |
}
|
|
1780 |
ok = parseXML(my_flags); |
|
1781 |
fclose(this->file); |
|
1782 |
return ok; |
|
1783 |
}
|
|
1784 |
||
1785 |
bool CSXMLFile::getChar(wchar_t *ch) |
|
1786 |
{
|
|
1787 |
int32_t next_ch; |
|
1788 |
||
1789 |
next_ch = fgetc(file); |
|
1790 |
if (next_ch == EOF) { |
|
1791 |
if (ferror(file)) { |
|
1792 |
setError(errno, NULL); |
|
1793 |
return false; |
|
1794 |
}
|
|
1795 |
*ch = CS_XML_EOF_CHAR; |
|
1796 |
}
|
|
1797 |
else
|
|
1798 |
*ch = (wchar_t) next_ch; |
|
1799 |
return true; |
|
1800 |
}
|
|
1801 |
||
1802 |