1
/* Copyright (c) 2010 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* Paul McCullagh (H&G2JCtL)
38
#include <boost/algorithm/string.hpp>
42
/*
 * Character-class tests used by the parser.
 *
 * ISSPACE(ch)  - true for space, tab, newline and carriage return.
 * ISSINGLE(ch) - true for the punctuation characters that the parser
 *                treats as one-character tokens.
 *
 * Every use of the argument is parenthesised so that an expression such as
 * a conditional (a ? b : c) can be passed safely. The argument is still
 * evaluated more than once, so it must not have side effects.
 */
#define ISSPACE(ch)			((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')
#define ISSINGLE(ch)		((ch) == '*' || (ch) == '+' || (ch) == '(' || (ch) == ')' || (ch) == ',' || (ch) == '|' || (ch) == '[' || (ch) == ']' || (ch) == '?' || (ch) == '/')

/*
 * Capture-buffer helpers. `x` is a pointer to an object with `buffer` and
 * `count` members.
 *
 * SET_CHAR(x, ch) - make `ch` the only character in the buffer.
 * ADD_CHAR(x, ch) - append `ch`; once PARSE_BUFFER_SIZE characters are held
 *                   the last slot is overwritten instead of growing.
 *
 * The brace-block form is kept so that existing call sites expand exactly as
 * before; only the parameters are parenthesised.
 */
#define SET_CHAR(x, ch)		{ (x)->buffer[0] = (ch); (x)->count = 1; }
#define ADD_CHAR(x, ch)		{ if ((x)->count < PARSE_BUFFER_SIZE) { (x)->buffer[(x)->count] = (ch); (x)->count++; } else (x)->buffer[PARSE_BUFFER_SIZE-1] = (ch); }
48
// Compares the characters currently captured in this->buffer (this->count
// of them) with the C string `ch`.
// NOTE(review): what happens on a mismatch and how `ch` is advanced are not
// visible in this view -- confirm against the full body.
bool CSXMLParser::match_string(const char *ch)
52
// Walk the captured characters one position at a time.
for (i=0; i<this->count; i++) {
53
if (this->buffer[i] != *ch)
59
// True only when the index reached the end of the captured text.
return(i == this->count);
62
// Records in end_type[], for the current nesting level, which kind of
// end-tag operation will close the element being opened.
// NOTE(review): the tests on `ch` that select one of the assignments below,
// and the increment of `nesting` implied by the name, are not visible in
// this view -- confirm against the full body.
void CSXMLParser::increment_nesting(wchar_t ch)
64
// The slot is only written while the nesting depth is below the stack size.
if (this->nesting < PARSE_STACK_SIZE) {
67
this->end_type[this->nesting] = XML_OP_1_END_CLOSE_TAG;
70
this->end_type[this->nesting] = XML_OP_1_END_PI_TAG;
73
this->end_type[this->nesting] = XML_OP_1_END_ENTITY_TAG;
76
this->end_type[this->nesting] = XML_OP_1_END_BRACKET_TAG;
80
this->end_type[this->nesting] = XML_OP_1_END_UNKNOWN_TAG;
82
this->end_type[this->nesting] = XML_OP_1_END_TAG;
89
int32_t CSXMLParser::parseChar(wchar_t ch)
90
/* This function does the actual work of parsing. It expects
91
* "complete" characters as input. This could be 4 byte characters
92
* as long as it is able to recognize the characters that are
93
* relevant to parsing.
94
* The function outputs processing instructions, and indicates
95
* how the output data is to be understood.
98
switch (this->state) {
99
case XML_BEFORE_CDATA:
101
/* This is the initial state! */
103
this->state = XML_LT;
104
this->type = XML_noop;
107
this->state = XML_IN_CDATA;
108
this->type = XML_CDATA_CH;
114
this->state = XML_LT;
115
this->type = XML_noop;
118
this->type = XML_CDATA_CH;
124
this->state = XML_BEFORE_ATTR;
125
if (this->step == XML_STEP_TAG)
126
this->type = XML_start_tag_TAG_CH;
127
else if (this->step == XML_STEP_NESTED)
128
this->type = XML_TAG_CH;
129
else if (this->step == XML_STEP_NONE)
130
this->type = XML_end_cdata_TAG_CH;
132
this->type = XML_add_attr_TAG_CH;
133
this->step = XML_STEP_TAG;
134
increment_nesting(ch);
138
this->state = XML_IN_CDATA;
139
this->type = XML_CDATA_CH;
143
else if (ch == '!') {
144
this->state = XML_LT_BANG;
145
this->type = XML_noop;
149
this->state = XML_IN_TAG_NAME;
150
if (this->step == XML_STEP_TAG)
151
this->type = XML_start_tag_TAG_CH;
152
else if (this->step == XML_STEP_NESTED)
153
this->type = XML_TAG_CH;
154
else if (this->step == XML_STEP_NONE)
155
this->type = XML_end_cdata_TAG_CH;
157
this->type = XML_add_attr_TAG_CH;
158
this->step = XML_STEP_TAG;
159
increment_nesting(ch);
165
this->state = XML_LT_BANG_DASH;
166
this->type = XML_noop;
168
else if (ch == '[') {
169
this->state = XML_LT_BANG_SQR;
170
this->type = XML_noop;
173
this->state = XML_IN_TAG_NAME;
174
if (this->step == XML_STEP_TAG)
175
this->type = XML_start_tag_TAG_CH;
176
else if (this->step == XML_STEP_NESTED)
177
this->type = XML_TAG_CH;
178
else if (this->step == XML_STEP_NONE)
179
this->type = XML_end_cdata_TAG_CH;
181
this->type = XML_add_attr_TAG_CH;
182
this->step = XML_STEP_TAG;
183
increment_nesting('!');
188
case XML_LT_BANG_DASH:
190
this->state = XML_IN_COMMENT;
191
if (this->step == XML_STEP_TAG)
192
this->type = XML_start_tag_start_comment;
193
else if (this->step == XML_STEP_NESTED)
194
this->type = XML_start_comment;
195
else if (this->step == XML_STEP_NONE)
196
this->type = XML_end_cdata_start_comment;
198
this->type = XML_add_attr_start_comment;
199
increment_nesting(' ');
202
this->state = XML_IN_CDATA;
203
this->type = XML_CDATA_CH;
207
case XML_LT_BANG_SQR:
209
this->type = XML_noop;
210
else if (ch == '[') {
211
this->state = XML_BEFORE_ATTR;
212
if (this->step == XML_STEP_TAG)
213
this->type = XML_start_tag_TAG_CH;
214
else if (this->step == XML_STEP_NESTED)
215
this->type = XML_TAG_CH;
216
else if (this->step == XML_STEP_NONE)
217
this->type = XML_end_cdata_TAG_CH;
219
this->type = XML_add_attr_TAG_CH;
220
this->step = XML_STEP_TAG;
221
increment_nesting('[');
226
this->state = XML_LT_BANG_SQR_IN_NAME;
227
this->type = XML_noop;
233
case XML_LT_BANG_SQR_IN_NAME:
235
this->state = XML_LT_BANG_SQR_AFTER_NAME;
236
this->type = XML_noop;
238
else if (ch == '[') {
239
if (match_string("![CDATA")) {
240
this->state = XML_IN_CDATA_TAG;
241
if (this->step == XML_STEP_TAG)
242
this->type = XML_start_tag_start_cdata_tag;
243
else if (this->step == XML_STEP_NESTED)
244
this->type = XML_start_cdata_tag;
245
else if (this->step == XML_STEP_NONE)
246
this->type = XML_end_cdata_start_cdata_tag;
248
this->type = XML_add_attr_start_cdata_tag;
249
this->step = XML_STEP_TAG;
250
increment_nesting('[');
253
this->state = XML_BEFORE_ATTR;
254
if (this->step == XML_STEP_TAG)
255
this->type = XML_start_tag_TAG_CH;
256
else if (this->step == XML_STEP_NESTED)
257
this->type = XML_TAG_CH;
258
else if (this->step == XML_STEP_NONE)
259
this->type = XML_end_cdata_TAG_CH;
261
this->type = XML_add_attr_TAG_CH;
262
this->step = XML_STEP_TAG;
263
increment_nesting('[');
267
this->type = XML_noop;
271
case XML_LT_BANG_SQR_AFTER_NAME:
273
if (match_string("![CDATA")) {
274
this->state = XML_IN_CDATA_TAG;
275
if (this->step == XML_STEP_TAG)
276
this->type = XML_start_tag_start_cdata_tag;
277
else if (this->step == XML_STEP_NESTED)
278
this->type = XML_start_cdata_tag;
279
else if (this->step == XML_STEP_NONE)
280
this->type = XML_end_cdata_start_cdata_tag;
282
this->type = XML_add_attr_start_cdata_tag;
283
increment_nesting('[');
286
this->state = XML_BEFORE_ATTR;
287
if (this->step == XML_STEP_TAG)
288
this->type = XML_start_tag_TAG_CH;
289
else if (this->step == XML_STEP_NESTED)
290
this->type = XML_TAG_CH;
291
else if (this->step == XML_STEP_NONE)
292
this->type = XML_end_cdata_TAG_CH;
294
this->type = XML_add_attr_TAG_CH;
295
this->step = XML_STEP_TAG;
296
increment_nesting('[');
300
/* Ignore data until the '['!!! */
301
this->type = XML_noop;
303
case XML_IN_TAG_NAME:
305
this->state = XML_BEFORE_ATTR;
306
this->type = XML_noop;
308
else if (ch == '<') {
309
this->state = XML_LT;
310
this->type = XML_noop;
312
else if (ch == '>') {
313
if (this->step == XML_STEP_TAG)
314
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
315
else if (this->step == XML_STEP_NESTED)
316
this->type = XML_end_tag(END_TAG_TYPE(this));
318
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
321
this->step = XML_STEP_NESTED;
322
this->state = XML_BEFORE_ATTR;
325
this->step = XML_STEP_NONE;
326
this->state = XML_IN_CDATA;
329
else if (ch == '"' || ch == '\'') {
330
this->state = XML_QUOTE_BEFORE_VALUE;
332
this->type = XML_noop;
334
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
335
this->state = XML_SLASH;
336
this->type = XML_noop;
338
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
339
this->state = XML_QMARK;
340
this->type = XML_noop;
342
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
343
this->state = XML_SQR;
344
this->type = XML_noop;
346
else if (ISSINGLE(ch)) {
347
this->state = XML_BEFORE_ATTR;
348
if (this->step == XML_STEP_TAG)
349
this->type = XML_start_tag_ATTR_CH;
350
else if (this->step == XML_STEP_NESTED)
351
this->type = XML_ATTR_CH;
353
this->type = XML_add_attr_ATTR_CH;
354
this->step = XML_STEP_ATTR;
358
this->type = XML_TAG_CH;
362
case XML_BEFORE_ATTR:
364
this->type = XML_noop;
365
else if (ch == '<') {
366
this->state = XML_LT;
367
this->type = XML_noop;
369
else if (ch == '>') {
370
if (this->step == XML_STEP_TAG)
371
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
372
else if (this->step == XML_STEP_NESTED)
373
this->type = XML_end_tag(END_TAG_TYPE(this));
375
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
378
this->step = XML_STEP_NESTED;
379
this->state = XML_BEFORE_ATTR;
382
this->step = XML_STEP_NONE;
383
this->state = XML_IN_CDATA;
386
else if (ch == '"' || ch == '\'') {
387
this->state = XML_QUOTE_BEFORE_VALUE;
389
this->type = XML_noop;
391
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
392
this->state = XML_SLASH;
393
this->type = XML_noop;
395
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
396
this->state = XML_QMARK;
397
this->type = XML_noop;
399
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
400
this->state = XML_SQR;
401
this->type = XML_noop;
403
else if (ISSINGLE(ch)) {
404
if (this->step == XML_STEP_TAG)
405
this->type = XML_start_tag_ATTR_CH;
406
else if (this->step == XML_STEP_NESTED)
407
this->type = XML_ATTR_CH;
409
this->type = XML_add_attr_ATTR_CH;
410
this->step = XML_STEP_ATTR;
414
this->state = XML_IN_ATTR;
415
if (this->step == XML_STEP_TAG)
416
this->type = XML_start_tag_ATTR_CH;
417
else if (this->step == XML_STEP_NESTED)
418
this->type = XML_ATTR_CH;
420
this->type = XML_add_attr_ATTR_CH;
421
this->step = XML_STEP_ATTR;
427
this->state = XML_BEFORE_EQUAL;
428
this->type = XML_noop;
430
else if (ch == '<') {
431
this->state = XML_LT;
432
this->type = XML_noop;
434
else if (ch == '>') {
435
if (this->step == XML_STEP_TAG)
436
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
437
else if (this->step == XML_STEP_NESTED)
438
this->type = XML_end_tag(END_TAG_TYPE(this));
440
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
443
this->step = XML_STEP_NESTED;
444
this->state = XML_BEFORE_ATTR;
447
this->step = XML_STEP_NONE;
448
this->state = XML_IN_CDATA;
451
else if (ch == '"' || ch == '\'') {
452
this->state = XML_QUOTE_BEFORE_VALUE;
454
this->type = XML_noop;
456
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
457
this->state = XML_SLASH;
458
this->type = XML_noop;
460
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
461
this->state = XML_QMARK;
462
this->type = XML_noop;
464
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
465
this->state = XML_SQR;
466
this->type = XML_noop;
468
else if (ISSINGLE(ch)) {
469
this->state = XML_BEFORE_ATTR;
470
if (this->step == XML_STEP_TAG)
471
this->type = XML_start_tag_ATTR_CH;
472
else if (this->step == XML_STEP_NESTED)
473
this->type = XML_ATTR_CH;
475
this->type = XML_add_attr_ATTR_CH;
476
this->step = XML_STEP_ATTR;
479
else if (ch == '=') {
480
this->state = XML_AFTER_EQUAL;
481
this->type = XML_noop;
484
this->type = XML_ATTR_CH;
488
case XML_BEFORE_EQUAL:
490
this->type = XML_noop;
491
else if (ch == '<') {
492
this->state = XML_LT;
493
this->type = XML_noop;
495
else if (ch == '>') {
496
if (this->step == XML_STEP_TAG)
497
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
498
else if (this->step == XML_STEP_NESTED)
499
this->type = XML_end_tag(END_TAG_TYPE(this));
501
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
504
this->step = XML_STEP_NESTED;
505
this->state = XML_BEFORE_ATTR;
508
this->step = XML_STEP_NONE;
509
this->state = XML_IN_CDATA;
512
else if (ch == '"' || ch == '\'') {
513
this->state = XML_QUOTE_BEFORE_VALUE;
515
this->type = XML_noop;
517
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
518
this->state = XML_SLASH;
519
this->type = XML_noop;
521
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
522
this->state = XML_QMARK;
523
this->type = XML_noop;
525
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
526
this->state = XML_SQR;
527
this->type = XML_noop;
529
else if (ISSINGLE(ch)) {
530
this->state = XML_BEFORE_ATTR;
531
if (this->step == XML_STEP_TAG)
532
this->type = XML_start_tag_ATTR_CH;
533
else if (this->step == XML_STEP_NESTED)
534
this->type = XML_ATTR_CH;
536
this->type = XML_add_attr_ATTR_CH;
537
this->step = XML_STEP_ATTR;
540
else if (ch == '=') {
541
this->state = XML_AFTER_EQUAL;
542
this->type = XML_noop;
545
this->state = XML_IN_ATTR;
546
if (this->step == XML_STEP_TAG)
547
this->type = XML_start_tag_ATTR_CH;
548
else if (this->step == XML_STEP_NESTED)
549
this->type = XML_ATTR_CH;
551
this->type = XML_add_attr_ATTR_CH;
552
this->step = XML_STEP_ATTR;
556
case XML_AFTER_EQUAL:
558
this->state = XML_AFTER_EQUAL;
559
this->type = XML_noop;
561
else if (ch == '<') {
562
this->state = XML_LT;
563
this->type = XML_noop;
565
else if (ch == '>') {
566
if (this->step == XML_STEP_TAG)
567
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
568
else if (this->step == XML_STEP_NESTED)
569
this->type = XML_end_tag(END_TAG_TYPE(this));
571
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
574
this->step = XML_STEP_NESTED;
575
this->state = XML_BEFORE_ATTR;
578
this->step = XML_STEP_NONE;
579
this->state = XML_IN_CDATA;
582
else if (ch == '"' || ch == '\'') {
583
this->state = XML_QUOTE_BEFORE_VALUE;
585
this->type = XML_noop;
587
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
588
this->state = XML_SLASH;
589
this->type = XML_noop;
591
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
592
this->state = XML_QMARK;
593
this->type = XML_noop;
595
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
596
this->state = XML_SQR;
597
this->type = XML_noop;
599
else if (ISSINGLE(ch)) {
600
this->state = XML_BEFORE_ATTR;
601
if (this->step == XML_STEP_TAG)
602
this->type = XML_start_tag_ATTR_CH;
603
else if (this->step == XML_STEP_NESTED)
604
this->type = XML_ATTR_CH;
606
this->type = XML_add_attr_ATTR_CH;
607
this->step = XML_STEP_ATTR;
611
this->state = XML_IN_VALUE;
613
if (this->step == XML_STEP_TAG)
614
this->type = XML_start_tag_VALUE_CH;
615
else if (this->step == XML_STEP_VALUE)
616
this->type = XML_add_attr_VALUE_CH;
618
this->type = XML_VALUE_CH;
619
this->step = XML_STEP_VALUE;
623
case XML_QUOTE_BEFORE_VALUE:
624
if (ch == this->quote) {
625
this->state = XML_QUOTE_AFTER_VALUE;
627
if (this->step == XML_STEP_TAG)
628
this->type = XML_start_tag_VALUE_CH;
629
else if (this->step == XML_STEP_VALUE)
630
this->type = XML_add_attr_VALUE_CH;
632
this->type = XML_VALUE_CH;
633
this->step = XML_STEP_VALUE;
637
this->state = XML_IN_VALUE;
638
if (this->step == XML_STEP_TAG)
639
this->type = XML_start_tag_VALUE_CH;
640
else if (this->step == XML_STEP_VALUE)
641
this->type = XML_add_attr_VALUE_CH;
643
this->type = XML_VALUE_CH;
644
this->step = XML_STEP_VALUE;
650
if (ch == this->quote) {
651
this->state = XML_QUOTE_AFTER_VALUE;
652
this->type = XML_noop;
655
this->type = XML_VALUE_CH;
660
/* A value without quotes (for HTML!) */
662
this->state = XML_BEFORE_ATTR;
663
this->type = XML_noop;
665
else if (ch == '<') {
666
this->state = XML_LT;
667
this->type = XML_noop;
669
else if (ch == '>') {
670
if (this->step == XML_STEP_TAG)
671
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
672
else if (this->step == XML_STEP_NESTED)
673
this->type = XML_end_tag(END_TAG_TYPE(this));
675
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
678
this->step = XML_STEP_NESTED;
679
this->state = XML_BEFORE_ATTR;
682
this->step = XML_STEP_NONE;
683
this->state = XML_IN_CDATA;
686
else if (ch == '"' || ch == '\'') {
687
this->state = XML_QUOTE_BEFORE_VALUE;
689
this->type = XML_noop;
692
this->type = XML_VALUE_CH;
697
case XML_QUOTE_AFTER_VALUE:
699
this->state = XML_BEFORE_ATTR;
700
this->type = XML_noop;
702
else if (ch == '<') {
703
this->state = XML_LT;
704
this->type = XML_noop;
706
else if (ch == '>') {
707
if (this->step == XML_STEP_TAG)
708
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
709
else if (this->step == XML_STEP_NESTED)
710
this->type = XML_end_tag(END_TAG_TYPE(this));
712
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
715
this->step = XML_STEP_NESTED;
716
this->state = XML_BEFORE_ATTR;
719
this->step = XML_STEP_NONE;
720
this->state = XML_IN_CDATA;
723
else if (ch == '"' || ch == '\'') {
724
this->state = XML_QUOTE_BEFORE_VALUE;
726
this->type = XML_noop;
728
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
729
this->state = XML_SLASH;
730
this->type = XML_noop;
732
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
733
this->state = XML_QMARK;
734
this->type = XML_noop;
736
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
737
this->state = XML_SQR;
738
this->type = XML_noop;
740
else if (ISSINGLE(ch)) {
741
this->state = XML_BEFORE_ATTR;
742
if (this->step == XML_STEP_TAG)
743
this->type = XML_start_tag_ATTR_CH;
744
else if (this->step == XML_STEP_NESTED)
745
this->type = XML_ATTR_CH;
747
this->type = XML_add_attr_ATTR_CH;
748
this->step = XML_STEP_ATTR;
752
this->state = XML_IN_ATTR;
753
if (this->step == XML_STEP_TAG)
754
this->type = XML_start_tag_ATTR_CH;
755
else if (this->step == XML_STEP_NESTED)
756
this->type = XML_ATTR_CH;
758
this->type = XML_add_attr_ATTR_CH;
759
this->step = XML_STEP_ATTR;
773
this->state = XML_BEFORE_ATTR;
774
if (this->step == XML_STEP_TAG)
775
this->type = XML_start_tag_TAG_CH;
776
else if (this->step == XML_STEP_NESTED)
777
this->type = XML_TAG_CH;
778
else if (this->step == XML_STEP_NONE)
779
this->type = XML_end_cdata_TAG_CH;
781
this->type = XML_add_attr_TAG_CH;
782
this->step = XML_STEP_ATTR;
784
else if (ch == '<') {
785
this->state = XML_LT;
786
if (this->step == XML_STEP_TAG)
787
this->type = XML_start_tag_TAG_CH;
788
else if (this->step == XML_STEP_NESTED)
789
this->type = XML_TAG_CH;
790
else if (this->step == XML_STEP_NONE)
791
this->type = XML_end_cdata_TAG_CH;
793
this->type = XML_add_attr_TAG_CH;
794
this->step = XML_STEP_TAG;
796
else if (ch == '>') {
797
if (this->state == XML_SLASH) {
798
if (this->step == XML_STEP_TAG)
799
this->type = XML_start_tag_end_empty_tag;
800
else if (this->step == XML_STEP_NESTED)
801
this->type = XML_end_empty_tag;
803
this->type = XML_add_attr_end_empty_tag;
805
else if (this->state == XML_SQR) {
806
if (this->step == XML_STEP_TAG)
807
this->type = XML_start_tag_end_tag(XML_OP_1_END_BRACKET_TAG);
808
else if (this->step == XML_STEP_NESTED)
809
this->type = XML_end_tag(XML_OP_1_END_BRACKET_TAG);
811
this->type = XML_add_attr_end_tag(XML_OP_1_END_BRACKET_TAG);
814
if (this->step == XML_STEP_TAG)
815
this->type = XML_start_tag_end_pi_tag;
816
else if (this->step == XML_STEP_NESTED)
817
this->type = XML_end_pi_tag;
819
this->type = XML_add_attr_end_pi_tag;
823
this->step = XML_STEP_NESTED;
824
this->state = XML_BEFORE_ATTR;
827
this->step = XML_STEP_NONE;
828
this->state = XML_IN_CDATA;
831
else if (ch == '"' || ch == '\'') {
832
this->state = XML_QUOTE_BEFORE_VALUE;
834
if (this->step == XML_STEP_TAG)
835
this->type = XML_start_tag_TAG_CH;
836
else if (this->step == XML_STEP_NESTED)
837
this->type = XML_TAG_CH;
838
else if (this->step == XML_STEP_NONE)
839
this->type = XML_end_cdata_TAG_CH;
841
this->type = XML_add_attr_TAG_CH;
842
this->step = XML_STEP_ATTR;
844
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
845
this->state = XML_SLASH;
846
if (this->step == XML_STEP_TAG)
847
this->type = XML_start_tag_TAG_CH;
848
else if (this->step == XML_STEP_NESTED)
849
this->type = XML_TAG_CH;
850
else if (this->step == XML_STEP_NONE)
851
this->type = XML_end_cdata_TAG_CH;
853
this->type = XML_add_attr_TAG_CH;
854
this->step = XML_STEP_ATTR;
856
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
857
this->state = XML_QMARK;
858
if (this->step == XML_STEP_TAG)
859
this->type = XML_start_tag_TAG_CH;
860
else if (this->step == XML_STEP_NESTED)
861
this->type = XML_TAG_CH;
862
else if (this->step == XML_STEP_NONE)
863
this->type = XML_end_cdata_TAG_CH;
865
this->type = XML_add_attr_TAG_CH;
866
this->step = XML_STEP_ATTR;
868
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
869
this->state = XML_SQR;
870
if (this->step == XML_STEP_TAG)
871
this->type = XML_start_tag_TAG_CH;
872
else if (this->step == XML_STEP_NESTED)
873
this->type = XML_TAG_CH;
874
else if (this->step == XML_STEP_NONE)
875
this->type = XML_end_cdata_TAG_CH;
877
this->type = XML_add_attr_TAG_CH;
878
this->step = XML_STEP_ATTR;
880
else if (ISSINGLE(ch)) {
881
this->state = XML_BEFORE_ATTR;
882
if (this->step == XML_STEP_TAG)
883
this->type = XML_start_tag_TAG_CH;
884
else if (this->step == XML_STEP_NESTED)
885
this->type = XML_TAG_CH;
886
else if (this->step == XML_STEP_NONE)
887
this->type = XML_end_cdata_TAG_CH;
889
this->type = XML_add_attr_TAG_CH;
890
this->step = XML_STEP_ATTR;
894
this->state = XML_IN_ATTR;
895
if (this->step == XML_STEP_TAG)
896
this->type = XML_start_tag_TAG_CH;
897
else if (this->step == XML_STEP_NESTED)
898
this->type = XML_TAG_CH;
899
else if (this->step == XML_STEP_NONE)
900
this->type = XML_end_cdata_TAG_CH;
902
this->type = XML_add_attr_TAG_CH;
903
this->step = XML_STEP_ATTR;
909
this->state = XML_IN_COMMENT_DASH;
910
this->type = XML_noop;
913
this->type = XML_COMMENT_CH;
916
case XML_IN_COMMENT_DASH:
918
this->state = XML_IN_COMMENT_DASH_DASH;
919
this->type = XML_noop;
922
this->state = XML_IN_COMMENT;
923
this->type = XML_COMMENT_CH;
927
case XML_IN_COMMENT_DASH_DASH:
929
this->state = XML_IN_COMMENT_3_DASH;
930
this->type = XML_COMMENT_CH;
933
else if (ch == '>') {
934
this->type = XML_end_comment;
937
this->step = XML_STEP_NESTED;
938
this->state = XML_BEFORE_ATTR;
941
this->step = XML_STEP_NONE;
942
this->state = XML_IN_CDATA;
946
this->state = XML_IN_COMMENT;
947
this->type = XML_COMMENT_CH;
951
case XML_IN_COMMENT_3_DASH:
953
this->type = XML_COMMENT_CH;
956
else if (ch == '>') {
957
this->type = XML_end_comment;
960
this->step = XML_STEP_NESTED;
961
this->state = XML_BEFORE_ATTR;
964
this->step = XML_STEP_NONE;
965
this->state = XML_IN_CDATA;
969
this->state = XML_IN_COMMENT;
970
this->type = XML_COMMENT_CH;
976
case XML_IN_CDATA_TAG:
978
this->state = XML_IN_CDATA_TAG_SQR;
979
this->type = XML_noop;
982
this->type = XML_CDATA_TAG_CH;
985
case XML_IN_CDATA_TAG_SQR:
987
this->state = XML_IN_CDATA_TAG_SQR_SQR;
988
this->type = XML_noop;
991
this->state = XML_IN_CDATA_TAG;
992
this->type = XML_CDATA_TAG_CH;
996
case XML_IN_CDATA_TAG_SQR_SQR:
998
this->state = XML_IN_CDATA_TAG_3_SQR;
999
this->type = XML_CDATA_TAG_CH;
1002
else if (ch == '>') {
1003
this->type = XML_end_cdata_tag;
1005
if (this->nesting) {
1006
this->step = XML_STEP_NESTED;
1007
this->state = XML_BEFORE_ATTR;
1010
this->step = XML_STEP_NONE;
1011
this->state = XML_IN_CDATA;
1015
this->state = XML_IN_CDATA_TAG;
1016
this->type = XML_CDATA_TAG_CH;
1020
case XML_IN_CDATA_TAG_3_SQR:
1022
this->type = XML_CDATA_TAG_CH;
1025
else if (ch == '>') {
1026
this->type = XML_end_cdata_tag;
1028
if (this->nesting) {
1029
this->step = XML_STEP_NESTED;
1030
this->state = XML_BEFORE_ATTR;
1033
this->step = XML_STEP_NONE;
1034
this->state = XML_IN_CDATA;
1038
this->state = XML_IN_CDATA_TAG;
1039
this->type = XML_CDATA_TAG_CH;
1040
SET_CHAR(this, ']');
1041
ADD_CHAR(this, ']');
1049
/* ------------------------------------------------------------------- */
1050
/* CSXMLProcessor */
1052
// Fills conversion_table[], the 128-entry table that charset_transformer()
// indexes with (character - 128) for 8-bit charsets.
// NOTE(review): the return value is not visible in this view.
bool CSXMLProcessor::buildConversionTable()
1056
/* By default we don't know how to convert any charset
1057
* other than ISO-8859-1 to unicode!
1059
// ISO-8859-1 (compared case-insensitively): entry i maps to code point i + 128.
if (strcasecmp(charset, "ISO-8859-1") == 0) {
1060
for (i=0; i<128; i++)
1061
conversion_table[i] = (wchar_t) (i + 128);
1064
// Second visible loop: every entry becomes '?'.
// NOTE(review): presumably the branch for any other charset -- confirm.
for (i=0; i<128; i++)
1065
conversion_table[i] = '?';
1070
// Private use area: E000 - F8FF
1072
// Stage called by entity_translator() with each character; dispatches on
// the XML_OP_1 part of `op`.
// NOTE(review): how `op` is obtained and what the START_TAG / ADD_ATTR cases
// reset are not visible in this view -- confirm against the full body.
int32_t CSXMLProcessor::capture_initializer(wchar_t ch)
1073
/* We capture tag and attribute data for the parsing purposes.
1074
* The buffers are initialized here (at the lowest level)
1075
* of processing after parsing.
1081
switch (op & XML_OP_1_MASK) {
1082
case XML_OP_1_START_TAG:
1085
case XML_OP_1_ADD_ATTR:
1093
// Entity-handling stage: called by charset_transformer() with each
// character, and itself hands the character on to capture_initializer().
// NOTE(review): the entity recognition logic is not visible in this view.
int32_t CSXMLProcessor::entity_translator(wchar_t ch)
1094
/* This function handles entities.
1095
* Certain entities are translated into UNICODE characters.
1096
* Strictly speaking, these entities are only recognised by HTML.
1097
* The few entities that are recognised by XML are first translated
1098
* into some reserved characters for the parser. This is to ensure
1099
* that the parser does not recognize them as characters with special
1100
* meaning! This includes '&', '<' and '>'.
1105
// Pass the character to the next stage and keep the operation it reports.
op = capture_initializer(ch);
1110
* This function translates the input character stream into UNICODE.
1112
// Converts the incoming character according to this->charset_type, passes
// the result to entity_translator(), and then inspects the reported
// operation to pick up the document's declared encoding.
// NOTE(review): several case labels, breaks and the return are not visible
// in this view; the notes below describe only the visible statements.
int32_t CSXMLProcessor::charset_transformer(wchar_t ch)
1116
// Do transformation according to the charset.
1117
switch (this->charset_type) {
1119
// NOTE(review): presumably the CHARSET_UTF_8 case (label not visible).
// Only values 128..255 are treated as UTF-8 sequence bytes.
if (ch > 127 && ch < 256) {
1121
uint8_t utf_ch = (uint8_t)ch;
1123
// A byte that is not a continuation byte (10xxxxxx) restarts the sequence.
if ((utf_ch & 0xC0) != 0x80)
1124
this->utf8_count = 0;
1125
// Derive the expected sequence length from the lead-byte bit pattern.
if ((utf_ch & 0x80) == 0x00)
1126
this->utf8_length = 1;
1127
else if ((utf_ch & 0xE0) == 0xC0)
1128
this->utf8_length = 2;
1129
else if ((utf_ch & 0xF0) == 0xE0)
1130
this->utf8_length = 3;
1131
else if ((utf_ch & 0xF8) == 0xF0)
1132
this->utf8_length = 4;
1133
else if ((utf_ch & 0xFC) == 0xF8)
1134
this->utf8_length = 5;
1135
else if ((utf_ch & 0xFE) == 0xFC)
1136
this->utf8_length = 6;
// Store the byte at the current position of the sequence buffer.
this->utf8_buffer[this->utf8_count] = (uint32_t) utf_ch;
1139
if (this->utf8_count < this->utf8_length) {
1140
// I need more bytes!
1141
setDataType(XML_noop);
1145
// Sequence complete: assemble the code point from the buffered bytes.
// Each `utf_value < ...` test below detects a value smaller than the
// minimum that needs that many bytes (an overlong form).
// NOTE(review): the action taken on those tests is not visible here.
switch (this->utf8_length) {
1147
utf_value = this->utf8_buffer[0] & 0x0000007F;
1150
utf_value = ((this->utf8_buffer[0] & 0x0000001F) << 6) |
1151
(this->utf8_buffer[1] & 0x0000003F);
1152
if (utf_value < 0x00000080)
1156
utf_value = ((this->utf8_buffer[0] & 0x0000000F) << 12) |
1157
((this->utf8_buffer[1] & 0x0000003F) << 6) |
1158
(this->utf8_buffer[2] & 0x0000003F);
1159
if (utf_value < 0x000000800)
1163
utf_value = ((this->utf8_buffer[0] & 0x00000007) << 18) |
1164
((this->utf8_buffer[1] & 0x0000003F) << 12) |
1165
((this->utf8_buffer[2] & 0x0000003F) << 6) |
1166
(this->utf8_buffer[3] & 0x0000003F);
1167
if (utf_value < 0x00010000)
1171
utf_value = ((this->utf8_buffer[0] & 0x00000003) << 24) |
1172
((this->utf8_buffer[1] & 0x0000003F) << 18) |
1173
((this->utf8_buffer[2] & 0x0000003F) << 12) |
1174
((this->utf8_buffer[3] & 0x0000003F) << 6) |
1175
(this->utf8_buffer[4] & 0x0000003F);
1176
if (utf_value < 0x00200000)
1180
utf_value = ((this->utf8_buffer[0] & 0x00000001) << 30) |
1181
((this->utf8_buffer[1] & 0x0000003F) << 24) |
1182
((this->utf8_buffer[2] & 0x0000003F) << 18) |
1183
((this->utf8_buffer[3] & 0x0000003F) << 12) |
1184
((this->utf8_buffer[4] & 0x0000003F) << 6) |
1185
(this->utf8_buffer[5] & 0x0000003F);
1186
if (utf_value < 0x04000000)
1190
// NOTE(review): values above 16 bits get special treatment that is not
// visible here -- confirm what is substituted.
if (utf_value > 0x0000FFFF)
1196
case CHARSET_TO_CONVERT_8_BIT:
// Map 128..255 through the table filled by buildConversionTable().
if (ch > 127 && ch < 256)
1198
ch = this->conversion_table[((unsigned char) ch) - 128];
1202
// Hand the (possibly converted) character to the next stage.
op = entity_translator(ch);
1204
// Determine the character set:
1205
switch (op & XML_OP_1_MASK) {
1206
case XML_OP_1_START_TAG:
// Exact (case-sensitive) match on the captured tag name "?xml".
if (strcmp(this->pr_tag, "?xml") == 0)
1212
case XML_OP_1_ADD_ATTR:
1214
// An attribute named "encoding" (any case) selects the charset handling.
if (strcasecmp(this->pr_name, "encoding") == 0) {
// NOTE(review): unbounded copy -- confirm that `charset` is at least as
// large as `pr_value`, or switch to a bounded copy.
strcpy(this->charset, this->pr_value);
// Case-insensitive substring tests on the declared encoding name.
if (boost::ifind_first(this->charset, "utf-8"))
1217
this->charset_type = CHARSET_UTF_8;
1218
else if (boost::ifind_first(this->charset, "ucs-2") ||
1219
boost::ifind_first(this->charset, "ucs-4") ||
1220
boost::ifind_first(this->charset, "unicode"))
1221
this->charset_type = CHARSET_STANDARD;
1223
// Otherwise treat it as an 8-bit charset and (re)build the table.
this->charset_type = CHARSET_TO_CONVERT_8_BIT;
1224
buildConversionTable();
1233
// Appends wide characters from `schars` (`slen` of them) to the char buffer
// `dstr`, whose current length is *dlen and whose capacity is `dsize`.
// NOTE(review): the updates of *dlen and schars and the termination of dstr
// are not visible in this view -- confirm against the full body.
void CSXMLProcessor::appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen)
1235
for (size_t i=0; i < slen; i++) {
1236
// Only write while at least one byte of the buffer would remain free.
if (*dlen < dsize-1) {
1240
// The wide character is narrowed with a plain cast.
dstr[*dlen] = (char)*schars;
1248
// Top-level per-character entry point: runs the character through
// charset_transformer() and then appends the data reported by
// getDataPtr()/getDataLen() to pr_tag, pr_name or pr_value.
// NOTE(review): the case labels for the first two appends and the return
// are not visible in this view.
int32_t CSXMLProcessor::processChar(wchar_t ch)
1252
op = charset_transformer(ch);
1255
* Capture output tag and attribute data.
1256
* This must be done at the highest level, after
1259
// Select the capture buffer from the data-type bits of the operation.
switch (op & XML_DATA_MASK) {
1261
appendWCharToString(this->pr_tag, &this->tlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1264
appendWCharToString(this->pr_name, &this->nlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1266
case XML_DATA_VALUE:
// NOTE(review): the value buffer is bounded by CS_MAX_XML_NAME_SIZE, the
// same limit as the names -- confirm pr_value is declared with that size.
appendWCharToString(this->pr_value, &this->vlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1273
bool CSXMLProcessor::getError(int32_t *err, char **msg)
1280
// Stores an error message in err_message: either a copy of `msg`, or a
// message derived from the error code `err`.
// NOTE(review): callers in this file pass msg == NULL, so the copy below is
// presumably guarded by a null check that is not visible here -- confirm.
void CSXMLProcessor::setError(int32_t err, char *msg)
1284
// Bounded copy; the next line forces termination because strncpy does not
// terminate the destination when the source fills it.
strncpy(err_message, msg, CS_XML_ERR_MSG_SIZE);
1285
err_message[CS_XML_ERR_MSG_SIZE-1] = 0;
1290
// NOTE(review): the sprintf calls below are unbounded; the fixed texts and
// the strerror() text must fit in err_message. Consider snprintf with
// CS_XML_ERR_MSG_SIZE. The "AES" prefix also looks out of place for an XML
// parser -- confirm the intended wording.
case CS_XML_ERR_OUT_OF_MEMORY:
1291
sprintf(err_message, "AES parse error- insufficient memory");
1293
case CS_XML_ERR_CHAR_TOO_LARGE:
1294
sprintf(err_message, "AES parse error- UNICODE character too large to be encoded as UTF-8");
1297
// Any other code is rendered with strerror(), i.e. treated as an errno value.
sprintf(err_message, "AES parse error- %s", strerror(err));
1302
// Writes `prefix` immediately followed by the stored err_message to stdout.
// No newline is added by the visible call.
void CSXMLProcessor::printError(char *prefix)
1304
printf("%s%s", prefix, err_message);
1307
/* ------------------------------------------------------------------- */
1311
// Extra bytes added to each reallocation request in CSXMLString::addChar().
// NOTE(review): EXTRA_SIZE is defined twice; presumably the two values are
// selected by a preprocessor conditional that is not visible here -- confirm.
#define EXTRA_SIZE 2
1313
#define EXTRA_SIZE 100
1316
// Appends one byte to the string and keeps it NUL-terminated, growing the
// allocation when needed. On allocation failure the error is reported
// through `xml`.
// NOTE(review): the assignment of the new pointer, the update of stringLen
// and the return values are not visible in this view.
bool CSXMLString::addChar(char ch, CSXMLProcessor *xml)
1320
// Room is needed for the new byte plus the terminator.
if (stringLen + 2 > stringSize) {
1321
// Grow by the required amount plus EXTRA_SIZE bytes of slack.
if (!(ptr = (char *) realloc(stringPtr, stringLen + 2 + EXTRA_SIZE))) {
1322
xml->setError(CS_XML_ERR_OUT_OF_MEMORY, NULL);
1326
stringSize = stringLen + 2 + EXTRA_SIZE;
1328
stringPtr[stringLen] = ch;
1329
stringPtr[stringLen+1] = 0;
1334
bool CSXMLString::addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml)
1340
for (i=0; i<size; i++) {
1341
uni_char = (uint32_t) buffer[i];
1343
/* Convertion to lower only done for ASCII! */
1344
if (to_lower && uni_char <= 127)
1345
uni_char = (uint32_t) tolower((int32_t) uni_char);
1347
// Convert to UTF-8!
1348
if (uni_char <= 0x0000007F) {
1349
if (!addChar((char) uni_char, xml))
1353
else if (uni_char <= 0x000007FF) {
1354
if (!addChar((char) ((0x000000C0) | ((uni_char >> 6) & 0x0000001F)), xml))
1358
else if (uni_char <= 0x00000FFFF) {
1359
if (!addChar((char) ((0x000000E0) | ((uni_char >> 12) & 0x0000000F)), xml))
1363
else if (uni_char <= 0x001FFFFF) {
1364
if (!addChar((char) ((0x000000F0) | ((uni_char >> 18) & 0x00000007)), xml))
1368
else if (uni_char <= 0x003FFFFFF) {
1369
if (!addChar((char) ((0x000000F0) | ((uni_char >> 24) & 0x00000003)), xml))
1373
else if (uni_char <= 0x07FFFFFFF) {
1374
if (!addChar((char) ((0x000000F0) | ((uni_char >> 30) & 0x00000001)), xml))
1379
xml->setError(CS_XML_ERR_CHAR_TOO_LARGE, NULL);
1383
while (shift >= 0) {
1384
if (!addChar((char) ((0x00000080) | ((uni_char >> shift) & 0x0000003F)), xml))
1392
// Appends the NUL-terminated C string `string` byte by byte via addChar().
// NOTE(review): the initialisation of `ok`, the advance of `string` and the
// return are not visible in this view.
bool CSXMLString::addString(const char *string, CSXMLProcessor *xml)
1396
// Stop at the terminator or as soon as an append fails.
while (*string && ok) {
1397
ok = addChar(*string, xml);
1403
void CSXMLString::setEmpty()
1410
void CSXMLString::setNull()
1419
// Scans backwards from the last character of the string for a '/'.
// NOTE(review): the handling of an empty string and the value returned are
// not visible in this view -- confirm against the full body.
char *CSXMLString::lastComponent()
1426
// Start at the last character.
ptr = stringPtr + stringLen - 1;
1427
// Move left until a '/' is found or the start of the string is reached.
while (ptr > stringPtr && *ptr != '/')
1432
// Searches the string from the end, one '/'-delimited component at a time,
// for a component equal to `comp` (compared with strcmp, so case-sensitive).
// NOTE(review): the returned value and the use of `last_slash` are not
// visible in this view.
/* We assume comp begins with a '/' */
1433
char *CSXMLString::findTrailingComponent(const char *comp)
1435
char *ptr, *last_slash;
1440
// Start at the last character.
ptr = stringPtr + stringLen - 1;
1444
/* Find the next '/' */
1445
while (ptr > stringPtr && *ptr != '/')
1449
// `ptr` points at a '/', so the text from here is compared with `comp`.
if (strcmp(ptr, comp) == 0) {
1459
// Visible loop condition: repeat while not yet at the start of the string.
while (ptr > stringPtr);
1463
// Shortens the string so that it ends at `ptr`.
// NOTE(review): `ptr` is assumed to point into stringPtr's buffer; writing
// the new terminator is not visible in this view.
void CSXMLString::truncate(char *ptr)
1466
// The new length is the offset of `ptr` from the start of the buffer.
stringLen = ptr - stringPtr;
1469
/* ------------------------------------------------------------------- */
1472
// Node classification codes returned by CSXML::nodeType().
// The notes give the rule visible in nodeType() where there is one.
// Value 6 is not defined in this view.
// Character data; also the final return of nodeType().
#define IS_XML_CDATA 0
1473
// Name equal to "![CDATA[" (case-insensitive).
#define IS_XML_CDATA_TAG 1
1474
#define IS_XML_TAG 2
1475
#define IS_XML_CLOSE_TAG 3
1476
// Name equal to "!--" (case-insensitive).
#define IS_XML_COMMENT 4
1477
#define IS_XML_DTD 5
1479
// Name equal to "?xml" (case-insensitive).
#define IS_XML_PI_XML 7
1480
// Any other name whose second character is '['.
#define IS_XML_IN_EX 8
1481
// Returned for certain one-character names.
// NOTE(review): presumably "[" and "]" respectively -- confirm.
#define IS_XML_OPEN_BRACKET 9
1482
#define IS_XML_CLOSE_BRACKET 10
1484
/* Classify a node name as one of the IS_XML_* codes.
 *
 * Recognised forms, as far as the comparisons below show:
 *   a one-character name        -> IS_XML_OPEN_BRACKET or
 *                                  IS_XML_CLOSE_BRACKET
 *   "!--"                       -> IS_XML_COMMENT
 *   "![CDATA["                  -> IS_XML_CDATA_TAG
 *   other names with '[' second -> IS_XML_IN_EX
 *   "?xml"                      -> IS_XML_PI_XML
 * The string comparisons use strcasecmp(), so they ignore case.
 * IS_XML_CDATA is both the first and the last return in the function.
 * NOTE(review): the tests that select between the bracket codes and
 * IS_XML_CLOSE_TAG presumably look at name[0] -- confirm.
 */
int32_t CSXML::nodeType(char *name)
1489
return IS_XML_CDATA;
1491
if (strlen(name) == 1)
1492
return IS_XML_OPEN_BRACKET;
1495
if (strlen(name) == 1)
1496
return IS_XML_CLOSE_BRACKET;
1499
return IS_XML_CLOSE_TAG;
1501
if (strlen(name) > 1) {
1502
if (strcasecmp(name, "!--") == 0)
1503
return IS_XML_COMMENT;
1504
if (name[1] == '[') {
1505
if (strcasecmp(name, "![CDATA[") == 0)
1506
return IS_XML_CDATA_TAG;
1507
return IS_XML_IN_EX;
1512
if (strcasecmp(name, "?xml") == 0)
1513
return IS_XML_PI_XML;
1518
return IS_XML_CDATA;
1521
/* Report the closing of one or more nodes through closeNode() and
 * remove the closed components from xml_path.
 *
 * Two paths are visible:
 *  - the last component of xml_path is located with lastComponent(),
 *    closeNode() is called with the full path, and the path is then
 *    truncated at that component;
 *  - otherwise, if findTrailingComponent(name) finds 'name' somewhere
 *    in the path, components are closed and truncated from the end of
 *    the path one at a time, and the component equal to 'name' is
 *    handled separately from the others.
 * parseXML() passes single == true with the closing token and
 * single == false with the tag name; presumably 'single' selects the
 * first path -- confirm.
 */
bool CSXML::internalCloseNode(const char *name, bool single)
1527
if ((ptr = xml_path.lastComponent())) {
1528
ok = closeNode(xml_path.stringPtr);
1529
xml_path.truncate(ptr);
1532
else if ((ptr = xml_path.findTrailingComponent(name))) {
1533
/* Close the node that is named above. If the XML is
1534
* correct, then the node should be at the top of the
1535
* node stack (last element of the path).
1537
* If not found, "ignore" the close.
1539
* If not found on the top of the node stack, then
1540
* we close serveral nodes.
1543
if (!(ptr = xml_path.lastComponent()))
1545
if (!(ok = closeNode(xml_path.stringPtr)))
1547
if (strcmp(ptr, name) == 0) {
1548
xml_path.truncate(ptr);
1551
xml_path.truncate(ptr);
1557
/* Append "/" followed by 'name' to xml_path, then report the new node
 * by calling openNode() with the full path and the current contents
 * of xml_value. The result of openNode() is returned.
 */
bool CSXML::internalOpenNode(const char *name)
1561
ok = xml_path.addString("/", this);
1564
ok = xml_path.addString(name, this);
1567
return openNode(this->xml_path.stringPtr, this->xml_value.stringPtr);
1570
/* Run the parser over the input and translate the operation codes
 * returned by processChar() into internalOpenNode(),
 * internalCloseNode() and addAttribute() calls.
 *
 * my_flags is stored in this->flags. XML_KEEP_EMPTY_CDATA is tested
 * when character data ends: a data node is reported only if
 * xml_value is non-empty or that flag is set.
 *
 * The main loop runs while the current character is not
 * CS_XML_EOF_CHAR and 'ok' is still true. Each code returned by
 * processChar() is examined three times: through XML_OP_1_MASK,
 * XML_DATA_MASK and XML_OP_2_MASK.
 */
bool CSXML::parseXML(int32_t my_flags)
1577
this->flags = my_flags;
/* Zero-length appends on the three work strings; presumably these
 * make sure each one holds a valid empty string -- confirm. */
1578
ok = xml_path.addChars(0, NULL, false, this);
1581
ok = xml_name.addChars(0, NULL, false, this);
1584
ok = xml_value.addChars(0, NULL, false, this);
1589
while (ch != CS_XML_EOF_CHAR && ok) {
1590
op = processChar(ch);
1591
/* First operation: end-of-tag codes close the current node, passing
 * the closing token; start/attribute/end-of-data codes use the
 * accumulated xml_name and xml_value and then reset both. */
switch (op & XML_OP_1_MASK) {
1594
case XML_OP_1_END_TAG:
1596
case XML_OP_1_END_CLOSE_TAG:
1598
case XML_OP_1_END_EMPTY_TAG:
1599
ok = internalCloseNode("/>", true);
1601
case XML_OP_1_END_PI_TAG:
1602
ok = internalCloseNode("?>", true);
1604
case XML_OP_1_END_ENTITY_TAG:
1605
ok = internalCloseNode(">", true);
1607
case XML_OP_1_END_BRACKET_TAG:
1608
ok = internalCloseNode("]>", true);
1610
case XML_OP_1_END_UNKNOWN_TAG:
1611
ok = internalCloseNode(">", true);
1613
case XML_OP_1_START_CDATA_TAG:
1615
case XML_OP_1_START_COMMENT:
1617
case XML_OP_1_START_TAG:
/* A name classified as IS_XML_CLOSE_TAG closes the named node;
 * the other visible call opens a node with that name. */
1618
if (nodeType(xml_name.stringPtr) == IS_XML_CLOSE_TAG)
1619
ok = internalCloseNode(xml_name.stringPtr, false);
1621
ok = internalOpenNode(xml_name.stringPtr);
1622
xml_name.setEmpty();
1623
xml_value.setEmpty();
1625
case XML_OP_1_ADD_ATTR:
/* Names classified as open/close brackets are not reported as
 * attributes. */
1626
tagtype = nodeType(xml_name.stringPtr);
1627
if (tagtype != IS_XML_OPEN_BRACKET && tagtype != IS_XML_CLOSE_BRACKET)
1628
ok = addAttribute(xml_path.stringPtr, xml_name.stringPtr, xml_value.stringPtr);
1629
xml_name.setEmpty();
1630
xml_value.setEmpty();
1632
case XML_OP_1_END_CDATA:
/* Character data is reported as a node with an empty name that is
 * opened and then closed again. */
1633
if (xml_value.stringLen || (my_flags & XML_KEEP_EMPTY_CDATA)) {
1634
ok = internalOpenNode("");
1635
xml_name.setEmpty();
1636
xml_value.setEmpty();
1637
ok = internalCloseNode("", true);
1640
case XML_OP_1_END_CDATA_TAG:
1641
ok = internalOpenNode("![CDATA[");
1642
xml_name.setEmpty();
1643
xml_value.setEmpty();
1645
ok = internalCloseNode("]]>", true);
1647
case XML_OP_1_END_COMMENT:
1648
ok = internalOpenNode("!--");
1649
xml_name.setEmpty();
1650
xml_value.setEmpty();
1652
ok = internalCloseNode("-->", true);
1657
/* Data: append the bytes from getDataPtr()/getDataLen() to xml_name
 * or xml_value. The name is added with the third addChars() argument
 * true (presumably the lower-casing flag -- confirm), the value with
 * false. */
switch (op & XML_DATA_MASK) {
1660
ok = xml_name.addChars(getDataLen(), getDataPtr(), true, this);
1662
case XML_DATA_CDATA:
1663
case XML_DATA_CDATA_TAG:
1665
case XML_DATA_VALUE:
1666
ok = xml_value.addChars(getDataLen(), getDataPtr(), false, this);
1671
/* Second operation: the same end-of-tag handling as for the first. */
switch (op & XML_OP_2_MASK) {
1674
case XML_OP_2_END_TAG:
1676
case XML_OP_2_END_CLOSE_TAG:
1678
case XML_OP_2_END_EMPTY_TAG:
1679
ok = internalCloseNode("/>", true);
1681
case XML_OP_2_END_PI_TAG:
1682
ok = internalCloseNode("?>", true);
1684
case XML_OP_2_END_ENTITY_TAG:
1685
ok = internalCloseNode(">", true);
1687
case XML_OP_2_END_BRACKET_TAG:
1688
ok = internalCloseNode("]>", true);
1690
case XML_OP_2_END_UNKNOWN_TAG:
1691
ok = internalCloseNode(">", true);
1693
case XML_OP_2_START_CDATA_TAG:
1695
case XML_OP_2_START_COMMENT:
1704
xml_value.setNull();
1708
/* ------------------------------------------------------------------- */
1711
/* Print "OPEN <path>" on stdout. If 'value' is non-NULL and not
 * empty, it is printed on the following line, indented by two spaces.
 */
bool CSXMLPrint::openNode(char *path, char *value)
1713
printf("OPEN %s\n", path);
1714
if (value && *value)
1715
printf(" %s\n", value);
1719
/* Print "close <path>" on stdout. */
bool CSXMLPrint::closeNode(char *path)
1721
printf("close %s\n", path);
1725
/* Print an attribute on stdout, either as "attr <path> <name>=<value>"
 * or, without a value, as "attr <path> <name>".
 * Presumably the short form is used when 'value' is NULL -- confirm.
 */
bool CSXMLPrint::addAttribute(char *path, char *name, char *value)
1728
printf("attr %s %s=%s\n", path, name, value);
1730
printf("attr %s %s\n", path, name);
1734
/* ------------------------------------------------------------------- */
1737
/* Parse a NUL-terminated XML document held in memory. The input
 * length is taken with strlen(data), and the result of
 * parseXML(my_flags) is returned.
 */
bool CSXMLBuffer::parseString(const char *data, int32_t my_flags)
1740
dataLen = strlen(data);
1742
return parseXML(my_flags);
1745
/* Parse an XML document held in memory, with the length given
 * explicitly in 'len' instead of by a terminating NUL.
 * Returns the result of parseXML(my_flags).
 */
bool CSXMLBuffer::parseData(const char *data, size_t len, int32_t my_flags)
1750
return parseXML(my_flags);
1753
/* Supply the next input character from the memory buffer.
 * When dataPos has reached dataLen, *ch is set to CS_XML_EOF_CHAR.
 * Otherwise *ch is the byte at charData[dataPos]; the cast through
 * unsigned char keeps bytes >= 0x80 from being sign-extended.
 */
bool CSXMLBuffer::getChar(wchar_t *ch)
1755
if (dataPos == dataLen)
1756
*ch = CS_XML_EOF_CHAR;
1758
*ch = (wchar_t) (unsigned char) charData[dataPos];
1764
/* ------------------------------------------------------------------- */
1767
/* Open 'file_name' for reading (mode "r") and parse it with
 * parseXML(my_flags). If fopen() fails, the error is recorded with
 * setError(errno, NULL).
 */
bool CSXMLFile::parseFile(char *file_name, int32_t my_flags)
1771
if (!(this->file = fopen(file_name, "r"))) {
1772
setError(errno, NULL);
1775
ok = parseXML(my_flags);
1780
bool CSXMLFile::getChar(wchar_t *ch)
1784
next_ch = fgetc(file);
1785
if (next_ch == EOF) {
1787
setError(errno, NULL);
1790
*ch = CS_XML_EOF_CHAR;
1793
*ch = (wchar_t) next_ch;