1
/* Copyright (c) 2010 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* Paul McCullagh (H&G2JCtL)
40
/*
 * Character classification and parse-buffer helpers.
 *
 * Every macro parameter is parenthesized in the expansion so that an
 * expression argument (a conditional expression, an address-of, pointer
 * arithmetic) groups the way the caller wrote it. The arguments are still
 * evaluated more than once, so they must not have side effects.
 *
 * SET_CHAR and ADD_CHAR still expand to a brace-enclosed block, so call
 * sites written as `SET_CHAR(p, c);` keep working unchanged.
 */

/* Non-zero if ch is a space, tab, line feed or carriage return. */
#define ISSPACE(ch) ((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')

/* Non-zero if ch is one of the single-character tokens '*' '+' '(' ')' ',' '|' '[' ']' '?' '/'. */
#define ISSINGLE(ch) ((ch) == '*' || (ch) == '+' || (ch) == '(' || (ch) == ')' || (ch) == ',' || (ch) == '|' || (ch) == '[' || (ch) == ']' || (ch) == '?' || (ch) == '/')

/* Make the buffer of x hold exactly one character, ch. */
#define SET_CHAR(x, ch) { (x)->buffer[0] = (ch); (x)->count = 1; }

/* Append ch to the buffer of x. Once PARSE_BUFFER_SIZE characters are stored
 * the count no longer grows and ch overwrites the last slot instead. */
#define ADD_CHAR(x, ch) { if ((x)->count < PARSE_BUFFER_SIZE) { (x)->buffer[(x)->count] = (ch); (x)->count++; } else (x)->buffer[PARSE_BUFFER_SIZE-1] = (ch); }
46
// Checks the characters collected in this->buffer against the C string `ch`.
bool CSXMLParser::match_string(const char *ch)
50
// Visit each of the this->count characters currently stored in the buffer.
for (i=0; i<this->count; i++) {
51
// Mismatch test between the buffered character and the current character of `ch`.
if (this->buffer[i] != *ch)
57
// True only when the loop index reached this->count.
return(i == this->count);
60
// Records, for the current nesting level, which XML_OP_1_END_* operation
// belongs to the construct being opened: the value is stored in
// this->end_type[this->nesting].
// NOTE(review): presumably `ch` (the character that opened the construct)
// selects which of the six codes below is stored - confirm.
void CSXMLParser::increment_nesting(wchar_t ch)
62
// end_type[] is only written while the level is below PARSE_STACK_SIZE.
if (this->nesting < PARSE_STACK_SIZE) {
65
this->end_type[this->nesting] = XML_OP_1_END_CLOSE_TAG;
68
this->end_type[this->nesting] = XML_OP_1_END_PI_TAG;
71
this->end_type[this->nesting] = XML_OP_1_END_ENTITY_TAG;
74
this->end_type[this->nesting] = XML_OP_1_END_BRACKET_TAG;
78
this->end_type[this->nesting] = XML_OP_1_END_UNKNOWN_TAG;
80
this->end_type[this->nesting] = XML_OP_1_END_TAG;
87
int32_t CSXMLParser::parseChar(wchar_t ch)
88
/* This function does the actual work of parsing. It expects
89
* "complete" characters as input. This could be 4 byte characters
90
* as long as it is able to recognize the characters that are
91
* relevant to parsing.
92
* The function outputs processing instructions, and indicates
93
* how the output data is to be understood.
96
switch (this->state) {
97
case XML_BEFORE_CDATA:
99
/* This is the initial state! */
101
this->state = XML_LT;
102
this->type = XML_noop;
105
this->state = XML_IN_CDATA;
106
this->type = XML_CDATA_CH;
112
this->state = XML_LT;
113
this->type = XML_noop;
116
this->type = XML_CDATA_CH;
122
this->state = XML_BEFORE_ATTR;
123
if (this->step == XML_STEP_TAG)
124
this->type = XML_start_tag_TAG_CH;
125
else if (this->step == XML_STEP_NESTED)
126
this->type = XML_TAG_CH;
127
else if (this->step == XML_STEP_NONE)
128
this->type = XML_end_cdata_TAG_CH;
130
this->type = XML_add_attr_TAG_CH;
131
this->step = XML_STEP_TAG;
132
increment_nesting(ch);
136
this->state = XML_IN_CDATA;
137
this->type = XML_CDATA_CH;
141
else if (ch == '!') {
142
this->state = XML_LT_BANG;
143
this->type = XML_noop;
147
this->state = XML_IN_TAG_NAME;
148
if (this->step == XML_STEP_TAG)
149
this->type = XML_start_tag_TAG_CH;
150
else if (this->step == XML_STEP_NESTED)
151
this->type = XML_TAG_CH;
152
else if (this->step == XML_STEP_NONE)
153
this->type = XML_end_cdata_TAG_CH;
155
this->type = XML_add_attr_TAG_CH;
156
this->step = XML_STEP_TAG;
157
increment_nesting(ch);
163
this->state = XML_LT_BANG_DASH;
164
this->type = XML_noop;
166
else if (ch == '[') {
167
this->state = XML_LT_BANG_SQR;
168
this->type = XML_noop;
171
this->state = XML_IN_TAG_NAME;
172
if (this->step == XML_STEP_TAG)
173
this->type = XML_start_tag_TAG_CH;
174
else if (this->step == XML_STEP_NESTED)
175
this->type = XML_TAG_CH;
176
else if (this->step == XML_STEP_NONE)
177
this->type = XML_end_cdata_TAG_CH;
179
this->type = XML_add_attr_TAG_CH;
180
this->step = XML_STEP_TAG;
181
increment_nesting('!');
186
case XML_LT_BANG_DASH:
188
this->state = XML_IN_COMMENT;
189
if (this->step == XML_STEP_TAG)
190
this->type = XML_start_tag_start_comment;
191
else if (this->step == XML_STEP_NESTED)
192
this->type = XML_start_comment;
193
else if (this->step == XML_STEP_NONE)
194
this->type = XML_end_cdata_start_comment;
196
this->type = XML_add_attr_start_comment;
197
increment_nesting(' ');
200
this->state = XML_IN_CDATA;
201
this->type = XML_CDATA_CH;
205
case XML_LT_BANG_SQR:
207
this->type = XML_noop;
208
else if (ch == '[') {
209
this->state = XML_BEFORE_ATTR;
210
if (this->step == XML_STEP_TAG)
211
this->type = XML_start_tag_TAG_CH;
212
else if (this->step == XML_STEP_NESTED)
213
this->type = XML_TAG_CH;
214
else if (this->step == XML_STEP_NONE)
215
this->type = XML_end_cdata_TAG_CH;
217
this->type = XML_add_attr_TAG_CH;
218
this->step = XML_STEP_TAG;
219
increment_nesting('[');
224
this->state = XML_LT_BANG_SQR_IN_NAME;
225
this->type = XML_noop;
231
case XML_LT_BANG_SQR_IN_NAME:
233
this->state = XML_LT_BANG_SQR_AFTER_NAME;
234
this->type = XML_noop;
236
else if (ch == '[') {
237
if (match_string("![CDATA")) {
238
this->state = XML_IN_CDATA_TAG;
239
if (this->step == XML_STEP_TAG)
240
this->type = XML_start_tag_start_cdata_tag;
241
else if (this->step == XML_STEP_NESTED)
242
this->type = XML_start_cdata_tag;
243
else if (this->step == XML_STEP_NONE)
244
this->type = XML_end_cdata_start_cdata_tag;
246
this->type = XML_add_attr_start_cdata_tag;
247
this->step = XML_STEP_TAG;
248
increment_nesting('[');
251
this->state = XML_BEFORE_ATTR;
252
if (this->step == XML_STEP_TAG)
253
this->type = XML_start_tag_TAG_CH;
254
else if (this->step == XML_STEP_NESTED)
255
this->type = XML_TAG_CH;
256
else if (this->step == XML_STEP_NONE)
257
this->type = XML_end_cdata_TAG_CH;
259
this->type = XML_add_attr_TAG_CH;
260
this->step = XML_STEP_TAG;
261
increment_nesting('[');
265
this->type = XML_noop;
269
case XML_LT_BANG_SQR_AFTER_NAME:
271
if (match_string("![CDATA")) {
272
this->state = XML_IN_CDATA_TAG;
273
if (this->step == XML_STEP_TAG)
274
this->type = XML_start_tag_start_cdata_tag;
275
else if (this->step == XML_STEP_NESTED)
276
this->type = XML_start_cdata_tag;
277
else if (this->step == XML_STEP_NONE)
278
this->type = XML_end_cdata_start_cdata_tag;
280
this->type = XML_add_attr_start_cdata_tag;
281
increment_nesting('[');
284
this->state = XML_BEFORE_ATTR;
285
if (this->step == XML_STEP_TAG)
286
this->type = XML_start_tag_TAG_CH;
287
else if (this->step == XML_STEP_NESTED)
288
this->type = XML_TAG_CH;
289
else if (this->step == XML_STEP_NONE)
290
this->type = XML_end_cdata_TAG_CH;
292
this->type = XML_add_attr_TAG_CH;
293
this->step = XML_STEP_TAG;
294
increment_nesting('[');
298
/* Ignore data until the '['!!! */
299
this->type = XML_noop;
301
case XML_IN_TAG_NAME:
303
this->state = XML_BEFORE_ATTR;
304
this->type = XML_noop;
306
else if (ch == '<') {
307
this->state = XML_LT;
308
this->type = XML_noop;
310
else if (ch == '>') {
311
if (this->step == XML_STEP_TAG)
312
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
313
else if (this->step == XML_STEP_NESTED)
314
this->type = XML_end_tag(END_TAG_TYPE(this));
316
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
319
this->step = XML_STEP_NESTED;
320
this->state = XML_BEFORE_ATTR;
323
this->step = XML_STEP_NONE;
324
this->state = XML_IN_CDATA;
327
else if (ch == '"' || ch == '\'') {
328
this->state = XML_QUOTE_BEFORE_VALUE;
330
this->type = XML_noop;
332
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
333
this->state = XML_SLASH;
334
this->type = XML_noop;
336
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
337
this->state = XML_QMARK;
338
this->type = XML_noop;
340
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
341
this->state = XML_SQR;
342
this->type = XML_noop;
344
else if (ISSINGLE(ch)) {
345
this->state = XML_BEFORE_ATTR;
346
if (this->step == XML_STEP_TAG)
347
this->type = XML_start_tag_ATTR_CH;
348
else if (this->step == XML_STEP_NESTED)
349
this->type = XML_ATTR_CH;
351
this->type = XML_add_attr_ATTR_CH;
352
this->step = XML_STEP_ATTR;
356
this->type = XML_TAG_CH;
360
case XML_BEFORE_ATTR:
362
this->type = XML_noop;
363
else if (ch == '<') {
364
this->state = XML_LT;
365
this->type = XML_noop;
367
else if (ch == '>') {
368
if (this->step == XML_STEP_TAG)
369
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
370
else if (this->step == XML_STEP_NESTED)
371
this->type = XML_end_tag(END_TAG_TYPE(this));
373
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
376
this->step = XML_STEP_NESTED;
377
this->state = XML_BEFORE_ATTR;
380
this->step = XML_STEP_NONE;
381
this->state = XML_IN_CDATA;
384
else if (ch == '"' || ch == '\'') {
385
this->state = XML_QUOTE_BEFORE_VALUE;
387
this->type = XML_noop;
389
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
390
this->state = XML_SLASH;
391
this->type = XML_noop;
393
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
394
this->state = XML_QMARK;
395
this->type = XML_noop;
397
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
398
this->state = XML_SQR;
399
this->type = XML_noop;
401
else if (ISSINGLE(ch)) {
402
if (this->step == XML_STEP_TAG)
403
this->type = XML_start_tag_ATTR_CH;
404
else if (this->step == XML_STEP_NESTED)
405
this->type = XML_ATTR_CH;
407
this->type = XML_add_attr_ATTR_CH;
408
this->step = XML_STEP_ATTR;
412
this->state = XML_IN_ATTR;
413
if (this->step == XML_STEP_TAG)
414
this->type = XML_start_tag_ATTR_CH;
415
else if (this->step == XML_STEP_NESTED)
416
this->type = XML_ATTR_CH;
418
this->type = XML_add_attr_ATTR_CH;
419
this->step = XML_STEP_ATTR;
425
this->state = XML_BEFORE_EQUAL;
426
this->type = XML_noop;
428
else if (ch == '<') {
429
this->state = XML_LT;
430
this->type = XML_noop;
432
else if (ch == '>') {
433
if (this->step == XML_STEP_TAG)
434
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
435
else if (this->step == XML_STEP_NESTED)
436
this->type = XML_end_tag(END_TAG_TYPE(this));
438
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
441
this->step = XML_STEP_NESTED;
442
this->state = XML_BEFORE_ATTR;
445
this->step = XML_STEP_NONE;
446
this->state = XML_IN_CDATA;
449
else if (ch == '"' || ch == '\'') {
450
this->state = XML_QUOTE_BEFORE_VALUE;
452
this->type = XML_noop;
454
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
455
this->state = XML_SLASH;
456
this->type = XML_noop;
458
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
459
this->state = XML_QMARK;
460
this->type = XML_noop;
462
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
463
this->state = XML_SQR;
464
this->type = XML_noop;
466
else if (ISSINGLE(ch)) {
467
this->state = XML_BEFORE_ATTR;
468
if (this->step == XML_STEP_TAG)
469
this->type = XML_start_tag_ATTR_CH;
470
else if (this->step == XML_STEP_NESTED)
471
this->type = XML_ATTR_CH;
473
this->type = XML_add_attr_ATTR_CH;
474
this->step = XML_STEP_ATTR;
477
else if (ch == '=') {
478
this->state = XML_AFTER_EQUAL;
479
this->type = XML_noop;
482
this->type = XML_ATTR_CH;
486
case XML_BEFORE_EQUAL:
488
this->type = XML_noop;
489
else if (ch == '<') {
490
this->state = XML_LT;
491
this->type = XML_noop;
493
else if (ch == '>') {
494
if (this->step == XML_STEP_TAG)
495
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
496
else if (this->step == XML_STEP_NESTED)
497
this->type = XML_end_tag(END_TAG_TYPE(this));
499
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
502
this->step = XML_STEP_NESTED;
503
this->state = XML_BEFORE_ATTR;
506
this->step = XML_STEP_NONE;
507
this->state = XML_IN_CDATA;
510
else if (ch == '"' || ch == '\'') {
511
this->state = XML_QUOTE_BEFORE_VALUE;
513
this->type = XML_noop;
515
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
516
this->state = XML_SLASH;
517
this->type = XML_noop;
519
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
520
this->state = XML_QMARK;
521
this->type = XML_noop;
523
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
524
this->state = XML_SQR;
525
this->type = XML_noop;
527
else if (ISSINGLE(ch)) {
528
this->state = XML_BEFORE_ATTR;
529
if (this->step == XML_STEP_TAG)
530
this->type = XML_start_tag_ATTR_CH;
531
else if (this->step == XML_STEP_NESTED)
532
this->type = XML_ATTR_CH;
534
this->type = XML_add_attr_ATTR_CH;
535
this->step = XML_STEP_ATTR;
538
else if (ch == '=') {
539
this->state = XML_AFTER_EQUAL;
540
this->type = XML_noop;
543
this->state = XML_IN_ATTR;
544
if (this->step == XML_STEP_TAG)
545
this->type = XML_start_tag_ATTR_CH;
546
else if (this->step == XML_STEP_NESTED)
547
this->type = XML_ATTR_CH;
549
this->type = XML_add_attr_ATTR_CH;
550
this->step = XML_STEP_ATTR;
554
case XML_AFTER_EQUAL:
556
this->state = XML_AFTER_EQUAL;
557
this->type = XML_noop;
559
else if (ch == '<') {
560
this->state = XML_LT;
561
this->type = XML_noop;
563
else if (ch == '>') {
564
if (this->step == XML_STEP_TAG)
565
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
566
else if (this->step == XML_STEP_NESTED)
567
this->type = XML_end_tag(END_TAG_TYPE(this));
569
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
572
this->step = XML_STEP_NESTED;
573
this->state = XML_BEFORE_ATTR;
576
this->step = XML_STEP_NONE;
577
this->state = XML_IN_CDATA;
580
else if (ch == '"' || ch == '\'') {
581
this->state = XML_QUOTE_BEFORE_VALUE;
583
this->type = XML_noop;
585
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
586
this->state = XML_SLASH;
587
this->type = XML_noop;
589
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
590
this->state = XML_QMARK;
591
this->type = XML_noop;
593
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
594
this->state = XML_SQR;
595
this->type = XML_noop;
597
else if (ISSINGLE(ch)) {
598
this->state = XML_BEFORE_ATTR;
599
if (this->step == XML_STEP_TAG)
600
this->type = XML_start_tag_ATTR_CH;
601
else if (this->step == XML_STEP_NESTED)
602
this->type = XML_ATTR_CH;
604
this->type = XML_add_attr_ATTR_CH;
605
this->step = XML_STEP_ATTR;
609
this->state = XML_IN_VALUE;
611
if (this->step == XML_STEP_TAG)
612
this->type = XML_start_tag_VALUE_CH;
613
else if (this->step == XML_STEP_VALUE)
614
this->type = XML_add_attr_VALUE_CH;
616
this->type = XML_VALUE_CH;
617
this->step = XML_STEP_VALUE;
621
case XML_QUOTE_BEFORE_VALUE:
622
if (ch == this->quote) {
623
this->state = XML_QUOTE_AFTER_VALUE;
625
if (this->step == XML_STEP_TAG)
626
this->type = XML_start_tag_VALUE_CH;
627
else if (this->step == XML_STEP_VALUE)
628
this->type = XML_add_attr_VALUE_CH;
630
this->type = XML_VALUE_CH;
631
this->step = XML_STEP_VALUE;
635
this->state = XML_IN_VALUE;
636
if (this->step == XML_STEP_TAG)
637
this->type = XML_start_tag_VALUE_CH;
638
else if (this->step == XML_STEP_VALUE)
639
this->type = XML_add_attr_VALUE_CH;
641
this->type = XML_VALUE_CH;
642
this->step = XML_STEP_VALUE;
648
if (ch == this->quote) {
649
this->state = XML_QUOTE_AFTER_VALUE;
650
this->type = XML_noop;
653
this->type = XML_VALUE_CH;
658
/* A value without quotes (for HTML!) */
660
this->state = XML_BEFORE_ATTR;
661
this->type = XML_noop;
663
else if (ch == '<') {
664
this->state = XML_LT;
665
this->type = XML_noop;
667
else if (ch == '>') {
668
if (this->step == XML_STEP_TAG)
669
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
670
else if (this->step == XML_STEP_NESTED)
671
this->type = XML_end_tag(END_TAG_TYPE(this));
673
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
676
this->step = XML_STEP_NESTED;
677
this->state = XML_BEFORE_ATTR;
680
this->step = XML_STEP_NONE;
681
this->state = XML_IN_CDATA;
684
else if (ch == '"' || ch == '\'') {
685
this->state = XML_QUOTE_BEFORE_VALUE;
687
this->type = XML_noop;
690
this->type = XML_VALUE_CH;
695
case XML_QUOTE_AFTER_VALUE:
697
this->state = XML_BEFORE_ATTR;
698
this->type = XML_noop;
700
else if (ch == '<') {
701
this->state = XML_LT;
702
this->type = XML_noop;
704
else if (ch == '>') {
705
if (this->step == XML_STEP_TAG)
706
this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
707
else if (this->step == XML_STEP_NESTED)
708
this->type = XML_end_tag(END_TAG_TYPE(this));
710
this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
713
this->step = XML_STEP_NESTED;
714
this->state = XML_BEFORE_ATTR;
717
this->step = XML_STEP_NONE;
718
this->state = XML_IN_CDATA;
721
else if (ch == '"' || ch == '\'') {
722
this->state = XML_QUOTE_BEFORE_VALUE;
724
this->type = XML_noop;
726
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
727
this->state = XML_SLASH;
728
this->type = XML_noop;
730
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
731
this->state = XML_QMARK;
732
this->type = XML_noop;
734
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
735
this->state = XML_SQR;
736
this->type = XML_noop;
738
else if (ISSINGLE(ch)) {
739
this->state = XML_BEFORE_ATTR;
740
if (this->step == XML_STEP_TAG)
741
this->type = XML_start_tag_ATTR_CH;
742
else if (this->step == XML_STEP_NESTED)
743
this->type = XML_ATTR_CH;
745
this->type = XML_add_attr_ATTR_CH;
746
this->step = XML_STEP_ATTR;
750
this->state = XML_IN_ATTR;
751
if (this->step == XML_STEP_TAG)
752
this->type = XML_start_tag_ATTR_CH;
753
else if (this->step == XML_STEP_NESTED)
754
this->type = XML_ATTR_CH;
756
this->type = XML_add_attr_ATTR_CH;
757
this->step = XML_STEP_ATTR;
771
this->state = XML_BEFORE_ATTR;
772
if (this->step == XML_STEP_TAG)
773
this->type = XML_start_tag_TAG_CH;
774
else if (this->step == XML_STEP_NESTED)
775
this->type = XML_TAG_CH;
776
else if (this->step == XML_STEP_NONE)
777
this->type = XML_end_cdata_TAG_CH;
779
this->type = XML_add_attr_TAG_CH;
780
this->step = XML_STEP_ATTR;
782
else if (ch == '<') {
783
this->state = XML_LT;
784
if (this->step == XML_STEP_TAG)
785
this->type = XML_start_tag_TAG_CH;
786
else if (this->step == XML_STEP_NESTED)
787
this->type = XML_TAG_CH;
788
else if (this->step == XML_STEP_NONE)
789
this->type = XML_end_cdata_TAG_CH;
791
this->type = XML_add_attr_TAG_CH;
792
this->step = XML_STEP_TAG;
794
else if (ch == '>') {
795
if (this->state == XML_SLASH) {
796
if (this->step == XML_STEP_TAG)
797
this->type = XML_start_tag_end_empty_tag;
798
else if (this->step == XML_STEP_NESTED)
799
this->type = XML_end_empty_tag;
801
this->type = XML_add_attr_end_empty_tag;
803
else if (this->state == XML_SQR) {
804
if (this->step == XML_STEP_TAG)
805
this->type = XML_start_tag_end_tag(XML_OP_1_END_BRACKET_TAG);
806
else if (this->step == XML_STEP_NESTED)
807
this->type = XML_end_tag(XML_OP_1_END_BRACKET_TAG);
809
this->type = XML_add_attr_end_tag(XML_OP_1_END_BRACKET_TAG);
812
if (this->step == XML_STEP_TAG)
813
this->type = XML_start_tag_end_pi_tag;
814
else if (this->step == XML_STEP_NESTED)
815
this->type = XML_end_pi_tag;
817
this->type = XML_add_attr_end_pi_tag;
821
this->step = XML_STEP_NESTED;
822
this->state = XML_BEFORE_ATTR;
825
this->step = XML_STEP_NONE;
826
this->state = XML_IN_CDATA;
829
else if (ch == '"' || ch == '\'') {
830
this->state = XML_QUOTE_BEFORE_VALUE;
832
if (this->step == XML_STEP_TAG)
833
this->type = XML_start_tag_TAG_CH;
834
else if (this->step == XML_STEP_NESTED)
835
this->type = XML_TAG_CH;
836
else if (this->step == XML_STEP_NONE)
837
this->type = XML_end_cdata_TAG_CH;
839
this->type = XML_add_attr_TAG_CH;
840
this->step = XML_STEP_ATTR;
842
else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
843
this->state = XML_SLASH;
844
if (this->step == XML_STEP_TAG)
845
this->type = XML_start_tag_TAG_CH;
846
else if (this->step == XML_STEP_NESTED)
847
this->type = XML_TAG_CH;
848
else if (this->step == XML_STEP_NONE)
849
this->type = XML_end_cdata_TAG_CH;
851
this->type = XML_add_attr_TAG_CH;
852
this->step = XML_STEP_ATTR;
854
else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
855
this->state = XML_QMARK;
856
if (this->step == XML_STEP_TAG)
857
this->type = XML_start_tag_TAG_CH;
858
else if (this->step == XML_STEP_NESTED)
859
this->type = XML_TAG_CH;
860
else if (this->step == XML_STEP_NONE)
861
this->type = XML_end_cdata_TAG_CH;
863
this->type = XML_add_attr_TAG_CH;
864
this->step = XML_STEP_ATTR;
866
else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
867
this->state = XML_SQR;
868
if (this->step == XML_STEP_TAG)
869
this->type = XML_start_tag_TAG_CH;
870
else if (this->step == XML_STEP_NESTED)
871
this->type = XML_TAG_CH;
872
else if (this->step == XML_STEP_NONE)
873
this->type = XML_end_cdata_TAG_CH;
875
this->type = XML_add_attr_TAG_CH;
876
this->step = XML_STEP_ATTR;
878
else if (ISSINGLE(ch)) {
879
this->state = XML_BEFORE_ATTR;
880
if (this->step == XML_STEP_TAG)
881
this->type = XML_start_tag_TAG_CH;
882
else if (this->step == XML_STEP_NESTED)
883
this->type = XML_TAG_CH;
884
else if (this->step == XML_STEP_NONE)
885
this->type = XML_end_cdata_TAG_CH;
887
this->type = XML_add_attr_TAG_CH;
888
this->step = XML_STEP_ATTR;
892
this->state = XML_IN_ATTR;
893
if (this->step == XML_STEP_TAG)
894
this->type = XML_start_tag_TAG_CH;
895
else if (this->step == XML_STEP_NESTED)
896
this->type = XML_TAG_CH;
897
else if (this->step == XML_STEP_NONE)
898
this->type = XML_end_cdata_TAG_CH;
900
this->type = XML_add_attr_TAG_CH;
901
this->step = XML_STEP_ATTR;
907
this->state = XML_IN_COMMENT_DASH;
908
this->type = XML_noop;
911
this->type = XML_COMMENT_CH;
914
case XML_IN_COMMENT_DASH:
916
this->state = XML_IN_COMMENT_DASH_DASH;
917
this->type = XML_noop;
920
this->state = XML_IN_COMMENT;
921
this->type = XML_COMMENT_CH;
925
case XML_IN_COMMENT_DASH_DASH:
927
this->state = XML_IN_COMMENT_3_DASH;
928
this->type = XML_COMMENT_CH;
931
else if (ch == '>') {
932
this->type = XML_end_comment;
935
this->step = XML_STEP_NESTED;
936
this->state = XML_BEFORE_ATTR;
939
this->step = XML_STEP_NONE;
940
this->state = XML_IN_CDATA;
944
this->state = XML_IN_COMMENT;
945
this->type = XML_COMMENT_CH;
949
case XML_IN_COMMENT_3_DASH:
951
this->type = XML_COMMENT_CH;
954
else if (ch == '>') {
955
this->type = XML_end_comment;
958
this->step = XML_STEP_NESTED;
959
this->state = XML_BEFORE_ATTR;
962
this->step = XML_STEP_NONE;
963
this->state = XML_IN_CDATA;
967
this->state = XML_IN_COMMENT;
968
this->type = XML_COMMENT_CH;
974
case XML_IN_CDATA_TAG:
976
this->state = XML_IN_CDATA_TAG_SQR;
977
this->type = XML_noop;
980
this->type = XML_CDATA_TAG_CH;
983
case XML_IN_CDATA_TAG_SQR:
985
this->state = XML_IN_CDATA_TAG_SQR_SQR;
986
this->type = XML_noop;
989
this->state = XML_IN_CDATA_TAG;
990
this->type = XML_CDATA_TAG_CH;
994
case XML_IN_CDATA_TAG_SQR_SQR:
996
this->state = XML_IN_CDATA_TAG_3_SQR;
997
this->type = XML_CDATA_TAG_CH;
1000
else if (ch == '>') {
1001
this->type = XML_end_cdata_tag;
1003
if (this->nesting) {
1004
this->step = XML_STEP_NESTED;
1005
this->state = XML_BEFORE_ATTR;
1008
this->step = XML_STEP_NONE;
1009
this->state = XML_IN_CDATA;
1013
this->state = XML_IN_CDATA_TAG;
1014
this->type = XML_CDATA_TAG_CH;
1018
case XML_IN_CDATA_TAG_3_SQR:
1020
this->type = XML_CDATA_TAG_CH;
1023
else if (ch == '>') {
1024
this->type = XML_end_cdata_tag;
1026
if (this->nesting) {
1027
this->step = XML_STEP_NESTED;
1028
this->state = XML_BEFORE_ATTR;
1031
this->step = XML_STEP_NONE;
1032
this->state = XML_IN_CDATA;
1036
this->state = XML_IN_CDATA_TAG;
1037
this->type = XML_CDATA_TAG_CH;
1038
SET_CHAR(this, ']');
1039
ADD_CHAR(this, ']');
1047
/* ------------------------------------------------------------------- */
1048
/* CSXMLProcessor */
1050
bool CSXMLProcessor::buildConversionTable()
1054
/* By default we don't know how to convert any charset
1055
* other than ISO-8859-1 to Unicode!
1057
if (strcasecmp(charset, "ISO-8859-1") == 0) {
1058
for (i=0; i<128; i++)
1059
conversion_table[i] = (wchar_t) (i + 128);
1062
for (i=0; i<128; i++)
1063
conversion_table[i] = '?';
1068
// Private use area: E000 - F8FF
1070
int32_t CSXMLProcessor::capture_initializer(wchar_t ch)
1071
/* We capture tag and attribute data for the parsing purposes.
1072
* The buffers are initialized here (at the lowest level)
1073
* of processing after parsing.
1079
switch (op & XML_OP_1_MASK) {
1080
case XML_OP_1_START_TAG:
1083
case XML_OP_1_ADD_ATTR:
1091
int32_t CSXMLProcessor::entity_translator(wchar_t ch)
1092
/* This function handles entities.
1093
* Certain entities are translated into UNICODE characters.
1094
* Strictly speaking, these entities are only recognised by HTML.
1095
* The few entities that are recognised by XML are first translated
1096
* into some reserved characters for the parser. This is to ensure
1097
* that the parser does not recognize them as characters with special
1098
* meaning! This includes '&', '<' and '>'.
1103
op = capture_initializer(ch);
1108
* This function translates the input character stream into UNICODE.
1110
int32_t CSXMLProcessor::charset_transformer(wchar_t ch)
1114
// Do transformation according to the charset.
1115
switch (this->charset_type) {
1117
if (ch > 127 && ch < 256) {
1119
uint8_t utf_ch = (uint8_t)ch;
1121
if ((utf_ch & 0xC0) != 0x80)
1122
this->utf8_count = 0;
1123
if ((utf_ch & 0x80) == 0x00)
1124
this->utf8_length = 1;
1125
else if ((utf_ch & 0xE0) == 0xC0)
1126
this->utf8_length = 2;
1127
else if ((utf_ch & 0xF0) == 0xE0)
1128
this->utf8_length = 3;
1129
else if ((utf_ch & 0xF8) == 0xF0)
1130
this->utf8_length = 4;
1131
else if ((utf_ch & 0xFC) == 0xF8)
1132
this->utf8_length = 5;
1133
else if ((utf_ch & 0xFE) == 0xFC)
1134
this->utf8_length = 6;
1135
this->utf8_buffer[this->utf8_count] = (uint32_t) utf_ch;
1137
if (this->utf8_count < this->utf8_length) {
1138
// I need more bytes!
1139
setDataType(XML_noop);
1143
switch (this->utf8_length) {
1145
utf_value = this->utf8_buffer[0] & 0x0000007F;
1148
utf_value = ((this->utf8_buffer[0] & 0x0000001F) << 6) |
1149
(this->utf8_buffer[1] & 0x0000003F);
1150
if (utf_value < 0x00000080)
1154
utf_value = ((this->utf8_buffer[0] & 0x0000000F) << 12) |
1155
((this->utf8_buffer[1] & 0x0000003F) << 6) |
1156
(this->utf8_buffer[2] & 0x0000003F);
1157
if (utf_value < 0x000000800)
1161
utf_value = ((this->utf8_buffer[0] & 0x00000007) << 18) |
1162
((this->utf8_buffer[1] & 0x0000003F) << 12) |
1163
((this->utf8_buffer[2] & 0x0000003F) << 6) |
1164
(this->utf8_buffer[3] & 0x0000003F);
1165
if (utf_value < 0x00010000)
1169
utf_value = ((this->utf8_buffer[0] & 0x00000003) << 24) |
1170
((this->utf8_buffer[1] & 0x0000003F) << 18) |
1171
((this->utf8_buffer[2] & 0x0000003F) << 12) |
1172
((this->utf8_buffer[3] & 0x0000003F) << 6) |
1173
(this->utf8_buffer[4] & 0x0000003F);
1174
if (utf_value < 0x00200000)
1178
utf_value = ((this->utf8_buffer[0] & 0x00000001) << 30) |
1179
((this->utf8_buffer[1] & 0x0000003F) << 24) |
1180
((this->utf8_buffer[2] & 0x0000003F) << 18) |
1181
((this->utf8_buffer[3] & 0x0000003F) << 12) |
1182
((this->utf8_buffer[4] & 0x0000003F) << 6) |
1183
(this->utf8_buffer[5] & 0x0000003F);
1184
if (utf_value < 0x04000000)
1188
if (utf_value > 0x0000FFFF)
1194
case CHARSET_TO_CONVERT_8_BIT:
1195
if (ch > 127 && ch < 256)
1196
ch = this->conversion_table[((unsigned char) ch) - 128];
1200
op = entity_translator(ch);
1202
// Determine the characters set:
1203
switch (op & XML_OP_1_MASK) {
1204
case XML_OP_1_START_TAG:
1205
if (strcmp(this->pr_tag, "?xml") == 0)
1210
case XML_OP_1_ADD_ATTR:
1212
if (strcasecmp(this->pr_name, "encoding") == 0) {
1213
strcpy(this->charset, this->pr_value);
1214
if (strcasestr(this->charset, "utf-8"))
1215
this->charset_type = CHARSET_UTF_8;
1216
else if (strcasestr(this->charset, "ucs-2") ||
1217
strcasestr(this->charset, "ucs-4") ||
1218
strcasestr(this->charset, "unicode"))
1219
this->charset_type = CHARSET_STANDARD;
1221
this->charset_type = CHARSET_TO_CONVERT_8_BIT;
1222
buildConversionTable();
1231
// Appends up to `slen` wide characters from `schars` to the char buffer `dstr`.
//   dstr   destination buffer
//   dlen   in/out: current length of the text in dstr
//   dsize  capacity of dstr in bytes
//   schars source wide characters
//   slen   number of source characters
// NOTE(review): dsize is a size_t, so `dsize-1` wraps to a huge value when
// dsize is 0 and the capacity test below no longer protects the buffer.
void CSXMLProcessor::appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen)
1233
for (size_t i=0; i < slen; i++) {
1234
// Store only while at least one byte of dstr stays free after the current length.
if (*dlen < dsize-1) {
1238
// The wide character is narrowed with a plain cast to char.
dstr[*dlen] = (char)*schars;
1246
int32_t CSXMLProcessor::processChar(wchar_t ch)
1250
op = charset_transformer(ch);
1253
* Capture output tag and attribute data.
1254
* This must be done at the highest level, after
1257
switch (op & XML_DATA_MASK) {
1259
appendWCharToString(this->pr_tag, &this->tlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1262
appendWCharToString(this->pr_name, &this->nlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1264
case XML_DATA_VALUE:
1265
appendWCharToString(this->pr_value, &this->vlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
1271
bool CSXMLProcessor::getError(int32_t *err, char **msg)
1278
// Stores a description of the error `err` in err_message, either by copying
// `msg` or by formatting a text for the error code.
// NOTE(review): other methods in this file call setError(code, NULL), so the
// copy of `msg` is presumably guarded by a non-NULL test - confirm.
void CSXMLProcessor::setError(int32_t err, char *msg)
1282
// strncpy() leaves the destination unterminated when msg fills it, hence the explicit NUL in the last byte.
strncpy(err_message, msg, CS_XML_ERR_MSG_SIZE);
1283
err_message[CS_XML_ERR_MSG_SIZE-1] = 0;
1288
case CS_XML_ERR_OUT_OF_MEMORY:
1289
sprintf(err_message, "AES parse error- insufficient memory");
1291
case CS_XML_ERR_CHAR_TOO_LARGE:
1292
sprintf(err_message, "AES parse error- UNICODE character too large to be encoded as UTF-8");
1295
// Here the code is handed to strerror() to obtain the text (presumably the default branch - confirm).
sprintf(err_message, "AES parse error- %s", strerror(err));
1300
// Writes `prefix` immediately followed by the stored err_message to stdout.
// The format string adds no separator and no newline.
void CSXMLProcessor::printError(char *prefix)
1302
printf("%s%s", prefix, err_message);
1305
/* ------------------------------------------------------------------- */
1309
#define EXTRA_SIZE 2
1311
#define EXTRA_SIZE 100
1314
// Appends the single byte `ch` to the string and keeps it NUL-terminated.
// `xml` receives CS_XML_ERR_OUT_OF_MEMORY when the buffer cannot be grown.
bool CSXMLString::addChar(char ch, CSXMLProcessor *xml)
1318
// Room is needed for `ch` plus the terminating NUL.
if (stringLen + 2 > stringSize) {
1319
// Grow with EXTRA_SIZE bytes of slack. The result goes to `ptr` first, so stringPtr is not overwritten when realloc() returns NULL.
if (!(ptr = (char *) realloc(stringPtr, stringLen + 2 + EXTRA_SIZE))) {
1320
xml->setError(CS_XML_ERR_OUT_OF_MEMORY, NULL);
1324
// Remember the capacity that was just requested.
stringSize = stringLen + 2 + EXTRA_SIZE;
1326
// Store the character followed by the terminator.
stringPtr[stringLen] = ch;
1327
stringPtr[stringLen+1] = 0;
1332
bool CSXMLString::addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml)
1338
for (i=0; i<size; i++) {
1339
uni_char = (uint32_t) buffer[i];
1341
/* Convertion to lower only done for ASCII! */
1342
if (to_lower && uni_char <= 127)
1343
uni_char = (uint32_t) tolower((int32_t) uni_char);
1345
// Convert to UTF-8!
1346
if (uni_char <= 0x0000007F) {
1347
if (!addChar((char) uni_char, xml))
1351
else if (uni_char <= 0x000007FF) {
1352
if (!addChar((char) ((0x000000C0) | ((uni_char >> 6) & 0x0000001F)), xml))
1356
else if (uni_char <= 0x00000FFFF) {
1357
if (!addChar((char) ((0x000000E0) | ((uni_char >> 12) & 0x0000000F)), xml))
1361
else if (uni_char <= 0x001FFFFF) {
1362
if (!addChar((char) ((0x000000F0) | ((uni_char >> 18) & 0x00000007)), xml))
1366
else if (uni_char <= 0x003FFFFFF) {
1367
if (!addChar((char) ((0x000000F0) | ((uni_char >> 24) & 0x00000003)), xml))
1371
else if (uni_char <= 0x07FFFFFFF) {
1372
if (!addChar((char) ((0x000000F0) | ((uni_char >> 30) & 0x00000001)), xml))
1377
xml->setError(CS_XML_ERR_CHAR_TOO_LARGE, NULL);
1381
while (shift >= 0) {
1382
if (!addChar((char) ((0x00000080) | ((uni_char >> shift) & 0x0000003F)), xml))
1390
// Appends the characters of the NUL-terminated `string`, one addChar() call
// per character. `xml` is passed through to addChar().
bool CSXMLString::addString(const char *string, CSXMLProcessor *xml)
1394
// Stop at the end of the string or as soon as a previous addChar() returned false.
while (*string && ok) {
1395
ok = addChar(*string, xml);
1401
void CSXMLString::setEmpty()
1408
void CSXMLString::setNull()
1417
// Searches backwards from the last character of the string for a '/'.
char *CSXMLString::lastComponent()
1424
// Start at the final character (index stringLen - 1).
ptr = stringPtr + stringLen - 1;
1425
// Loop while ptr is still past the first character and does not point at a '/'.
while (ptr > stringPtr && *ptr != '/')
1430
/* We assume comp begins with a '/' */
1431
char *CSXMLString::findTrailingComponent(const char *comp)
1433
char *ptr, *last_slash;
1438
ptr = stringPtr + stringLen - 1;
1442
/* Find the next '/' */
1443
while (ptr > stringPtr && *ptr != '/')
1447
if (strcmp(ptr, comp) == 0) {
1457
while (ptr > stringPtr);
1461
// Sets the string length to the offset of `ptr` from the start of the buffer.
// assumes ptr points into the buffer at stringPtr - TODO confirm
void CSXMLString::truncate(char *ptr)
1464
stringLen = ptr - stringPtr;
1467
/* ------------------------------------------------------------------- */
1470
// Node classification codes returned by CSXML::nodeType().
#define IS_XML_CDATA 0
1471
// Returned for the name "![CDATA[".
#define IS_XML_CDATA_TAG 1
1472
#define IS_XML_TAG 2
1473
#define IS_XML_CLOSE_TAG 3
1474
// Returned for the name "!--".
#define IS_XML_COMMENT 4
1475
#define IS_XML_DTD 5
1477
// Returned for the name "?xml".
#define IS_XML_PI_XML 7
1478
// Returned for a name whose second character is '[' but which is not "![CDATA[".
#define IS_XML_IN_EX 8
1479
#define IS_XML_OPEN_BRACKET 9
1480
#define IS_XML_CLOSE_BRACKET 10
1482
/* Classify a node name into one of the IS_XML_* codes.
 *
 * Checks visible in this view:
 *   - a name of length 1 can map to IS_XML_OPEN_BRACKET or
 *     IS_XML_CLOSE_BRACKET;
 *   - "!--" (compared case-insensitively) maps to IS_XML_COMMENT;
 *   - a name longer than one character whose second character is '['
 *     maps to IS_XML_CDATA_TAG when it equals "![CDATA[" (compared
 *     case-insensitively) and to IS_XML_IN_EX otherwise;
 *   - "?xml" (compared case-insensitively) maps to IS_XML_PI_XML;
 *   - IS_XML_CLOSE_TAG and IS_XML_CDATA are also returned.
 *
 * NOTE(review): the tests that select between these branches
 * (presumably a switch on name[0]) are not visible in this view --
 * confirm against the full definition.
 */
int32_t CSXML::nodeType(char *name)
1487
return IS_XML_CDATA;
1489
if (strlen(name) == 1)
1490
return IS_XML_OPEN_BRACKET;
1493
if (strlen(name) == 1)
1494
return IS_XML_CLOSE_BRACKET;
1497
return IS_XML_CLOSE_TAG;
1499
if (strlen(name) > 1) {
1500
if (strcasecmp(name, "!--") == 0)
1501
return IS_XML_COMMENT;
1502
if (name[1] == '[') {
1503
if (strcasecmp(name, "![CDATA[") == 0)
1504
return IS_XML_CDATA_TAG;
1505
return IS_XML_IN_EX;
1510
if (strcasecmp(name, "?xml") == 0)
1511
return IS_XML_PI_XML;
1516
return IS_XML_CDATA;
1519
/* Close one or more nodes at the end of xml_path.
 *
 * name:   the end marker or close-tag name. In the named branch it is
 *         looked up with xml_path.findTrailingComponent(), which
 *         assumes it begins with a '/'.
 * single: CSXML::parseXML() passes true for the fixed end markers
 *         ("/>", "?>", ">", "]>", "]]>", "-->", "") and false when it
 *         passes the name of an explicit close tag.
 *
 * First visible branch: the last path component is reported through
 * closeNode() with the full path, then removed from xml_path.
 * Second visible branch: when `name` is found as a trailing component,
 * nodes are closed from the end of the path, and the path is truncated
 * at the component that compares equal to `name`.
 *
 * NOTE(review): the condition guarding the first branch (presumably
 * `single`), the loop construct and the return value (presumably `ok`)
 * are not visible in this view -- confirm.
 */
bool CSXML::internalCloseNode(const char *name, bool single)
1525
if ((ptr = xml_path.lastComponent())) {
1526
ok = closeNode(xml_path.stringPtr);
1527
xml_path.truncate(ptr);
1530
else if ((ptr = xml_path.findTrailingComponent(name))) {
1531
/* Close the node that is named above. If the XML is
1532
* correct, then the node should be at the top of the
1533
* node stack (last element of the path).
1535
* If not found, "ignore" the close.
1537
* If not found on the top of the node stack, then
1538
* we close several nodes.
1541
if (!(ptr = xml_path.lastComponent()))
1543
if (!(ok = closeNode(xml_path.stringPtr)))
1545
if (strcmp(ptr, name) == 0) {
1546
xml_path.truncate(ptr);
1549
xml_path.truncate(ptr);
1555
// Push a node onto the current path and report it as opened.
//
// A '/' separator and then `name` are appended to xml_path. The result
// is openNode() called with the full path and the current xml_value
// buffer.
//
// NOTE(review): the checks on `ok` between the two addString() calls
// are not visible in this view -- presumably a failed append returns
// early; confirm.
bool CSXML::internalOpenNode(const char *name)
1559
ok = xml_path.addString("/", this);
1562
ok = xml_path.addString(name, this);
1565
return openNode(this->xml_path.stringPtr, this->xml_value.stringPtr);
1568
// Main parse loop: turns the operations produced by processChar() into
// open/close/attribute notifications.
//
// my_flags is stored in this->flags. XML_KEEP_EMPTY_CDATA is the one
// flag tested here: it reports a CDATA node even when xml_value is
// empty.
//
// Each pass calls processChar(ch) and handles the returned `op` word in
// three steps, in this order:
//   1. op & XML_OP_1_MASK  -- an action taken before the data is stored;
//   2. op & XML_DATA_MASK  -- the current data is appended to xml_name
//                             or xml_value;
//   3. op & XML_OP_2_MASK  -- an action taken after the data is stored.
// The loop ends at CS_XML_EOF_CHAR or as soon as a step sets ok false.
//
// NOTE(review): the declarations, the calls that fetch the next
// character, the break statements, the closing braces and the return
// value are not visible in this view -- confirm against the full
// definition before relying on fall-through behaviour.
bool CSXML::parseXML(int32_t my_flags)
1575
this->flags = my_flags;
1576
// NOTE(review): adding zero characters from NULL presumably sets each
// string up as empty before parsing starts -- confirm in addChars().
ok = xml_path.addChars(0, NULL, false, this);
1579
ok = xml_name.addChars(0, NULL, false, this);
1582
ok = xml_value.addChars(0, NULL, false, this);
1587
while (ch != CS_XML_EOF_CHAR && ok) {
1588
op = processChar(ch);
1589
// Step 1: action to take before the data of this operation is stored.
switch (op & XML_OP_1_MASK) {
1592
case XML_OP_1_END_TAG:
1594
case XML_OP_1_END_CLOSE_TAG:
1596
case XML_OP_1_END_EMPTY_TAG:
1597
ok = internalCloseNode("/>", true);
1599
case XML_OP_1_END_PI_TAG:
1600
ok = internalCloseNode("?>", true);
1602
case XML_OP_1_END_ENTITY_TAG:
1603
ok = internalCloseNode(">", true);
1605
case XML_OP_1_END_BRACKET_TAG:
1606
ok = internalCloseNode("]>", true);
1608
case XML_OP_1_END_UNKNOWN_TAG:
1609
ok = internalCloseNode(">", true);
1611
case XML_OP_1_START_CDATA_TAG:
1613
case XML_OP_1_START_COMMENT:
1615
case XML_OP_1_START_TAG:
1616
// A name classified as a close tag closes the matching node by name
// instead of opening a new one.
if (nodeType(xml_name.stringPtr) == IS_XML_CLOSE_TAG)
1617
ok = internalCloseNode(xml_name.stringPtr, false);
1619
ok = internalOpenNode(xml_name.stringPtr);
1620
xml_name.setEmpty();
1621
xml_value.setEmpty();
1623
case XML_OP_1_ADD_ATTR:
1624
tagtype = nodeType(xml_name.stringPtr);
1625
// Names classified as open or close brackets are not reported as
// attributes.
if (tagtype != IS_XML_OPEN_BRACKET && tagtype != IS_XML_CLOSE_BRACKET)
1626
ok = addAttribute(xml_path.stringPtr, xml_name.stringPtr, xml_value.stringPtr);
1627
xml_name.setEmpty();
1628
xml_value.setEmpty();
1630
case XML_OP_1_END_CDATA:
1631
// Character data is reported as a node with an empty name that is
// opened and then closed again.
if (xml_value.stringLen || (my_flags & XML_KEEP_EMPTY_CDATA)) {
1632
ok = internalOpenNode("");
1633
xml_name.setEmpty();
1634
xml_value.setEmpty();
1635
ok = internalCloseNode("", true);
1638
case XML_OP_1_END_CDATA_TAG:
1639
ok = internalOpenNode("![CDATA[");
1640
xml_name.setEmpty();
1641
xml_value.setEmpty();
1643
ok = internalCloseNode("]]>", true);
1645
case XML_OP_1_END_COMMENT:
1646
ok = internalOpenNode("!--");
1647
xml_name.setEmpty();
1648
xml_value.setEmpty();
1650
ok = internalCloseNode("-->", true);
1655
// Step 2: append the current data to the name or the value buffer.
// The third addChars() argument is true for names and false for values.
// NOTE(review): it appears to be the lower-casing flag tested by the
// conversion code earlier in this file -- confirm.
switch (op & XML_DATA_MASK) {
1658
ok = xml_name.addChars(getDataLen(), getDataPtr(), true, this);
1660
case XML_DATA_CDATA:
1661
case XML_DATA_CDATA_TAG:
1663
case XML_DATA_VALUE:
1664
ok = xml_value.addChars(getDataLen(), getDataPtr(), false, this);
1669
// Step 3: action to take after the data of this operation is stored.
switch (op & XML_OP_2_MASK) {
1672
case XML_OP_2_END_TAG:
1674
case XML_OP_2_END_CLOSE_TAG:
1676
case XML_OP_2_END_EMPTY_TAG:
1677
ok = internalCloseNode("/>", true);
1679
case XML_OP_2_END_PI_TAG:
1680
ok = internalCloseNode("?>", true);
1682
case XML_OP_2_END_ENTITY_TAG:
1683
ok = internalCloseNode(">", true);
1685
case XML_OP_2_END_BRACKET_TAG:
1686
ok = internalCloseNode("]>", true);
1688
case XML_OP_2_END_UNKNOWN_TAG:
1689
ok = internalCloseNode(">", true);
1691
case XML_OP_2_START_CDATA_TAG:
1693
case XML_OP_2_START_COMMENT:
1702
xml_value.setNull();
1706
/* ------------------------------------------------------------------- */
1709
/* Print an "OPEN <path>" line on stdout and, when `value` is non-NULL
 * and non-empty, print the value on a following line.
 *
 * CSXML::internalOpenNode() calls openNode() with the full node path
 * and the current value buffer.
 *
 * NOTE(review): the return value is not visible in this view --
 * presumably always true; confirm.
 */
bool CSXMLPrint::openNode(char *path, char *value)
1711
printf("OPEN %s\n", path);
1712
if (value && *value)
1713
printf(" %s\n", value);
1717
/* Print a "close <path>" line on stdout for the node being closed.
 *
 * NOTE(review): the return value is not visible in this view --
 * presumably always true; confirm.
 */
bool CSXMLPrint::closeNode(char *path)
1719
printf("close %s\n", path);
1723
/* Print an attribute on stdout, either as "attr <path> <name>=<value>"
 * or as "attr <path> <name>".
 *
 * NOTE(review): the condition choosing between the two printf() calls
 * is not visible in this view -- presumably whether `value` is
 * non-NULL; confirm. The return value is not visible either.
 */
bool CSXMLPrint::addAttribute(char *path, char *name, char *value)
1726
printf("attr %s %s=%s\n", path, name, value);
1728
printf("attr %s %s\n", path, name);
1732
/* ------------------------------------------------------------------- */
1735
/* Parse XML held in the NUL-terminated string `data`.
 *
 * dataLen is set to strlen(data), and the result is whatever
 * parseXML(my_flags) returns.
 *
 * NOTE(review): the assignments that point charData at `data` and
 * reset dataPos are not visible in this view -- confirm.
 */
bool CSXMLBuffer::parseString(const char *data, int32_t my_flags)
1738
dataLen = strlen(data);
1740
return parseXML(my_flags);
1743
/* Parse XML held in a buffer given as pointer plus length. The result
 * is whatever parseXML(my_flags) returns.
 *
 * NOTE(review): the assignments that store `data` and `len` are not
 * visible in this view -- presumably they set charData, dataLen and
 * dataPos as parseString() does; confirm.
 */
bool CSXMLBuffer::parseData(const char *data, size_t len, int32_t my_flags)
1748
return parseXML(my_flags);
1751
/* Supply the next input character from the in-memory buffer.
 *
 * When dataPos has reached dataLen, *ch is set to CS_XML_EOF_CHAR.
 * Otherwise *ch receives the byte at charData[dataPos]; it is converted
 * through unsigned char first, so bytes >= 0x80 do not become negative
 * values where plain char is signed.
 *
 * NOTE(review): the increment of dataPos and the return value are not
 * visible in this view -- confirm.
 */
bool CSXMLBuffer::getChar(wchar_t *ch)
1753
if (dataPos == dataLen)
1754
*ch = CS_XML_EOF_CHAR;
1756
*ch = (wchar_t) (unsigned char) charData[dataPos];
1762
/* ------------------------------------------------------------------- */
1765
/* Open `file_name` for reading (mode "r") and parse it with
 * parseXML(my_flags).
 *
 * If fopen() fails, the error is recorded with setError(errno, NULL).
 *
 * NOTE(review): the early return after the failed open, the fclose()
 * of this->file and the final return value are not visible in this
 * view -- confirm the stream is closed on every path.
 */
bool CSXMLFile::parseFile(char *file_name, int32_t my_flags)
1769
if (!(this->file = fopen(file_name, "r"))) {
1770
setError(errno, NULL);
1773
ok = parseXML(my_flags);
1778
/* Supply the next input character by reading one byte from `file` with
 * fgetc().
 *
 * When fgetc() returns EOF, *ch is set to CS_XML_EOF_CHAR; the same
 * branch also contains a setError(errno, NULL) call. Otherwise *ch
 * receives the byte that was read.
 *
 * NOTE(review): the condition around setError() (presumably a ferror()
 * check that separates a read error from end of file) and the return
 * values are not visible in this view -- confirm.
 */
bool CSXMLFile::getChar(wchar_t *ch)
1782
next_ch = fgetc(file);
1783
if (next_ch == EOF) {
1785
setError(errno, NULL);
1788
*ch = CS_XML_EOF_CHAR;
1791
*ch = (wchar_t) next_ch;