1
/* Copyright (c) 2010 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* Paul McCullagh (H&G2JCtL)
34
#define CS_XML_ERR_OUT_OF_MEMORY -1
35
#define CS_XML_ERR_CHAR_TOO_LARGE -2
37
#define CS_XML_EOF_CHAR WCHAR_MAX
39
/* Size, in chars, of the fixed-size name buffers declared in this file
 * (charset, pr_tag, pr_name, pr_value and entity). */
#define CS_MAX_XML_NAME_SIZE 48
40
/* Size, in chars, of the err_message buffer. */
#define CS_XML_ERR_MSG_SIZE 128
42
/* pxml.h 23.3.01 Paul McCullagh */
44
/* Entities understood by XML:
51
Processing Instructions <? ... ?>
52
CDATA Sections <![CDATA[ ... ]]>
53
Document Type Definition <!DOCTYPE ... [ ...markup... ] >
54
Conditional Sections <![ ... [ ...markup... ]]>
57
/* First of the numeric identifiers 0 - 26 (XML_BEFORE_CDATA through
 * XML_IN_CDATA_TAG_3_SQR); presumably these are the parser's states.
 * The trailing comment on each line shows sample input text for it.
 * NOTE(review): "XXX" looks like sample character data rather than a
 * to-do marker - confirm. */
#define XML_BEFORE_CDATA 0 /* XXX */
58
#define XML_IN_CDATA 1 /* XXX */
60
#define XML_LT 2 /* < */
61
#define XML_LT_BANG 3 /* <! */
62
#define XML_LT_BANG_DASH 4 /* <!- */
63
#define XML_LT_BANG_SQR 5 /* <![ */
64
#define XML_LT_BANG_SQR_IN_NAME 6
65
#define XML_LT_BANG_SQR_AFTER_NAME 7
67
#define XML_IN_TAG_NAME 8 /* abc */
69
#define XML_BEFORE_ATTR 9 /* ' ' */
70
#define XML_IN_ATTR 10 /* xyz */
72
#define XML_BEFORE_EQUAL 11 /* ' ' */
73
#define XML_AFTER_EQUAL 12 /* ' ' */
75
#define XML_QUOTE_BEFORE_VALUE 13 /* " or ' */
76
#define XML_IN_VALUE 14 /* ... */
77
#define XML_QUOTE_AFTER_VALUE 15 /* " or ' */
79
#define XML_SLASH 16 /* / */
80
#define XML_QMARK 17 /* ? */
81
#define XML_SQR 18 /* ] */
83
#define XML_IN_COMMENT 19 /* <!--... */
84
#define XML_IN_COMMENT_DASH 20 /* - */
85
#define XML_IN_COMMENT_DASH_DASH 21 /* -- */
86
#define XML_IN_COMMENT_3_DASH 22 /* --- */
88
#define XML_IN_CDATA_TAG 23 /* <![CDATA[... */
89
#define XML_IN_CDATA_TAG_SQR 24 /* ] */
90
#define XML_IN_CDATA_TAG_SQR_SQR 25 /* ]] */
91
#define XML_IN_CDATA_TAG_3_SQR 26 /* ]]] */
93
/* Number of wchar_t elements in the parser's output buffer. */
#define PARSE_BUFFER_SIZE 20
94
/* Number of entries in the end_type stack of tag types. END_TAG_TYPE()
 * yields XML_OP_1_END_UNKNOWN_TAG for nesting depths beyond this size. */
#define PARSE_STACK_SIZE 200
96
/* Tag type recorded for the innermost open tag of parser state x, i.e. the
 * entry at depth x->nesting - 1 of the end_type stack. Yields
 * XML_OP_1_END_UNKNOWN_TAG when that depth lies outside the stack: either
 * no tag is open (nesting <= 0) or the nesting exceeds PARSE_STACK_SIZE.
 * The argument is parenthesized so that any pointer expression (such as
 * &state) may be passed; note that it is evaluated more than once. */
#define END_TAG_TYPE(x) (((x)->nesting > 0 && (x)->nesting <= PARSE_STACK_SIZE) ? (x)->end_type[(x)->nesting - 1] : XML_OP_1_END_UNKNOWN_TAG)
98
/* Convert ch to its unsigned char value, so that a plain char holding a
 * byte of 0x80 or above does not turn negative when widened. */
#define TO_LONG_CHAR(ch) (static_cast<unsigned char>(ch))
100
#define XML_STEP_NONE 0
101
#define XML_STEP_TAG 1
102
#define XML_STEP_ATTR 2
103
#define XML_STEP_VALUE 3
104
#define XML_STEP_NESTED 4
116
/* Virtual destructor with an empty body; CSXMLProcessor derives from
 * CSXMLParser (see its declaration in this file). */
virtual ~CSXMLParser() { }
118
int32_t parseChar(wchar_t ch);
119
/* Store t in 'type', the type of the data held in the buffer. */
void setDataType(int32_t t)
{
	type = t;
}
120
/* Return 'count', which its declaration documents as the size of the buffer. */
int32_t getDataLen()
{
	return count;
}
121
/* Return a pointer to the start of 'buffer', the parser's output buffer. */
wchar_t *getDataPtr()
{
	return buffer;
}
124
/* Internal information: */
129
/* Data: output is always in the buffer: */
130
int32_t type; /* Type of data in the buffer. */
131
int32_t count; /* Size of the buffer. */
132
wchar_t buffer[PARSE_BUFFER_SIZE]; /* Contains data to be added. */
134
/* Signals: tag start and end: */
135
int32_t nesting; /* Tag nesting depth. */
136
uint8_t end_type[PARSE_STACK_SIZE]; /* Stack of tag types */
138
bool match_string(const char *ch);
139
void increment_nesting(wchar_t ch);
142
/* Mask for the first-operation field of a parsing instruction: the low
 * 4 bits, which hold one of the XML_OP_1_* codes defined below (0x0 - 0xE). */
#define XML_OP_1_MASK 0x0000000F
143
/* Bit 12; it lies outside the XML_OP_1_MASK, XML_DATA_MASK and
 * XML_OP_2_MASK ranges. NOTE(review): presumably set in an instruction to
 * signal a parse error - confirm against the parser implementation. */
#define XML_ERROR 0x00001000
145
#define XML_OP_1_NOOP 0x00000000
146
#define XML_OP_1_END_TAG 0x00000001 /* < ... > */
147
#define XML_OP_1_END_CLOSE_TAG 0x00000002 /* </ ... > */
148
#define XML_OP_1_END_EMPTY_TAG 0x00000003 /* < ... /> */
149
#define XML_OP_1_END_PI_TAG 0x00000004 /* <? ... ?> */
150
#define XML_OP_1_END_ENTITY_TAG 0x00000005 /* <! ... > */
151
#define XML_OP_1_END_BRACKET_TAG 0x00000006 /* <![ ... ]> */
152
#define XML_OP_1_END_UNKNOWN_TAG 0x00000007 /* <_ ... > */
153
#define XML_OP_1_START_CDATA_TAG 0x00000008 /* <![CDATA[ ... */
154
#define XML_OP_1_START_COMMENT 0x00000009 /* <!-- ... */
155
#define XML_OP_1_START_TAG 0x0000000A /* <... */
156
#define XML_OP_1_ADD_ATTR 0x0000000B
157
#define XML_OP_1_END_CDATA 0x0000000C
158
#define XML_OP_1_END_CDATA_TAG 0x0000000D /* ... ]]> */
159
#define XML_OP_1_END_COMMENT 0x0000000E /* ... --> */
161
/* Mask for the data field of a parsing instruction: bits 4 - 7, which hold
 * one of the data codes defined below (XML_NO_DATA through XML_DATA_VALUE,
 * 0x00 - 0x60). */
#define XML_DATA_MASK 0x000000F0
163
#define XML_NO_DATA 0x00000000
164
#define XML_DATA_TAG 0x00000010
165
#define XML_DATA_ATTR 0x00000020
166
#define XML_DATA_CDATA 0x00000030
167
#define XML_DATA_CDATA_TAG 0x00000040
168
#define XML_COMMENT 0x00000050
169
#define XML_DATA_VALUE 0x00000060
171
/* Mask for the second-operation field of a parsing instruction: bits
 * 8 - 11. Each XML_OP_2_* code defined below equals the XML_OP_1_* code of
 * the same name shifted left by 8 bits. */
#define XML_OP_2_MASK 0x00000F00
173
#define XML_OP_2_NOOP 0x00000000
174
#define XML_OP_2_END_TAG 0x00000100
175
#define XML_OP_2_END_CLOSE_TAG 0x00000200
176
#define XML_OP_2_END_EMPTY_TAG 0x00000300
177
#define XML_OP_2_END_PI_TAG 0x00000400
178
#define XML_OP_2_END_ENTITY_TAG 0x00000500
179
#define XML_OP_2_END_BRACKET_TAG 0x00000600
180
#define XML_OP_2_END_UNKNOWN_TAG 0x00000700
181
#define XML_OP_2_START_CDATA_TAG 0x00000800
182
#define XML_OP_2_START_COMMENT 0x00000900
184
#define XML_noop (XML_OP_2_NOOP|XML_NO_DATA)
186
#define XML_CDATA_CH (XML_DATA_CDATA)
187
#define XML_end_cdata_TAG_CH (XML_OP_1_END_CDATA|XML_DATA_TAG)
188
#define XML_start_tag_TAG_CH (XML_OP_1_START_TAG|XML_DATA_TAG)
189
#define XML_add_attr_TAG_CH (XML_OP_1_ADD_ATTR|XML_DATA_TAG)
190
#define XML_TAG_CH (XML_DATA_TAG)
191
#define XML_start_tag_ATTR_CH (XML_OP_1_START_TAG|XML_DATA_ATTR)
192
#define XML_add_attr_ATTR_CH (XML_OP_1_ADD_ATTR|XML_DATA_ATTR)
193
#define XML_ATTR_CH (XML_DATA_ATTR)
194
#define XML_start_tag_VALUE_CH (XML_OP_1_START_TAG|XML_DATA_VALUE)
195
#define XML_add_attr_VALUE_CH (XML_OP_1_ADD_ATTR|XML_DATA_VALUE)
196
#define XML_VALUE_CH (XML_DATA_VALUE)
197
/* Instruction combining XML_OP_1_START_TAG as the first operation with x
 * (given as an XML_OP_1_* code) moved into the second-operation field. */
#define XML_start_tag_end_tag(x) (((x) << 8) | XML_OP_1_START_TAG)
198
/* Instruction combining XML_OP_1_ADD_ATTR as the first operation with x
 * (given as an XML_OP_1_* code) moved into the second-operation field. */
#define XML_add_attr_end_tag(x) (((x) << 8) | XML_OP_1_ADD_ATTR)
199
#define XML_end_tag(x) (x)
200
#define XML_start_tag_end_empty_tag XML_start_tag_end_tag(XML_OP_1_END_EMPTY_TAG)
201
#define XML_add_attr_end_empty_tag XML_add_attr_end_tag(XML_OP_1_END_EMPTY_TAG)
202
#define XML_end_empty_tag XML_end_tag(XML_OP_1_END_EMPTY_TAG)
203
#define XML_start_tag_end_pi_tag XML_start_tag_end_tag(XML_OP_1_END_PI_TAG)
204
#define XML_add_attr_end_pi_tag XML_add_attr_end_tag(XML_OP_1_END_PI_TAG)
205
#define XML_end_pi_tag XML_end_tag(XML_OP_1_END_PI_TAG)
207
#define XML_end_cdata_start_cdata_tag (XML_OP_1_END_CDATA|XML_OP_2_START_CDATA_TAG)
208
#define XML_start_tag_start_cdata_tag (XML_OP_1_START_TAG|XML_OP_2_START_CDATA_TAG)
209
#define XML_add_attr_start_cdata_tag (XML_OP_1_ADD_ATTR|XML_OP_2_START_CDATA_TAG)
210
#define XML_start_cdata_tag (XML_OP_1_START_CDATA_TAG)
211
#define XML_CDATA_TAG_CH (XML_DATA_CDATA_TAG)
212
#define XML_end_cdata_tag (XML_OP_1_END_CDATA_TAG)
214
#define XML_end_cdata_start_comment (XML_OP_1_END_CDATA|XML_OP_2_START_COMMENT)
215
#define XML_start_tag_start_comment (XML_OP_1_START_TAG|XML_OP_2_START_COMMENT)
216
#define XML_add_attr_start_comment (XML_OP_1_ADD_ATTR|XML_OP_2_START_COMMENT)
217
#define XML_start_comment (XML_OP_1_START_COMMENT)
218
#define XML_COMMENT_CH (XML_COMMENT)
219
#define XML_end_comment (XML_OP_1_END_COMMENT)
221
/* Standard charsets are ISO-8859-1, US-ASCII or UNICODE. None
222
* require conversion!
224
#define CHARSET_STANDARD 0
225
#define CHARSET_UTF_8 1
226
#define CHARSET_TO_CONVERT_8_BIT 2
228
class CSXMLProcessor : public CSXMLParser {
247
virtual ~CSXMLProcessor() { }
249
/* This function processes a UNICODE character from an XML
250
* document and returns parsing instructions (operations).
251
* Each instruction can consist of up to 3 operations. The
252
* operations must be executed in the following order:
254
* - Data operation, record one of the following:
255
* - part of a tag name
256
* - part of an attribute name
257
* - part of an attribute value
260
* Output for the data operation (if any) is placed in the buffer
261
* in the state structure. The input state structure must be zeroed
262
* before processing begins. Input characters may be 1 byte or
263
* 2 byte. Output is always 2-byte UNICODE.
265
int32_t processChar(wchar_t ch);
267
bool getError(int32_t *err, char **msg);
268
void setError(int32_t err, char *msg);
269
void printError(char *prefix);
273
char err_message[CS_XML_ERR_MSG_SIZE];
276
/* When this function is called, use the name of the charset
277
* to build the conversion table which maps characters in the
278
* range 128 to 255 to the unicode equivalent.
280
virtual bool buildConversionTable();
282
int32_t charset_type;
283
char charset[CS_MAX_XML_NAME_SIZE];
284
wchar_t conversion_table[128];
288
char pr_tag[CS_MAX_XML_NAME_SIZE];
290
char pr_name[CS_MAX_XML_NAME_SIZE];
292
char pr_value[CS_MAX_XML_NAME_SIZE];
296
uint32_t utf8_buffer[6];
299
char entity[CS_MAX_XML_NAME_SIZE];
301
int32_t capture_initializer(wchar_t ch);
302
int32_t entity_translator(wchar_t ch);
303
int32_t charset_transformer(wchar_t ch);
304
void appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen);
307
/* path is a / separated list of nodes to date. */
308
/* Name and path are given in lower-case!!! */
310
#define XML_KEEP_EMPTY_CDATA 1
314
/* Default constructor: stringPtr starts out NULL, and stringLen and
 * stringSize both start at zero. */
CSXMLString()
	: stringPtr(NULL),
	  stringLen(0),
	  stringSize(0)
{
}
315
virtual ~CSXMLString() { }
318
bool addChar(char ch, CSXMLProcessor *xml);
319
bool addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml);
320
bool addString(const char *string, CSXMLProcessor *xml);
323
char *lastComponent();
324
char *findTrailingComponent(const char *comp);
325
void truncate(char *ptr);
332
class CSXML : public CSXMLProcessor {
334
bool parseXML(int32_t flags);
338
* Return CS_XML_EOF_CHAR when there are no more characters.
340
virtual bool getChar(wchar_t *ch) = 0;
343
* These methods are called as the input data
346
virtual bool openNode(char *path, char *value) = 0;
347
virtual bool closeNode(char *path) = 0;
348
virtual bool addAttribute(char *path, char *name, char *value) = 0;
353
CSXMLString xml_path;
354
CSXMLString xml_name;
355
CSXMLString xml_value;
357
int32_t nodeType(char *name);
358
bool internalCloseNode(const char *name, bool single);
359
bool internalOpenNode(const char *name);
362
class CSXMLPrint : public CSXML {
364
virtual bool openNode(char *path, char *value);
365
virtual bool closeNode(char *path);
366
virtual bool addAttribute(char *path, char *name, char *value);
369
class CSXMLBuffer : public CSXMLPrint {
371
bool parseString(const char *data, int32_t flags);
372
bool parseData(const char *data, size_t len, int32_t flags);
375
virtual bool getChar(wchar_t *ch);
378
const char *charData;
383
class CSXMLFile : public CSXMLPrint {
385
bool parseFile(char *file_name, int32_t flags);
388
virtual bool getChar(wchar_t *ch);