1
/* Copyright (C) 2010 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
* Paul McCullagh (H&G2JCtL)
35
/* Error codes (negative values) and size limits shared by the XML classes
 * declared in this header.
 */
#define CS_XML_ERR_OUT_OF_MEMORY -1
36
#define CS_XML_ERR_CHAR_TOO_LARGE -2
38
/* Sentinel character that getChar() implementations return when the input is exhausted. */
#define CS_XML_EOF_CHAR WCHAR_MAX
40
/* Size, in chars, of the fixed name buffers (charset, pr_tag, pr_name, pr_value, entity). */
#define CS_MAX_XML_NAME_SIZE 48
41
/* Size, in chars, of the err_message buffer. */
#define CS_XML_ERR_MSG_SIZE 128
43
/* pxml.h 23.3.01 Paul McCullagh */
45
/* Entities understood by XML:
52
Processing Instructions <? ... ?>
53
CDATA Sections <![CDATA[ ... ]]>
54
Document Type Definition <!DOCTYPE ... [ ...markup... ] >
55
Conditional Sections <![ ... [ ...markup... ]]>
58
/* Scanner position identifiers, numbered 0..26 without gaps.  The trailing
 * comment on each line shows the input that corresponds to the position.
 * NOTE(review): presumably these are the values of the parser's state
 * variable, which is not visible in this excerpt - confirm.
 */
#define XML_BEFORE_CDATA 0 /* XXX */
59
#define XML_IN_CDATA 1 /* XXX */
61
#define XML_LT 2 /* < */
62
#define XML_LT_BANG 3 /* <! */
63
#define XML_LT_BANG_DASH 4 /* <!- */
64
#define XML_LT_BANG_SQR 5 /* <![ */
65
#define XML_LT_BANG_SQR_IN_NAME 6 /* presumably inside the name following <![ - confirm */
66
#define XML_LT_BANG_SQR_AFTER_NAME 7 /* presumably after the name following <![ - confirm */
68
#define XML_IN_TAG_NAME 8 /* abc */
70
#define XML_BEFORE_ATTR 9 /* ' ' */
71
#define XML_IN_ATTR 10 /* xyz */
73
#define XML_BEFORE_EQUAL 11 /* ' ' */
74
#define XML_AFTER_EQUAL 12 /* ' ' */
76
#define XML_QUOTE_BEFORE_VALUE 13 /* " or ' */
77
#define XML_IN_VALUE 14 /* ... */
78
#define XML_QUOTE_AFTER_VALUE 15 /* " or ' */
80
#define XML_SLASH 16 /* / */
81
#define XML_QMARK 17 /* ? */
82
#define XML_SQR 18 /* ] */
84
#define XML_IN_COMMENT 19 /* <!--... */
85
#define XML_IN_COMMENT_DASH 20 /* - */
86
#define XML_IN_COMMENT_DASH_DASH 21 /* -- */
87
#define XML_IN_COMMENT_3_DASH 22 /* --- */
89
#define XML_IN_CDATA_TAG 23 /* <![CDATA[... */
90
#define XML_IN_CDATA_TAG_SQR 24 /* ] */
91
#define XML_IN_CDATA_TAG_SQR_SQR 25 /* ]] */
92
#define XML_IN_CDATA_TAG_3_SQR 26 /* ]]] */
94
/* Number of wchar_t elements in the parser's output `buffer` member. */
#define PARSE_BUFFER_SIZE 20
95
/* Number of entries in the `end_type` stack; also the bound tested by END_TAG_TYPE(). */
#define PARSE_STACK_SIZE 200
97
/* Type of the innermost open tag, i.e. the entry of the end_type stack at
 * index nesting-1.
 *
 * x must be a pointer to an object that has `nesting` and `end_type`
 * members.  The result is XML_OP_1_END_UNKNOWN_TAG whenever the nesting
 * depth does not correspond to a recorded stack entry: deeper than
 * PARSE_STACK_SIZE, or zero/negative (the latter used to read
 * end_type[-1], which is out of bounds).
 *
 * The argument is parenthesized so that expressions such as `&parser`
 * expand correctly.  It is still evaluated more than once, so it must not
 * have side effects.
 */
#define END_TAG_TYPE(x) (((x)->nesting > 0 && (x)->nesting <= PARSE_STACK_SIZE) ? (x)->end_type[(x)->nesting-1] : XML_OP_1_END_UNKNOWN_TAG)
99
/* Convert ch to unsigned char, so that a char with the high bit set yields
 * a value in the range 128..255 rather than a negative number.
 */
#define TO_LONG_CHAR(ch) ((unsigned char) (ch))
101
/* Step identifiers, numbered 0..4.
 * NOTE(review): their users are not visible in this excerpt; presumably
 * they track which part of a tag was processed last - confirm.
 */
#define XML_STEP_NONE 0
102
#define XML_STEP_TAG 1
103
#define XML_STEP_ATTR 2
104
#define XML_STEP_VALUE 3
105
#define XML_STEP_NESTED 4
117
/* Virtual destructor with an empty body; CSXMLProcessor derives from this class. */
virtual ~CSXMLParser() { }
119
int32_t parseChar(wchar_t ch);
120
/* Store t as the type of the data held in the buffer. */
void setDataType(int32_t t)
{
	type = t;
}
121
/* Return the `count` member, i.e. the size of the data in the buffer. */
int32_t getDataLen()
{
	return count;
}
122
/* Return a pointer to the first element of the internal output buffer. */
wchar_t *getDataPtr()
{
	return buffer;
}
125
/* Internal information: */
130
/* Data: output is always in the buffer: */
131
int32_t type; /* Type of data in the buffer. */
132
int32_t count; /* Size of the buffer. */
133
wchar_t buffer[PARSE_BUFFER_SIZE]; /* Contains data to be added. */
135
/* Signals: tag start and end: */
136
int32_t nesting; /* Tag nesting depth. */
137
uint8_t end_type[PARSE_STACK_SIZE]; /* Stack of tag types */
139
bool match_string(const char *ch);
140
void increment_nesting(wchar_t ch);
143
/* First operation field of a parsing instruction.  XML_OP_1_MASK selects
 * the low four bits; the XML_OP_1_* codes below occupy that field.
 */
#define XML_OP_1_MASK 0x0000000F
144
/* Separate bit that lies outside the XML_OP_1, XML_DATA and XML_OP_2 fields. */
#define XML_ERROR 0x00001000
146
#define XML_OP_1_NOOP 0x00000000
147
#define XML_OP_1_END_TAG 0x00000001 /* < ... > */
148
#define XML_OP_1_END_CLOSE_TAG 0x00000002 /* </ ... > */
149
#define XML_OP_1_END_EMPTY_TAG 0x00000003 /* < ... /> */
150
#define XML_OP_1_END_PI_TAG 0x00000004 /* <? ... ?> */
151
#define XML_OP_1_END_ENTITY_TAG 0x00000005 /* <! ... > */
152
#define XML_OP_1_END_BRACKET_TAG 0x00000006 /* <![ ... ]> */
153
#define XML_OP_1_END_UNKNOWN_TAG 0x00000007 /* <_ ... > */
154
#define XML_OP_1_START_CDATA_TAG 0x00000008 /* <![CDATA[ ... */
155
#define XML_OP_1_START_COMMENT 0x00000009 /* <!-- ... */
156
#define XML_OP_1_START_TAG 0x0000000A /* <... */
157
#define XML_OP_1_ADD_ATTR 0x0000000B
158
#define XML_OP_1_END_CDATA 0x0000000C
159
#define XML_OP_1_END_CDATA_TAG 0x0000000D /* ... ]]> */
160
#define XML_OP_1_END_COMMENT 0x0000000E /* ... --> */
162
/* Data field of a parsing instruction.  XML_DATA_MASK selects bits 4..7;
 * each value below names one kind of data.
 * NOTE(review): presumably this identifies what the characters in the
 * output buffer belong to - confirm against parseChar().
 */
#define XML_DATA_MASK 0x000000F0
164
#define XML_NO_DATA 0x00000000
165
#define XML_DATA_TAG 0x00000010
166
#define XML_DATA_ATTR 0x00000020
167
#define XML_DATA_CDATA 0x00000030
168
#define XML_DATA_CDATA_TAG 0x00000040
169
#define XML_COMMENT 0x00000050
170
#define XML_DATA_VALUE 0x00000060
172
/* Second operation field of a parsing instruction.  XML_OP_2_MASK selects
 * bits 8..11.  Each XML_OP_2_* code equals the XML_OP_1_* code of the same
 * name shifted left by 8 bits.
 */
#define XML_OP_2_MASK 0x00000F00
174
#define XML_OP_2_NOOP 0x00000000
175
#define XML_OP_2_END_TAG 0x00000100
176
#define XML_OP_2_END_CLOSE_TAG 0x00000200
177
#define XML_OP_2_END_EMPTY_TAG 0x00000300
178
#define XML_OP_2_END_PI_TAG 0x00000400
179
#define XML_OP_2_END_ENTITY_TAG 0x00000500
180
#define XML_OP_2_END_BRACKET_TAG 0x00000600
181
#define XML_OP_2_END_UNKNOWN_TAG 0x00000700
182
#define XML_OP_2_START_CDATA_TAG 0x00000800
183
#define XML_OP_2_START_COMMENT 0x00000900
185
/* Ready-made parsing instructions, each built by OR-ing at most one
 * XML_OP_1_* code, one data kind and one XML_OP_2_* code.
 *
 * Naming, as the definitions show: the lower-case part of a name lists the
 * operations in order (first operation, then second operation), and an
 * upper-case *_CH suffix names the data kind that is included.
 */
/* Instruction with no operation and no data (value 0). */
#define XML_noop (XML_OP_2_NOOP|XML_NO_DATA)
187
#define XML_CDATA_CH (XML_DATA_CDATA)
188
#define XML_end_cdata_TAG_CH (XML_OP_1_END_CDATA|XML_DATA_TAG)
189
#define XML_start_tag_TAG_CH (XML_OP_1_START_TAG|XML_DATA_TAG)
190
#define XML_add_attr_TAG_CH (XML_OP_1_ADD_ATTR|XML_DATA_TAG)
191
#define XML_TAG_CH (XML_DATA_TAG)
192
#define XML_start_tag_ATTR_CH (XML_OP_1_START_TAG|XML_DATA_ATTR)
193
#define XML_add_attr_ATTR_CH (XML_OP_1_ADD_ATTR|XML_DATA_ATTR)
194
#define XML_ATTR_CH (XML_DATA_ATTR)
195
#define XML_start_tag_VALUE_CH (XML_OP_1_START_TAG|XML_DATA_VALUE)
196
#define XML_add_attr_VALUE_CH (XML_OP_1_ADD_ATTR|XML_DATA_VALUE)
197
#define XML_VALUE_CH (XML_DATA_VALUE)
/* x is an XML_OP_1_END_* code; shifting it left by 8 bits places it in the
 * second operation field, after the start-tag / add-attribute operation.
 */
198
#define XML_start_tag_end_tag(x) (XML_OP_1_START_TAG|((x) << 8))
199
#define XML_add_attr_end_tag(x) (XML_OP_1_ADD_ATTR|((x) << 8))
/* The end-tag code on its own, as the first operation. */
200
#define XML_end_tag(x) (x)
201
#define XML_start_tag_end_empty_tag XML_start_tag_end_tag(XML_OP_1_END_EMPTY_TAG)
202
#define XML_add_attr_end_empty_tag XML_add_attr_end_tag(XML_OP_1_END_EMPTY_TAG)
203
#define XML_end_empty_tag XML_end_tag(XML_OP_1_END_EMPTY_TAG)
204
#define XML_start_tag_end_pi_tag XML_start_tag_end_tag(XML_OP_1_END_PI_TAG)
205
#define XML_add_attr_end_pi_tag XML_add_attr_end_tag(XML_OP_1_END_PI_TAG)
206
#define XML_end_pi_tag XML_end_tag(XML_OP_1_END_PI_TAG)
/* CDATA-tag instructions. */
208
#define XML_end_cdata_start_cdata_tag (XML_OP_1_END_CDATA|XML_OP_2_START_CDATA_TAG)
209
#define XML_start_tag_start_cdata_tag (XML_OP_1_START_TAG|XML_OP_2_START_CDATA_TAG)
210
#define XML_add_attr_start_cdata_tag (XML_OP_1_ADD_ATTR|XML_OP_2_START_CDATA_TAG)
211
#define XML_start_cdata_tag (XML_OP_1_START_CDATA_TAG)
212
#define XML_CDATA_TAG_CH (XML_DATA_CDATA_TAG)
213
#define XML_end_cdata_tag (XML_OP_1_END_CDATA_TAG)
/* Comment instructions. */
215
#define XML_end_cdata_start_comment (XML_OP_1_END_CDATA|XML_OP_2_START_COMMENT)
216
#define XML_start_tag_start_comment (XML_OP_1_START_TAG|XML_OP_2_START_COMMENT)
217
#define XML_add_attr_start_comment (XML_OP_1_ADD_ATTR|XML_OP_2_START_COMMENT)
218
#define XML_start_comment (XML_OP_1_START_COMMENT)
219
#define XML_COMMENT_CH (XML_COMMENT)
220
#define XML_end_comment (XML_OP_1_END_COMMENT)
222
/* Standard charsets are ISO-8859-1, US-ASCII or UNICODE. None
223
* require conversion!
225
#define CHARSET_STANDARD 0 /* A standard charset: no conversion required. */
226
#define CHARSET_UTF_8 1 /* UTF-8 encoded input. */
227
#define CHARSET_TO_CONVERT_8_BIT 2 /* presumably an 8-bit charset mapped via conversion_table - confirm */
229
class CSXMLProcessor : public CSXMLParser {
248
/* Virtual destructor with an empty body; CSXML derives from this class. */
virtual ~CSXMLProcessor() { }
250
/* This function processes a UNICODE character from an XML
251
* document and returns parsing instructions (operations).
252
* Each instruction can consist of up to 3 operations. The
253
* operations must be executed in the following order:
255
* - Data operation, record one of the following:
256
* - part of a tag name
257
* - part of an attribute name
258
* - part of an attribute value
261
* Output for the data operation (if any) is placed in the buffer
262
* in the state structure. The input state structure must be zeroed
263
* before processing begins. Input characters may be 1 byte or
264
* 2 byte. Output is always 2-byte UNICODE.
266
int32_t processChar(wchar_t ch);
268
bool getError(int32_t *err, char **msg);
269
void setError(int32_t err, char *msg);
270
void printError(char *prefix);
274
char err_message[CS_XML_ERR_MSG_SIZE];
277
/* When this function is called, use the name of the charset
278
* to build the conversion table which maps characters in the
279
* range 128 to 255 to the unicode equivalent.
281
virtual bool buildConversionTable();
283
int32_t charset_type;
284
char charset[CS_MAX_XML_NAME_SIZE];
285
wchar_t conversion_table[128];
289
char pr_tag[CS_MAX_XML_NAME_SIZE];
291
char pr_name[CS_MAX_XML_NAME_SIZE];
293
char pr_value[CS_MAX_XML_NAME_SIZE];
297
uint32_t utf8_buffer[6];
300
char entity[CS_MAX_XML_NAME_SIZE];
302
int32_t capture_initializer(wchar_t ch);
303
int32_t entity_translator(wchar_t ch);
304
int32_t charset_transformer(wchar_t ch);
305
void appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen);
308
/* path is a / separated list of nodes to date. */
309
/* Name and path are given in lower-case!!! */
311
/* NOTE(review): presumably a bit for the `flags` argument of parseXML(),
 * parseString(), parseData() and parseFile() - confirm against the
 * implementation.
 */
#define XML_KEEP_EMPTY_CDATA 1
315
/* Construct a string with a null pointer and both counters set to zero. */
CSXMLString()
	: stringPtr(NULL),
	  stringLen(0),
	  stringSize(0)
{
}
316
/* Virtual destructor with an empty body. */
virtual ~CSXMLString() { }
319
bool addChar(char ch, CSXMLProcessor *xml);
320
bool addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml);
321
bool addString(const char *string, CSXMLProcessor *xml);
324
char *lastComponent();
325
char *findTrailingComponent(const char *comp);
326
void truncate(char *ptr);
333
class CSXML : public CSXMLProcessor {
335
bool parseXML(int32_t flags);
339
* Return CS_XML_EOF_CHAR when there are no more characters.
341
virtual bool getChar(wchar_t *ch) = 0;
344
* These methods are called as the input data
347
virtual bool openNode(char *path, char *value) = 0;
348
virtual bool closeNode(char *path) = 0;
349
virtual bool addAttribute(char *path, char *name, char *value) = 0;
354
CSXMLString xml_path;
355
CSXMLString xml_name;
356
CSXMLString xml_value;
358
int32_t nodeType(char *name);
359
bool internalCloseNode(const char *name, bool single);
360
bool internalOpenNode(const char *name);
363
class CSXMLPrint : public CSXML {
365
virtual bool openNode(char *path, char *value);
366
virtual bool closeNode(char *path);
367
virtual bool addAttribute(char *path, char *name, char *value);
370
class CSXMLBuffer : public CSXMLPrint {
372
bool parseString(const char *data, int32_t flags);
373
bool parseData(const char *data, size_t len, int32_t flags);
376
virtual bool getChar(wchar_t *ch);
379
const char *charData;
384
class CSXMLFile : public CSXMLPrint {
386
bool parseFile(char *file_name, int32_t flags);
389
virtual bool getChar(wchar_t *ch);