~drizzle-trunk/drizzle/development

1548.2.11 by Barry.Leslie at PrimeBase
Removed libxml requirement by using a home-grown XML parser.
1
/* Copyright (c) 2010 PrimeBase Technologies GmbH, Germany
2
 *
3
 * PrimeBase Media Stream for MySQL
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
 *
19
 * Paul McCullagh (H&G2JCtL)
20
 *
21
 * 2010-01-12
22
 *
23
 * CORE SYSTEM:
24
 * XML Parsing
25
 *
26
 */
27
28
#include <inttypes.h>
#include <stdio.h>
#include <wchar.h>
30
31
#ifndef __CSXML_H__
32
#define __CSXML_H__
33
34
/* Error codes (negative values are parenthesized so they are safe in any expression). */
#define CS_XML_ERR_OUT_OF_MEMORY		(-1)
#define CS_XML_ERR_CHAR_TOO_LARGE		(-2)

/* Character value used to signal that there are no more input characters. */
#define CS_XML_EOF_CHAR					WCHAR_MAX

/* Size of the fixed name buffers (charset, tag, attribute name/value, entity). */
#define CS_MAX_XML_NAME_SIZE			48
/* Size of the buffer that holds the error message text. */
#define CS_XML_ERR_MSG_SIZE				128
41
42
/* pxml.h 23.3.01 Paul McCullagh */
43
/* Parse XML */
44
/* Entities understood by XML:
45
   &gt;		(>)
46
   &lt;		(<)
47
   &amp;	(&)
48
   &apos;	(')
49
   &quot;	(")
50
51
   Processing Instructions		<? ... ?>
52
   CDATA Sections				<![CDATA[ ... ]]>
53
   Document Type Definition		<!DOCTYPE ... [ ...markup... ] >
54
   Conditional Sections			<![ ... [ ...markup... ]]>
55
 */
56
57
/*
 * Parser state constants. The trailing comment on each line shows the
 * input that has been seen when the parser is in that state.
 * NOTE(review): presumably these are the values held in
 * CSXMLParser::state -- confirm against the implementation.
 */
#define XML_BEFORE_CDATA				0		/* XXX */
#define XML_IN_CDATA					1		/* XXX */

#define XML_LT							2		/* < */
#define XML_LT_BANG						3		/* <! */
#define XML_LT_BANG_DASH				4		/* <!- */
#define XML_LT_BANG_SQR					5		/* <![ */
#define XML_LT_BANG_SQR_IN_NAME			6
#define XML_LT_BANG_SQR_AFTER_NAME		7

#define XML_IN_TAG_NAME					8		/* abc */

#define XML_BEFORE_ATTR					9		/* ' ' */
#define XML_IN_ATTR						10		/* xyz */

#define XML_BEFORE_EQUAL				11		/* ' ' */
#define XML_AFTER_EQUAL					12		/* ' ' */

#define XML_QUOTE_BEFORE_VALUE			13		/* " or ' */
#define XML_IN_VALUE					14		/* ... */
#define XML_QUOTE_AFTER_VALUE			15		/* " or ' */

#define XML_SLASH						16		/* / */
#define XML_QMARK						17		/* ? */
#define XML_SQR							18		/* ] */

#define XML_IN_COMMENT					19		/* <!--... */
#define XML_IN_COMMENT_DASH				20		/* - */
#define XML_IN_COMMENT_DASH_DASH		21		/* -- */
#define XML_IN_COMMENT_3_DASH			22		/* --- */

#define XML_IN_CDATA_TAG				23		/* <![CDATA[... */
#define XML_IN_CDATA_TAG_SQR			24		/* ] */
#define XML_IN_CDATA_TAG_SQR_SQR		25		/* ]] */
#define XML_IN_CDATA_TAG_3_SQR			26		/* ]]] */
92
93
/* Number of wide characters in the parser's output buffer. */
#define PARSE_BUFFER_SIZE				20
/* Number of entries in the parser's tag type stack. */
#define PARSE_STACK_SIZE				200

/*
 * Type of the innermost open tag of parser x (a pointer to an object with
 * 'nesting' and 'end_type' members). Yields XML_OP_1_END_UNKNOWN_TAG when
 * the nesting depth is zero or exceeds the stack, so the stack is never
 * indexed out of bounds. Note that x is evaluated more than once.
 */
#define END_TAG_TYPE(x)					(((x)->nesting > 0 && (x)->nesting <= PARSE_STACK_SIZE) ? (x)->end_type[(x)->nesting - 1] : XML_OP_1_END_UNKNOWN_TAG)

/* Convert a (possibly signed) char to its non-negative character code. */
#define TO_LONG_CHAR(ch)				((unsigned char) (ch))
99
100
/*
 * Step constants.
 * NOTE(review): presumably the values held in CSXMLParser::step, recording
 * which part of a tag was parsed last -- confirm against the implementation.
 */
#define XML_STEP_NONE					0
#define XML_STEP_TAG					1
#define XML_STEP_ATTR					2
#define XML_STEP_VALUE					3
#define XML_STEP_NESTED					4
105
106
class CSXMLParser {
107
	public:
108
	CSXMLParser() :
109
		state(0),
110
		quote(0),
111
		step(0),
112
		type(0),
113
		count(0),
114
		nesting(0) {
115
	}
116
	virtual ~CSXMLParser() { }
117
118
	int32_t parseChar(wchar_t ch);
119
	void setDataType(int32_t t) { type = t; }
120
	int32_t getDataLen() { return count; }
121
	wchar_t *getDataPtr() { return buffer; }
122
123
	private:
124
	/* Internal information: */
125
	int32_t			state;
126
	int32_t			quote;
127
	int32_t			step;
128
129
	/* Data: output is always in the buffer: */
130
	int32_t			type;							/* Type of data in the buffer. */
131
	int32_t			count;							/* Size of the buffer.  */
132
	wchar_t			buffer[PARSE_BUFFER_SIZE];		/* Contains data to be added. */
133
134
	/* Signals: tag start and end: */
135
	int32_t			nesting;						/* Tag nesting depth. */
136
	uint8_t			end_type[PARSE_STACK_SIZE];		/* Stack of tag types */
137
138
	bool match_string(const char *ch);
139
	void increment_nesting(wchar_t ch);
140
};
141
142
/*
 * Layout of a parsing instruction:
 *   bits 0-3   operation 1   (XML_OP_1_MASK)
 *   bits 4-7   data type     (XML_DATA_MASK)
 *   bits 8-11  operation 2   (XML_OP_2_MASK)
 *   bit  12    error flag    (XML_ERROR)
 */
#define XML_OP_1_MASK					0x0000000F
#define XML_ERROR						0x00001000

/* Operation 1 codes. */
#define XML_OP_1_NOOP					0x00000000
#define XML_OP_1_END_TAG				0x00000001		/* < ... >   */
#define XML_OP_1_END_CLOSE_TAG			0x00000002		/* </ ... >  */
#define XML_OP_1_END_EMPTY_TAG			0x00000003		/* < ... />  */
#define XML_OP_1_END_PI_TAG				0x00000004		/* <? ... ?> */
#define XML_OP_1_END_ENTITY_TAG			0x00000005		/* <! ... >  */
#define XML_OP_1_END_BRACKET_TAG		0x00000006		/* <![ ... ]> */
#define XML_OP_1_END_UNKNOWN_TAG		0x00000007		/* <_ ... > */
#define XML_OP_1_START_CDATA_TAG		0x00000008		/* <![CDATA[ ... */
#define XML_OP_1_START_COMMENT			0x00000009		/* <!-- ... */
#define XML_OP_1_START_TAG				0x0000000A		/* <... */
#define XML_OP_1_ADD_ATTR				0x0000000B
#define XML_OP_1_END_CDATA				0x0000000C
#define XML_OP_1_END_CDATA_TAG			0x0000000D		/* ... ]]> */
#define XML_OP_1_END_COMMENT			0x0000000E		/* ... --> */

/* Data type codes. */
#define XML_DATA_MASK					0x000000F0

#define XML_NO_DATA						0x00000000
#define XML_DATA_TAG					0x00000010
#define XML_DATA_ATTR					0x00000020
#define XML_DATA_CDATA					0x00000030
#define XML_DATA_CDATA_TAG				0x00000040
#define XML_COMMENT						0x00000050
#define XML_DATA_VALUE					0x00000060

/* Operation 2 codes: each equals the matching operation 1 code shifted left by 8. */
#define XML_OP_2_MASK					0x00000F00

#define XML_OP_2_NOOP					0x00000000
#define XML_OP_2_END_TAG				0x00000100
#define XML_OP_2_END_CLOSE_TAG			0x00000200
#define XML_OP_2_END_EMPTY_TAG			0x00000300
#define XML_OP_2_END_PI_TAG				0x00000400
#define XML_OP_2_END_ENTITY_TAG			0x00000500
#define XML_OP_2_END_BRACKET_TAG		0x00000600
#define XML_OP_2_END_UNKNOWN_TAG		0x00000700
#define XML_OP_2_START_CDATA_TAG		0x00000800
#define XML_OP_2_START_COMMENT			0x00000900

/*
 * Composite instructions. In the names, a lower-case part is an operation
 * and an upper-case part ending in _CH is a data type. The (x) argument of
 * the *_end_tag(x) macros is an XML_OP_1_END_* code.
 */
#define XML_noop						(XML_OP_2_NOOP|XML_NO_DATA)

#define XML_CDATA_CH					(XML_DATA_CDATA)
#define XML_end_cdata_TAG_CH			(XML_OP_1_END_CDATA|XML_DATA_TAG)
#define XML_start_tag_TAG_CH			(XML_OP_1_START_TAG|XML_DATA_TAG)
#define XML_add_attr_TAG_CH				(XML_OP_1_ADD_ATTR|XML_DATA_TAG)
#define XML_TAG_CH						(XML_DATA_TAG)
#define XML_start_tag_ATTR_CH			(XML_OP_1_START_TAG|XML_DATA_ATTR)
#define XML_add_attr_ATTR_CH			(XML_OP_1_ADD_ATTR|XML_DATA_ATTR)
#define XML_ATTR_CH						(XML_DATA_ATTR)
#define XML_start_tag_VALUE_CH			(XML_OP_1_START_TAG|XML_DATA_VALUE)
#define XML_add_attr_VALUE_CH			(XML_OP_1_ADD_ATTR|XML_DATA_VALUE)
#define XML_VALUE_CH					(XML_DATA_VALUE)
#define XML_start_tag_end_tag(x)		(XML_OP_1_START_TAG|((x) << 8))
#define XML_add_attr_end_tag(x)			(XML_OP_1_ADD_ATTR|((x) << 8))
#define XML_end_tag(x)					(x)
#define XML_start_tag_end_empty_tag		XML_start_tag_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_add_attr_end_empty_tag		XML_add_attr_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_end_empty_tag				XML_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_start_tag_end_pi_tag		XML_start_tag_end_tag(XML_OP_1_END_PI_TAG)
#define XML_add_attr_end_pi_tag			XML_add_attr_end_tag(XML_OP_1_END_PI_TAG)
#define XML_end_pi_tag					XML_end_tag(XML_OP_1_END_PI_TAG)

#define XML_end_cdata_start_cdata_tag	(XML_OP_1_END_CDATA|XML_OP_2_START_CDATA_TAG)
#define XML_start_tag_start_cdata_tag	(XML_OP_1_START_TAG|XML_OP_2_START_CDATA_TAG)
#define XML_add_attr_start_cdata_tag	(XML_OP_1_ADD_ATTR|XML_OP_2_START_CDATA_TAG)
#define XML_start_cdata_tag				(XML_OP_1_START_CDATA_TAG)
#define XML_CDATA_TAG_CH				(XML_DATA_CDATA_TAG)
#define XML_end_cdata_tag				(XML_OP_1_END_CDATA_TAG)

#define XML_end_cdata_start_comment		(XML_OP_1_END_CDATA|XML_OP_2_START_COMMENT)
#define XML_start_tag_start_comment		(XML_OP_1_START_TAG|XML_OP_2_START_COMMENT)
#define XML_add_attr_start_comment		(XML_OP_1_ADD_ATTR|XML_OP_2_START_COMMENT)
#define XML_start_comment				(XML_OP_1_START_COMMENT)
#define XML_COMMENT_CH					(XML_COMMENT)
#define XML_end_comment					(XML_OP_1_END_COMMENT)
220
221
/* Standard charsets are ISO-8859-1, US-ASCII or UNICODE. None
 * require conversion!
 */
#define CHARSET_STANDARD				0
#define CHARSET_UTF_8					1
#define CHARSET_TO_CONVERT_8_BIT		2
227
228
class CSXMLProcessor : public CSXMLParser {
229
	public:
230
	CSXMLProcessor() :
231
		err_no(0),
232
		ip(false),
233
		tlength(0),
234
		nlength(0),
235
		vlength(0),
236
		utf8_count(0),
237
		utf8_length(0),
238
		elength(0) {
239
		err_message[0] = 0;
240
		charset[0] = 0;
241
		pr_tag[0] = 0;
242
		pr_name[0] = 0;
243
		pr_value[0] = 0;
244
		utf8_buffer[0] = 0;
245
		entity[0] = 0;
246
	}
247
	virtual ~CSXMLProcessor() { }
248
249
	/* This function processes a UNICODE character from an XML
250
	 * document returns parsing instructions (operations).
251
	 * Each instruction can consist of up to 3 operations. The
252
	 * operations must be executed in the following order:
253
	 * - Operation 1
254
	 * - Data operation, record one of the following:
255
	 *   - part of a tag name
256
	 *   - part of an attribute name
257
	 *   - part of an attribute value
258
	 *   - part of CDATA
259
	 * - Operation 2
260
	 * Output for the data operation (if any) is placed in the buffer
261
	 * in the state structure. The input state structure must be zeroed
262
	 * before processing begins. Input characters may be 1 byte or
263
	 * 2 byte. Output is always 2-byte UNICODE.
264
	 */
265
	int32_t processChar(wchar_t ch);
266
267
	bool getError(int32_t *err, char **msg);
268
	void setError(int32_t err, char *msg);
269
	void printError(char *prefix);
270
271
	private:
272
	int32_t			err_no;
273
	char			err_message[CS_XML_ERR_MSG_SIZE];
274
275
	private:
276
	/* When this function is called, use the name of the charset.
277
	 * to build the conversion table which maps characters in the
278
	 * range 128 to 255 to the unicode eqivalent.
279
	 */
280
	virtual bool buildConversionTable();
281
282
	int32_t			charset_type;
283
	char			charset[CS_MAX_XML_NAME_SIZE];
284
	wchar_t			conversion_table[128];
285
286
	bool			ip;
287
	size_t			tlength;
288
	char			pr_tag[CS_MAX_XML_NAME_SIZE];
289
	size_t			nlength;
290
	char			pr_name[CS_MAX_XML_NAME_SIZE];
291
	size_t			vlength;
292
	char			pr_value[CS_MAX_XML_NAME_SIZE];
293
294
	int32_t			utf8_count;
295
	int32_t			utf8_length;
296
	uint32_t		utf8_buffer[6];
297
298
	int32_t			elength;
299
	char			entity[CS_MAX_XML_NAME_SIZE];
300
301
	int32_t capture_initializer(wchar_t ch);
302
	int32_t entity_translator(wchar_t ch);
303
	int32_t charset_transformer(wchar_t ch);
304
	void appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen);
305
};
306
307
/* path is a / separated list of nodes to date. */
308
/* Name and path are given in lower-case!!! */
309
310
/* NOTE(review): presumably a bit for the 'flags' argument of CSXML::parseXML() that keeps empty CDATA -- confirm against the implementation. */
#define XML_KEEP_EMPTY_CDATA	1
311
312
class CSXMLString {
313
	public:
314
	CSXMLString() : stringPtr(NULL), stringLen(0), stringSize(0) {}
315
	virtual ~CSXMLString() { }
316
317
	public:
318
	bool addChar(char ch, CSXMLProcessor *xml);
319
	bool addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml);
320
	bool addString(const char *string, CSXMLProcessor *xml);
321
	void setEmpty();
322
	void setNull();
323
	char *lastComponent();
324
	char *findTrailingComponent(const char *comp);
325
	void truncate(char *ptr);
326
327
	char			*stringPtr;
328
	size_t			stringLen;
329
	size_t			stringSize;
330
};
331
332
class CSXML : public CSXMLProcessor {
333
	public:
334
	bool parseXML(int32_t flags);
335
336
	private:
337
	/*
338
	 * Return CS_XML_EOF_CHAR when there are no more characters.
339
	 */
340
	virtual bool getChar(wchar_t *ch) = 0;
341
342
	/*
343
	 * These methods are called as the input data
344
	 * is parsed.
345
	 */
346
	virtual bool openNode(char *path, char *value) = 0;
347
	virtual bool closeNode(char *path) = 0;
348
	virtual bool addAttribute(char *path, char *name, char *value) = 0;
349
350
	private:
351
	uint32_t		flags;
352
353
	CSXMLString		xml_path;
354
	CSXMLString		xml_name;
355
	CSXMLString		xml_value;
356
357
	int32_t nodeType(char *name);
358
	bool internalCloseNode(const char *name, bool single);
359
	bool internalOpenNode(const char *name);
360
};
361
362
/*
 * CSXML subclass that supplies the three node callbacks.
 * NOTE(review): presumably they print the parsed structure -- confirm
 * in the implementation. getChar() is still left to subclasses.
 */
class CSXMLPrint : public CSXML {
	private:
	virtual bool openNode(char *path, char *value);
	virtual bool closeNode(char *path);
	virtual bool addAttribute(char *path, char *name, char *value);
};
368
369
/*
 * Parses XML held in memory, either a C string (parseString) or a
 * buffer of 'len' bytes (parseData).
 */
class CSXMLBuffer : public CSXMLPrint {
	public:
	/* Start with no input attached so the members are never read uninitialized. */
	CSXMLBuffer() : charData(NULL), dataLen(0), dataPos(0) { }

	bool parseString(const char *data, int32_t flags);
	bool parseData(const char *data, size_t len, int32_t flags);

	private:
	virtual bool getChar(wchar_t *ch);

	private:
	const char		*charData;
	size_t			dataLen;
	size_t			dataPos;
};
382
383
/*
 * Parses XML read from the file named in parseFile().
 */
class CSXMLFile : public CSXMLPrint {
	public:
	/* Start with no file attached so the members are never read uninitialized. */
	CSXMLFile() : fileName(NULL), file(NULL) { }

	bool parseFile(char *file_name, int32_t flags);

	private:
	virtual bool getChar(wchar_t *ch);

	private:
	char			*fileName;
	FILE			*file;
};
394
395
#endif