~drizzle-trunk/drizzle/development

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
/* Copyright (C) 2010 PrimeBase Technologies GmbH, Germany
 *
 * PrimeBase Media Stream for MySQL
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Paul McCullagh (H&G2JCtL)
 *
 * 2010-01-12
 *
 * CORE SYSTEM:
 * XML Parsing
 *
 */

#include <inttypes.h>
#include <wchar.h>

#ifndef __CSXML_H__
#define __CSXML_H__

#define CS_XML_ERR_OUT_OF_MEMORY		-1
#define CS_XML_ERR_CHAR_TOO_LARGE		-2

#define CS_XML_EOF_CHAR					WCHAR_MAX

#define CS_MAX_XML_NAME_SIZE			48
#define CS_XML_ERR_MSG_SIZE				128

/* pxml.h 23.3.01 Paul McCullagh */
/* Parse XML */
/* Entities understood by XML:
   &gt;		(>)
   &lt;		(<)
   &amp;	(&)
   &apos;	(')
   &quot;	(")

   Processing Instructions		<? ... ?>
   CDATA Sections				<![CDATA[ ... ]]>
   Document Type Definition		<!DOCTYPE ... [ ...markup... ] >
   Conditional Sections			<![ ... [ ...markup... ]]>
 */

#define XML_BEFORE_CDATA				0		/* XXX */
#define XML_IN_CDATA					1		/* XXX */

#define XML_LT							2		/* < */
#define XML_LT_BANG						3		/* <! */
#define XML_LT_BANG_DASH				4		/* <!- */
#define XML_LT_BANG_SQR					5		/* <![ */
#define XML_LT_BANG_SQR_IN_NAME			6
#define XML_LT_BANG_SQR_AFTER_NAME		7

#define XML_IN_TAG_NAME					8		/* abc */

#define XML_BEFORE_ATTR					9		/* ' ' */
#define XML_IN_ATTR						10		/* xyz */

#define XML_BEFORE_EQUAL				11		/* ' ' */
#define XML_AFTER_EQUAL					12		/* ' ' */

#define XML_QUOTE_BEFORE_VALUE			13		/* " or ' */
#define XML_IN_VALUE					14		/* ... */
#define XML_QUOTE_AFTER_VALUE			15		/* " or ' */

#define XML_SLASH						16		/* / */
#define XML_QMARK						17		/* ? */
#define XML_SQR							18		/* ] */

#define XML_IN_COMMENT					19		/* <!--... */
#define XML_IN_COMMENT_DASH				20		/* - */
#define XML_IN_COMMENT_DASH_DASH		21		/* -- */
#define XML_IN_COMMENT_3_DASH			22		/* --- */

#define XML_IN_CDATA_TAG				23		/* <![CDATA[... */
#define XML_IN_CDATA_TAG_SQR			24		/* ] */
#define XML_IN_CDATA_TAG_SQR_SQR		25		/* ]] */
#define XML_IN_CDATA_TAG_3_SQR			26		/* ]]] */

#define PARSE_BUFFER_SIZE				20
#define PARSE_STACK_SIZE				200

#define END_TAG_TYPE(x)					(x->nesting-1 < PARSE_STACK_SIZE ? x->end_type[x->nesting-1] : XML_OP_1_END_UNKNOWN_TAG)

#define TO_LONG_CHAR(ch)				((unsigned char) (ch))

#define XML_STEP_NONE					0
#define XML_STEP_TAG					1
#define XML_STEP_ATTR					2
#define XML_STEP_VALUE					3
#define XML_STEP_NESTED					4

class CSXMLParser {
	public:
	CSXMLParser() :
		state(0),
		quote(0),
		step(0),
		type(0),
		count(0),
		nesting(0) {
	}
	virtual ~CSXMLParser() { }

	int32_t parseChar(wchar_t ch);
	void setDataType(int32_t t) { type = t; }
	int32_t getDataLen() { return count; }
	wchar_t *getDataPtr() { return buffer; }

	private:
	/* Internal information: */
	int32_t			state;
	int32_t			quote;
	int32_t			step;

	/* Data: output is always in the buffer: */
	int32_t			type;							/* Type of data in the buffer. */
	int32_t			count;							/* Size of the buffer.  */
	wchar_t			buffer[PARSE_BUFFER_SIZE];		/* Contains data to be added. */

	/* Signals: tag start and end: */
	int32_t			nesting;						/* Tag nesting depth. */
	uint8_t			end_type[PARSE_STACK_SIZE];		/* Stack of tag types */

	bool match_string(const char *ch);
	void increment_nesting(wchar_t ch);
};

#define XML_OP_1_MASK					0x0000000F
#define XML_ERROR						0x00001000

#define XML_OP_1_NOOP					0x00000000
#define XML_OP_1_END_TAG				0x00000001		/* < ... >   */
#define XML_OP_1_END_CLOSE_TAG			0x00000002		/* </ ... >  */
#define XML_OP_1_END_EMPTY_TAG			0x00000003		/* < ... />  */
#define XML_OP_1_END_PI_TAG				0x00000004		/* <? ... ?> */
#define XML_OP_1_END_ENTITY_TAG			0x00000005		/* <! ... >  */
#define XML_OP_1_END_BRACKET_TAG		0x00000006		/* <![ ... ]> */
#define XML_OP_1_END_UNKNOWN_TAG		0x00000007		/* <_ ... > */
#define XML_OP_1_START_CDATA_TAG		0x00000008		/* <![CDATA[ ... */
#define XML_OP_1_START_COMMENT			0x00000009		/* <!-- ... */
#define XML_OP_1_START_TAG				0x0000000A		/* <... */
#define XML_OP_1_ADD_ATTR				0x0000000B
#define XML_OP_1_END_CDATA				0x0000000C
#define XML_OP_1_END_CDATA_TAG			0x0000000D		/* ... ]]> */
#define XML_OP_1_END_COMMENT			0x0000000E		/* ... --> */

#define XML_DATA_MASK					0x000000F0

#define XML_NO_DATA						0x00000000
#define XML_DATA_TAG					0x00000010
#define XML_DATA_ATTR					0x00000020
#define XML_DATA_CDATA					0x00000030
#define XML_DATA_CDATA_TAG				0x00000040
#define XML_COMMENT						0x00000050
#define XML_DATA_VALUE					0x00000060

#define XML_OP_2_MASK					0x00000F00

#define XML_OP_2_NOOP					0x00000000
#define XML_OP_2_END_TAG				0x00000100
#define XML_OP_2_END_CLOSE_TAG			0x00000200
#define XML_OP_2_END_EMPTY_TAG			0x00000300
#define XML_OP_2_END_PI_TAG				0x00000400
#define XML_OP_2_END_ENTITY_TAG			0x00000500
#define XML_OP_2_END_BRACKET_TAG		0x00000600
#define XML_OP_2_END_UNKNOWN_TAG		0x00000700
#define XML_OP_2_START_CDATA_TAG		0x00000800
#define XML_OP_2_START_COMMENT			0x00000900

#define XML_noop						(XML_OP_2_NOOP|XML_NO_DATA)

#define XML_CDATA_CH					(XML_DATA_CDATA)
#define XML_end_cdata_TAG_CH			(XML_OP_1_END_CDATA|XML_DATA_TAG)
#define XML_start_tag_TAG_CH			(XML_OP_1_START_TAG|XML_DATA_TAG)
#define XML_add_attr_TAG_CH				(XML_OP_1_ADD_ATTR|XML_DATA_TAG)
#define XML_TAG_CH						(XML_DATA_TAG)
#define XML_start_tag_ATTR_CH			(XML_OP_1_START_TAG|XML_DATA_ATTR)
#define XML_add_attr_ATTR_CH			(XML_OP_1_ADD_ATTR|XML_DATA_ATTR)
#define XML_ATTR_CH						(XML_DATA_ATTR)
#define XML_start_tag_VALUE_CH			(XML_OP_1_START_TAG|XML_DATA_VALUE)
#define XML_add_attr_VALUE_CH			(XML_OP_1_ADD_ATTR|XML_DATA_VALUE)
#define XML_VALUE_CH					(XML_DATA_VALUE)
#define XML_start_tag_end_tag(x)		(XML_OP_1_START_TAG|((x) << 8))
#define XML_add_attr_end_tag(x)			(XML_OP_1_ADD_ATTR|((x) << 8))
#define XML_end_tag(x)					(x)
#define XML_start_tag_end_empty_tag		XML_start_tag_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_add_attr_end_empty_tag		XML_add_attr_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_end_empty_tag				XML_end_tag(XML_OP_1_END_EMPTY_TAG)
#define XML_start_tag_end_pi_tag		XML_start_tag_end_tag(XML_OP_1_END_PI_TAG)
#define XML_add_attr_end_pi_tag			XML_add_attr_end_tag(XML_OP_1_END_PI_TAG)
#define XML_end_pi_tag					XML_end_tag(XML_OP_1_END_PI_TAG)

#define XML_end_cdata_start_cdata_tag	(XML_OP_1_END_CDATA|XML_OP_2_START_CDATA_TAG)
#define XML_start_tag_start_cdata_tag	(XML_OP_1_START_TAG|XML_OP_2_START_CDATA_TAG)
#define XML_add_attr_start_cdata_tag	(XML_OP_1_ADD_ATTR|XML_OP_2_START_CDATA_TAG)
#define XML_start_cdata_tag				(XML_OP_1_START_CDATA_TAG)
#define XML_CDATA_TAG_CH				(XML_DATA_CDATA_TAG)
#define XML_end_cdata_tag				(XML_OP_1_END_CDATA_TAG)

#define XML_end_cdata_start_comment		(XML_OP_1_END_CDATA|XML_OP_2_START_COMMENT)
#define XML_start_tag_start_comment		(XML_OP_1_START_TAG|XML_OP_2_START_COMMENT)
#define XML_add_attr_start_comment		(XML_OP_1_ADD_ATTR|XML_OP_2_START_COMMENT)
#define XML_start_comment				(XML_OP_1_START_COMMENT)
#define XML_COMMENT_CH					(XML_COMMENT)
#define XML_end_comment					(XML_OP_1_END_COMMENT)

/* Standard charsets are ISO-8879-1, US-ASCII or UNICODE. None
 * require conversion!
 */
#define CHARSET_STANDARD				0
#define CHARSET_UTF_8					1
#define CHARSET_TO_CONVERT_8_BIT		2

class CSXMLProcessor : public CSXMLParser {
	public:
	CSXMLProcessor() :
		err_no(0),
		ip(false),
		tlength(0),
		nlength(0),
		vlength(0),
		utf8_count(0),
		utf8_length(0),
		elength(0) {
		err_message[0] = 0;
		charset[0] = 0;
		pr_tag[0] = 0;
		pr_name[0] = 0;
		pr_value[0] = 0;
		utf8_buffer[0] = 0;
		entity[0] = 0;
	}
	virtual ~CSXMLProcessor() { }

	/* This function processes a UNICODE character from an XML
	 * document returns parsing instructions (operations).
	 * Each instruction can consist of up to 3 operations. The
	 * operations must be executed in the following order:
	 * - Operation 1
	 * - Data operation, record one of the following:
	 *   - part of a tag name
	 *   - part of an attribute name
	 *   - part of an attribute value
	 *   - part of CDATA
	 * - Operation 2
	 * Output for the data operation (if any) is placed in the buffer
	 * in the state structure. The input state structure must be zeroed
	 * before processing begins. Input characters may be 1 byte or
	 * 2 byte. Output is always 2-byte UNICODE.
	 */
	int32_t processChar(wchar_t ch);

	bool getError(int32_t *err, char **msg);
	void setError(int32_t err, char *msg);
	void printError(char *prefix);

	private:
	int32_t			err_no;
	char			err_message[CS_XML_ERR_MSG_SIZE];

	private:
	/* When this function is called, use the name of the charset.
	 * to build the conversion table which maps characters in the
	 * range 128 to 255 to the unicode eqivalent.
	 */
	virtual bool buildConversionTable();

	int32_t			charset_type;
	char			charset[CS_MAX_XML_NAME_SIZE];
	wchar_t			conversion_table[128];

	bool			ip;
	size_t			tlength;
	char			pr_tag[CS_MAX_XML_NAME_SIZE];
	size_t			nlength;
	char			pr_name[CS_MAX_XML_NAME_SIZE];
	size_t			vlength;
	char			pr_value[CS_MAX_XML_NAME_SIZE];

	int32_t			utf8_count;
	int32_t			utf8_length;
	uint32_t		utf8_buffer[6];

	int32_t			elength;
	char			entity[CS_MAX_XML_NAME_SIZE];

	int32_t capture_initializer(wchar_t ch);
	int32_t entity_translator(wchar_t ch);
	int32_t charset_transformer(wchar_t ch);
	void appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen);
};

/* path is a / separated list of nodes to date. */
/* Name and path are given in lower-case!!! */

#define XML_KEEP_EMPTY_CDATA	1

class CSXMLString {
	public:
	CSXMLString() : stringPtr(NULL), stringLen(0), stringSize(0) {}
	virtual ~CSXMLString() { }

	public:
	bool addChar(char ch, CSXMLProcessor *xml);
	bool addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml);
	bool addString(const char *string, CSXMLProcessor *xml);
	void setEmpty();
	void setNull();
	char *lastComponent();
	char *findTrailingComponent(const char *comp);
	void truncate(char *ptr);

	char			*stringPtr;
	size_t			stringLen;
	size_t			stringSize;
};

class CSXML : public CSXMLProcessor {
	public:
	bool parseXML(int32_t flags);

	private:
	/*
	 * Return CS_XML_EOF_CHAR when there are no more characters.
	 */
	virtual bool getChar(wchar_t *ch) = 0;

	/*
	 * These methods are called as the input data
	 * is parsed.
	 */
	virtual bool openNode(char *path, char *value) = 0;
	virtual bool closeNode(char *path) = 0;
	virtual bool addAttribute(char *path, char *name, char *value) = 0;

	private:
	uint32_t		flags;

	CSXMLString		xml_path;
	CSXMLString		xml_name;
	CSXMLString		xml_value;

	int32_t nodeType(char *name);
	bool internalCloseNode(const char *name, bool single);
	bool internalOpenNode(const char *name);
};

class CSXMLPrint : public CSXML {
	private:
	virtual bool openNode(char *path, char *value);
	virtual bool closeNode(char *path);
	virtual bool addAttribute(char *path, char *name, char *value);
};

class CSXMLBuffer : public CSXMLPrint {
	public:
	bool parseString(const char *data, int32_t flags);
	bool parseData(const char *data, size_t len, int32_t flags);

	private:
	virtual bool getChar(wchar_t *ch);

	private:
	const char		*charData;
	size_t			dataLen;
	size_t			dataPos;
};

class CSXMLFile : public CSXMLPrint {
	public:
	bool parseFile(char *file_name, int32_t flags);

	private:
	virtual bool getChar(wchar_t *ch);

	private:
	char			*fileName;
	FILE			*file;
};

#endif