~drizzle-trunk/drizzle/development : contents of drizzled/lex_input

~drizzle-trunk/drizzle/development : (revision 1410.4.2)

/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
 *
 *  Copyright (C) 2008 Sun Microsystems
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef DRIZZLED_LEX_INPUT_STREAM_H
#define DRIZZLED_LEX_INPUT_STREAM_H

/**
  @brief This class represents the character input stream consumed during
  lexical analysis.

  In addition to consuming the input stream, this class performs some
  comment pre-processing, by filtering out-of-bound special text out of
  the query input stream.
  Two buffers, each with its own set of pointers, are maintained in
  parallel. The 'raw' buffer is the original query text, which may
  contain out-of-bound comments. The 'cpp' (for comments pre-processor)
  buffer is the pre-processed buffer that contains only the query text
  that should be seen once out-of-bound data is removed.
*/

namespace drizzled
{

/**
  Cursor over the raw query text that can optionally echo every consumed
  character into a second, pre-processed ('cpp') buffer.

  All accessors that only read the stream state are const-qualified so
  that they can be used through a const reference or pointer.
*/
class Lex_input_stream
{
public:
  Lex_input_stream(Session *session, const char* buff, unsigned int length);
  ~Lex_input_stream();

  /**
    Set the echo mode.

    When echo is true, characters consumed from the raw input stream are
    copied to the pre-processed buffer. When false, consumed characters
    are not copied.
    @param echo the echo mode.
  */
  void set_echo(bool echo)
  {
    m_echo= echo;
  }

  /**
    Skip binary data from the input stream.

    Behaves exactly like yySkipn(): the bytes are copied to the
    pre-processed buffer only when echo mode is on.
    @param n number of bytes to accept.
  */
  void skip_binary(int n)
  {
    yySkipn(n);
  }

  /**
    Get a character, and advance in the stream.
    In echo mode the character is also appended to the pre-processed buffer.
    @return the character that was just consumed.
  */
  char yyGet()
  {
    const char c= *m_ptr++;
    if (m_echo)
      *m_cpp_ptr++= c;
    return c;
  }

  /**
    Get the last character accepted.
    @return the character located just before the current raw position.
  */
  char yyGetLast() const
  {
    return m_ptr[-1];
  }

  /**
    Look at the next character to parse, but do not accept it.
    @return the character at the current raw position.
  */
  char yyPeek() const
  {
    return m_ptr[0];
  }

  /**
    Look ahead at some character to parse, without accepting anything.
    @param n offset of the character to look up, relative to the current
             raw position
    @return the character at that offset.
  */
  char yyPeekn(int n) const
  {
    return m_ptr[n];
  }

  /**
    Cancel the effect of the last yyGet() or yySkip().
    Note that the echo mode should not change between calls to yyGet / yySkip
    and yyUnget. The caller is responsible for ensuring that, because the
    pre-processed pointer is only rewound when echo mode is currently on.
  */
  void yyUnget()
  {
    m_ptr--;
    if (m_echo)
      m_cpp_ptr--;
  }

  /**
    Accept a character, by advancing the input stream.
    In echo mode the character is also appended to the pre-processed buffer.
  */
  void yySkip()
  {
    if (m_echo)
      *m_cpp_ptr++= *m_ptr++;
    else
      m_ptr++;
  }

  /**
    Accept multiple characters at once.
    In echo mode the characters are also appended to the pre-processed
    buffer.
    @param n the number of characters to accept.
  */
  void yySkipn(int n)
  {
    if (m_echo)
    {
      memcpy(m_cpp_ptr, m_ptr, n);
      m_cpp_ptr+= n;
    }
    m_ptr+= n;
  }

  /**
    End of file indicator for the query text to parse.
    @return true if the current raw position is at or past the end of the
            query
  */
  bool eof() const
  {
    return (m_ptr >= m_end_of_query);
  }

  /**
    End of file indicator for the query text to parse, with look-ahead.
    @param n offset, in characters, from the current raw position
    @return true if the position n characters ahead is at or past the end
            of the query, i.e. if n or fewer characters remain to parse
  */
  bool eof(int n) const
  {
    return ((m_ptr + n) >= m_end_of_query);
  }

  /** Get the raw query buffer. */
  const char *get_buf() const
  {
    return m_buf;
  }

  /** Get the pre-processed query buffer. */
  const char *get_cpp_buf() const
  {
    return m_cpp_buf;
  }

  /** Get the end of the raw query buffer. */
  const char *get_end_of_query() const
  {
    return m_end_of_query;
  }

  /**
    Mark the stream position as the start of a new token.

    The previous token start is remembered first, then both the start and
    the end markers are moved to the current position, in the raw buffer
    and in the pre-processed buffer alike.
  */
  void start_token()
  {
    m_tok_start_prev= m_tok_start;
    m_tok_start= m_ptr;
    m_tok_end= m_ptr;

    m_cpp_tok_start_prev= m_cpp_tok_start;
    m_cpp_tok_start= m_cpp_ptr;
    m_cpp_tok_end= m_cpp_ptr;
  }

  /**
    Adjust the starting position of the current token.
    This is used to compensate for starting whitespace. Unlike
    start_token(), the previous-token and token-end markers are left alone.
  */
  void restart_token()
  {
    m_tok_start= m_ptr;
    m_cpp_tok_start= m_cpp_ptr;
  }

  /** Get the token start position, in the raw buffer. */
  const char *get_tok_start() const
  {
    return m_tok_start;
  }

  /** Get the token start position, in the pre-processed buffer. */
  const char *get_cpp_tok_start() const
  {
    return m_cpp_tok_start;
  }

  /** Get the token end position, in the raw buffer. */
  const char *get_tok_end() const
  {
    return m_tok_end;
  }

  /** Get the token end position, in the pre-processed buffer. */
  const char *get_cpp_tok_end() const
  {
    return m_cpp_tok_end;
  }

  /** Get the previous token start position, in the raw buffer. */
  const char *get_tok_start_prev() const
  {
    return m_tok_start_prev;
  }

  /** Get the current stream pointer, in the raw buffer. */
  const char *get_ptr() const
  {
    return m_ptr;
  }

  /** Get the current stream pointer, in the pre-processed buffer. */
  const char *get_cpp_ptr() const
  {
    return m_cpp_ptr;
  }

  /**
    Get the length of the current token, in the raw buffer.
    @return number of characters between the token start and the current
            raw position, minus one
  */
  uint32_t yyLength() const
  {
    /*
      The assumption is that the lexical analyser is always 1 character ahead,
      which the -1 accounts for. The assert guards against the subtraction
      wrapping around when that assumption does not hold.
    */
    assert(m_ptr > m_tok_start);
    return static_cast<uint32_t>((m_ptr - m_tok_start) - 1);
  }

  /** Get the utf8-body string. */
  const char *get_body_utf8_str() const
  {
    return m_body_utf8;
  }

  /**
    Get the utf8-body length.
    @return distance between the start of the UTF8-body buffer and its
            current write position
  */
  uint32_t get_body_utf8_length() const
  {
    /* Pointer difference is ptrdiff_t; narrow it explicitly. */
    return static_cast<uint32_t>(m_body_utf8_ptr - m_body_utf8);
  }

  void body_utf8_start(Session *session, const char *begin_ptr);
  void body_utf8_append(const char *ptr);
  void body_utf8_append(const char *ptr, const char *end_ptr);
  void body_utf8_append_literal(const LEX_STRING *txt,
                                const char *end_ptr);

  /** Current thread. */
  Session *m_session;

  /** Current line number. */
  uint32_t yylineno;

  /** Length of the last token parsed. */
  uint32_t yytoklen;

  /** Interface with bison, value of the last token parsed. */
  LEX_YYSTYPE yylval;

  /** LALR(2) resolution, look ahead token. */
  int lookahead_token;

  /** LALR(2) resolution, value of the look ahead token. */
  LEX_YYSTYPE lookahead_yylval;

private:
  /** Pointer to the current position in the raw input stream. */
  const char *m_ptr;

  /** Starting position of the last token parsed, in the raw buffer. */
  const char *m_tok_start;

  /** Ending position of the previous token parsed, in the raw buffer. */
  const char *m_tok_end;

  /** End of the query text in the input stream, in the raw buffer. */
  const char *m_end_of_query;

  /** Starting position of the previous token parsed, in the raw buffer. */
  const char *m_tok_start_prev;

  /** Beginning of the query text in the input stream, in the raw buffer. */
  const char *m_buf;

  /** Length of the raw buffer. */
  uint32_t m_buf_length;

  /** Echo the parsed stream to the pre-processed buffer. */
  bool m_echo;

  /** Pre-processed buffer. */
  char *m_cpp_buf;

  /** Pointer to the current position in the pre-processed input stream. */
  char *m_cpp_ptr;

  /**
    Starting position of the last token parsed,
    in the pre-processed buffer.
  */
  const char *m_cpp_tok_start;

  /**
    Starting position of the previous token parsed,
    in the pre-processed buffer.
  */
  const char *m_cpp_tok_start_prev;

  /**
    Ending position of the previous token parsed,
    in the pre-processed buffer.
  */
  const char *m_cpp_tok_end;

  /** UTF8-body buffer created during parsing. */
  char *m_body_utf8;

  /** Pointer to the current position in the UTF8-body buffer. */
  char *m_body_utf8_ptr;

  /**
    Position in the pre-processed buffer. The query from m_cpp_buf to
    m_cpp_utf8_processed_ptr is converted to UTF8-body.
  */
  const char *m_cpp_utf8_processed_ptr;

public:

  /** Current state of the lexical analyser. */
  enum my_lex_states next_state;

  /** Token character bitmaps, to detect 7bit strings. */
  unsigned char tok_bitmap;

  /** SQL_MODE = IGNORE_SPACE. */
  bool ignore_space;

  /** State of the lexical analyser for comments. */
  enum_comment_state in_comment;

  /**
    Starting position of the TEXT_STRING or IDENT in the pre-processed
    buffer.

    NOTE: this member must be used within DRIZZLElex() function only.
  */
  const char *m_cpp_text_start;

  /**
    Ending position of the TEXT_STRING or IDENT in the pre-processed
    buffer.

    NOTE: this member must be used within DRIZZLElex() function only.
  */
  const char *m_cpp_text_end;

};

} /* namespace drizzled */

#endif /* DRIZZLED_LEX_INPUT_STREAM_H */

1014.4.8 by Jay Pipes Split Lex_input_stream out into its own header.	1	/* -- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; --
	2	* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
	3	*
	4	* Copyright (C) 2008 Sun Microsystems
	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU General Public License as published by
	8	* the Free Software Foundation; version 2 of the License.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
	18	*/
	19
	20	#ifndef DRIZZLED_LEX_INPUT_STREAM_H
	21	#define DRIZZLED_LEX_INPUT_STREAM_H
	22
	23	/**
	24	@brief This class represents the character input stream consumed during
	25	lexical analysis.
	26
	27	In addition to consuming the input stream, this class performs some
	28	comment pre processing, by filtering out out of bound special text
	29	from the query input stream.
	30	Two buffers, with pointers inside each buffers, are maintained in
	31	parallel. The 'raw' buffer is the original query text, which may
	32	contain out-of-bound comments. The 'cpp' (for comments pre processor)
	33	is the pre-processed buffer that contains only the query text that
	34	should be seen once out-of-bound data is removed.
	35	*/
	36
1280.1.10 by Monty Taylor Put everything in drizzled into drizzled namespace.	37	namespace drizzled
	38	{
	39
1014.4.8 by Jay Pipes Split Lex_input_stream out into its own header.	40	class Lex_input_stream
	41	{
	42	public:
	43	Lex_input_stream(Session *session, const char* buff, unsigned int length);
	44	~Lex_input_stream();
	45
	46	/**
	47	Set the echo mode.
	48
	49	When echo is true, characters parsed from the raw input stream are
	50	preserved. When false, characters parsed are silently ignored.
	51	@param echo the echo mode.
	52	*/
	53	void set_echo(bool echo)
	54	{
	55	m_echo= echo;
	56	}
	57
	58	/**
	59	Skip binary from the input stream.
	60	@param n number of bytes to accept.
	61	*/
	62	void skip_binary(int n)
	63	{
	64	if (m_echo)
	65	{
	66	memcpy(m_cpp_ptr, m_ptr, n);
	67	m_cpp_ptr += n;
	68	}
	69	m_ptr += n;
	70	}
	71
	72	/**
	73	Get a character, and advance in the stream.
	74	@return the next character to parse.
	75	*/
	76	char yyGet()
	77	{
	78	char c= *m_ptr++;
	79	if (m_echo)
	80	*m_cpp_ptr++ = c;
	81	return c;
	82	}
	83
	84	/**
	85	Get the last character accepted.
	86	@return the last character accepted.
	87	*/
	88	char yyGetLast()
	89	{
	90	return m_ptr[-1];
	91	}
	92
	93	/**
	94	Look at the next character to parse, but do not accept it.
	95	*/
	96	char yyPeek()
	97	{
	98	return m_ptr[0];
	99	}
	100
	101	/**
	102	Look ahead at some character to parse.
	103	@param n offset of the character to look up
104	*/
105	char yyPeekn(int n)
106	{
107	return m_ptr[n];
108	}
109
110	/**
111	Cancel the effect of the last yyGet() or yySkip().
112	Note that the echo mode should not change between calls to yyGet / yySkip
113	and yyUnget. The caller is responsible for ensuring that.
114	*/
115	void yyUnget()
116	{
117	m_ptr--;
118	if (m_echo)
119	m_cpp_ptr--;
120	}
121
122	/**
123	Accept a character, by advancing the input stream.
124	*/
125	void yySkip()
126	{
127	if (m_echo)
	128	*m_cpp_ptr++ = *m_ptr++;
129	else
130	m_ptr++;
131	}
132
133	/**
134	Accept multiple characters at once.
135	@param n the number of characters to accept.
136	*/
137	void yySkipn(int n)
138	{
139	if (m_echo)
140	{
141	memcpy(m_cpp_ptr, m_ptr, n);
142	m_cpp_ptr += n;
143	}
144	m_ptr += n;
145	}
146
147	/**
148	End of file indicator for the query text to parse.
149	@return true if there are no more characters to parse
150	*/
151	bool eof()
152	{
153	return (m_ptr >= m_end_of_query);
154	}
155
156	/**
157	End of file indicator for the query text to parse.
158	@param n number of characters expected
159	@return true if there are less than n characters to parse
160	*/
161	bool eof(int n)
162	{
163	return ((m_ptr + n) >= m_end_of_query);
164	}
165
166	/** Get the raw query buffer. */
167	const char *get_buf()
168	{
169	return m_buf;
170	}
171
172	/** Get the pre-processed query buffer. */
173	const char *get_cpp_buf()
174	{
175	return m_cpp_buf;
176	}
177
178	/** Get the end of the raw query buffer. */
179	const char *get_end_of_query()
180	{
181	return m_end_of_query;
182	}
183
184	/** Mark the stream position as the start of a new token. */
185	void start_token()
186	{
187	m_tok_start_prev= m_tok_start;
188	m_tok_start= m_ptr;
189	m_tok_end= m_ptr;
190
191	m_cpp_tok_start_prev= m_cpp_tok_start;
192	m_cpp_tok_start= m_cpp_ptr;
193	m_cpp_tok_end= m_cpp_ptr;
194	}
195
196	/**
197	Adjust the starting position of the current token.
198	This is used to compensate for starting whitespace.
199	*/
200	void restart_token()
201	{
202	m_tok_start= m_ptr;
203	m_cpp_tok_start= m_cpp_ptr;
204	}
205
206	/** Get the token start position, in the raw buffer. */
207	const char *get_tok_start()
208	{
209	return m_tok_start;
210	}
211
212	/** Get the token start position, in the pre-processed buffer. */
213	const char *get_cpp_tok_start()
214	{
215	return m_cpp_tok_start;
216	}
217
218	/** Get the token end position, in the raw buffer. */
219	const char *get_tok_end()
220	{
221	return m_tok_end;
222	}
223
224	/** Get the token end position, in the pre-processed buffer. */
225	const char *get_cpp_tok_end()
226	{
227	return m_cpp_tok_end;
228	}
229
230	/** Get the previous token start position, in the raw buffer. */
231	const char *get_tok_start_prev()
232	{
233	return m_tok_start_prev;
234	}
235
236	/** Get the current stream pointer, in the raw buffer. */
237	const char *get_ptr()
238	{
239	return m_ptr;
240	}
241
242	/** Get the current stream pointer, in the pre-processed buffer. */
243	const char *get_cpp_ptr()
244	{
245	return m_cpp_ptr;
246	}
247
248	/** Get the length of the current token, in the raw buffer. */
249	uint32_t yyLength()
250	{
251	/*
252	The assumption is that the lexical analyser is always 1 character ahead,
253	which the -1 account for.
254	*/
255	assert(m_ptr > m_tok_start);
256	return (uint32_t) ((m_ptr - m_tok_start) - 1);
257	}
258
259	/** Get the utf8-body string. */
260	const char *get_body_utf8_str()
261	{
262	return m_body_utf8;
263	}
264
265	/** Get the utf8-body length. */
266	uint32_t get_body_utf8_length()
267	{
268	return m_body_utf8_ptr - m_body_utf8;
269	}
270
	271	void body_utf8_start(Session *session, const char *begin_ptr);
272	void body_utf8_append(const char *ptr);
	273	void body_utf8_append(const char *ptr, const char *end_ptr);
1054.2.11 by Monty Taylor Removed copy_and_convert.	274	void body_utf8_append_literal(const LEX_STRING *txt,
1014.4.8 by Jay Pipes Split Lex_input_stream out into its own header.	275	const char *end_ptr);
	276
	277	/** Current thread. */
	278	Session *m_session;
	279
	280	/** Current line number. */
	281	uint32_t yylineno;
	282
	283	/** Length of the last token parsed. */
	284	uint32_t yytoklen;
	285
	286	/** Interface with bison, value of the last token parsed. */
	287	LEX_YYSTYPE yylval;
	288
	289	/** LALR(2) resolution, look ahead token.*/
	290	int lookahead_token;
	291
	292	/** LALR(2) resolution, value of the look ahead token.*/
	293	LEX_YYSTYPE lookahead_yylval;
	294
	295	private:
	296	/** Pointer to the current position in the raw input stream. */
	297	const char *m_ptr;
	298
	299	/** Starting position of the last token parsed, in the raw buffer. */
	300	const char *m_tok_start;
	301
	302	/** Ending position of the previous token parsed, in the raw buffer. */
	303	const char *m_tok_end;
	304
	305	/** End of the query text in the input stream, in the raw buffer. */
	306	const char *m_end_of_query;
	307
	308	/** Starting position of the previous token parsed, in the raw buffer. */
	309	const char *m_tok_start_prev;
	310
	311	/** Begining of the query text in the input stream, in the raw buffer. */
	312	const char *m_buf;
	313
	314	/** Length of the raw buffer. */
	315	uint32_t m_buf_length;
	316
	317	/** Echo the parsed stream to the pre-processed buffer. */
	318	bool m_echo;
	319
	320	/** Pre-processed buffer. */
	321	char *m_cpp_buf;
	322
	323	/** Pointer to the current position in the pre-processed input stream. */
	324	char *m_cpp_ptr;
	325
	326	/**
	327	Starting position of the last token parsed,
	328	in the pre-processed buffer.
	329	*/
	330	const char *m_cpp_tok_start;
	331
	332	/**
	333	Starting position of the previous token parsed,
	334	in the pre-procedded buffer.
	335	*/
	336	const char *m_cpp_tok_start_prev;
	337
	338	/**
339	Ending position of the previous token parsed,
340	in the pre-processed buffer.
341	*/
342	const char *m_cpp_tok_end;
343
344	/** UTF8-body buffer created during parsing. */
345	char *m_body_utf8;
346
347	/** Pointer to the current position in the UTF8-body buffer. */
348	char *m_body_utf8_ptr;
349
350	/**
351	Position in the pre-processed buffer. The query from m_cpp_buf to
352	m_cpp_utf_processed_ptr is converted to UTF8-body.
353	*/
354	const char *m_cpp_utf8_processed_ptr;
355
356	public:
357
358	/** Current state of the lexical analyser. */
359	enum my_lex_states next_state;
360
361	/** Token character bitmaps, to detect 7bit strings. */
362	unsigned char tok_bitmap;
363
364	/** SQL_MODE = IGNORE_SPACE. */
365	bool ignore_space;
366
367	/** State of the lexical analyser for comments. */
368	enum_comment_state in_comment;
369
370	/**
371	Starting position of the TEXT_STRING or IDENT in the pre-processed
372	buffer.
373
374	NOTE: this member must be used within DRIZZLElex() function only.
375	*/
376	const char *m_cpp_text_start;
377
378	/**
379	Ending position of the TEXT_STRING or IDENT in the pre-processed
380	buffer.
381
382	NOTE: this member must be used within DRIZZLElex() function only.
383	*/
384	const char *m_cpp_text_end;
385
386	};
1280.1.10 by Monty Taylor Put everything in drizzled into drizzled namespace.	387
	388	} /* namespace drizzled */
	389
1014.4.8 by Jay Pipes Split Lex_input_stream out into its own header.	390	#endif /* DRIZZLED_LEX_INPUT_STREAM_H */