117
117
m_cpp_utf8_processed_ptr will be set in the end of the
120
void Lex_input_stream::body_utf8_append(const char *ptr, const char *end_ptr)
120
void Lex_input_stream::body_utf8_append(const char *ptr,
122
123
assert(m_cpp_buf <= ptr && ptr <= m_cpp_buf + m_buf_length);
123
124
assert(m_cpp_buf <= end_ptr && end_ptr <= m_cpp_buf + m_buf_length);
160
161
m_cpp_utf8_processed_ptr will be set in the end of the
163
void Lex_input_stream::body_utf8_append_literal(str_ref txt, const char *end_ptr)
164
void Lex_input_stream::body_utf8_append_literal(const lex_string_t *txt,
165
167
if (!m_cpp_utf8_processed_ptr)
168
170
/* NOTE: utf_txt.length is in bytes, not in symbols. */
170
memcpy(m_body_utf8_ptr, txt.data(), txt.size());
171
m_body_utf8_ptr += txt.size();
172
memcpy(m_body_utf8_ptr, txt->str, txt->length);
173
m_body_utf8_ptr += txt->length;
172
174
*m_body_utf8_ptr= 0;
174
176
m_cpp_utf8_processed_ptr= end_ptr;
273
276
const char *tok= lip->get_tok_start();
274
277
uint32_t tok_pos= 0;
275
278
for (;tok_pos<len && tok_pos<63;tok_pos++)
276
tok_upper[tok_pos]= system_charset_info->toupper(tok[tok_pos]);
279
tok_upper[tok_pos]=my_toupper(system_charset_info, tok[tok_pos]);
277
280
tok_upper[tok_pos]=0;
279
282
const SYMBOL *symbol= lookup_symbol(tok_upper, len, function);
292
295
/* make a copy of token before ptr and set yytoklen */
293
296
static lex_string_t get_token(Lex_input_stream *lip, uint32_t skip, uint32_t length)
295
299
lip->yyUnget(); // ptr points now after last token char
296
lip->yytoklen= length;
298
tmp.assign(lip->m_session->mem.strdup(lip->get_tok_start() + skip, length), length);
300
tmp.length=lip->yytoklen=length;
301
tmp.str= lip->m_session->mem.strdup(lip->get_tok_start() + skip, tmp.length);
299
303
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
300
lip->m_cpp_text_end= lip->m_cpp_text_start + tmp.size();
304
lip->m_cpp_text_end= lip->m_cpp_text_start + tmp.length;
312
317
uint32_t length, char quote)
314
320
lip->yyUnget(); // ptr points now after last token char
315
lip->yytoklen= length;
317
tmp.assign((char*)lip->m_session->mem.alloc(length + 1), length);
321
tmp.length= lip->yytoklen=length;
322
tmp.str=(char*) lip->m_session->mem.alloc(tmp.length+1);
318
323
const char* from= lip->get_tok_start() + skip;
319
char* to= (char*)tmp.data();
324
char* to= (char*)tmp.str;
320
325
const char* end= to+length;
322
327
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
342
347
static char *get_text(Lex_input_stream *lip, int pre_skip, int post_skip)
344
350
bool found_escape= false;
345
const charset_info_st* const cs= lip->m_session->charset();
351
const charset_info_st * const cs= lip->m_session->charset();
347
353
lip->tok_bitmap= 0;
348
unsigned char sep= lip->yyGetLast(); // String should end with this
349
while (not lip->eof())
354
sep= lip->yyGetLast(); // String should end with this
351
unsigned char c= lip->yyGet();
352
358
lip->tok_bitmap|= c;
651
657
case MY_LEX_ESCAPE:
652
658
if (lip->yyGet() == 'N')
653
659
{ // Allow \N as shortcut for NULL
654
yylval->lex_str.assign("\\N", 2);
660
yylval->lex_str.str=(char*) "\\N";
661
yylval->lex_str.length=2;
657
664
case MY_LEX_CHAR: // Unknown or single char token
658
665
case MY_LEX_SKIP: // This should not happen
659
666
if (c == '-' && lip->yyPeek() == '-' &&
660
(cs->isspace(lip->yyPeekn(1)) ||
661
cs->iscntrl(lip->yyPeekn(1))))
667
(my_isspace(cs,lip->yyPeekn(1)) ||
668
my_iscntrl(cs,lip->yyPeekn(1))))
663
670
state=MY_LEX_COMMENT;
751
758
lip->yySkip(); // next state does a unget
753
yylval->lex_str= get_token(lip, 0, length);
760
yylval->lex_str=get_token(lip, 0, length);
755
762
lip->body_utf8_append(lip->m_cpp_text_start);
757
lip->body_utf8_append_literal(yylval->lex_str, lip->m_cpp_text_end);
764
lip->body_utf8_append_literal(&yylval->lex_str, lip->m_cpp_text_end);
759
766
return(result_state); // IDENT or IDENT_QUOTED
761
768
case MY_LEX_IDENT_SEP: // Found ident and now '.'
762
yylval->lex_str.assign(lip->get_ptr(), 1);
769
yylval->lex_str.str= (char*) lip->get_ptr();
770
yylval->lex_str.length= 1;
763
771
c= lip->yyGet(); // should be '.'
764
772
lip->next_state= MY_LEX_IDENT_START;// Next is an ident (not a keyword)
765
773
if (!ident_map[(uint8_t)lip->yyPeek()]) // Probably ` or "
775
while (cs->isxdigit((c = lip->yyGet()))) ;
783
while (my_isxdigit(cs,(c = lip->yyGet()))) ;
776
784
if ((lip->yyLength() >= 3) && !ident_map[c])
779
yylval->lex_str= get_token(lip, 2, lip->yyLength()-2);
787
yylval->lex_str=get_token(lip, 2, lip->yyLength()-2);
780
788
return (HEX_NUM);
808
816
if (c == 'e' || c == 'E')
810
818
// The following test is written this way to allow numbers of type 1e1
811
if (cs->isdigit(lip->yyPeek()) ||
819
if (my_isdigit(cs,lip->yyPeek()) ||
812
820
(c=(lip->yyGet())) == '+' || c == '-')
814
if (cs->isdigit(lip->yyPeek())) // Number must have digit after sign
822
if (my_isdigit(cs,lip->yyPeek())) // Number must have digit after sign
817
while (cs->isdigit(lip->yyGet())) ;
818
yylval->lex_str= get_token(lip, 0, lip->yyLength());
825
while (my_isdigit(cs,lip->yyGet())) ;
826
yylval->lex_str=get_token(lip, 0, lip->yyLength());
819
827
return(FLOAT_NUM);
850
858
yylval->lex_str= get_token(lip, 0, lip->yyLength());
852
860
lip->body_utf8_append(lip->m_cpp_text_start);
853
lip->body_utf8_append_literal(yylval->lex_str, lip->m_cpp_text_end);
862
lip->body_utf8_append_literal(&yylval->lex_str, lip->m_cpp_text_end);
855
864
return(result_state);
877
886
lip->skip_binary(var_length-1);
879
yylval->lex_str= double_quotes
880
? get_quoted_token(lip, 1, lip->yyLength() - double_quotes - 1, quote_char)
881
: get_token(lip, 1, lip->yyLength() - 1);
889
yylval->lex_str=get_quoted_token(lip, 1, lip->yyLength() - double_quotes -1, quote_char);
891
yylval->lex_str=get_token(lip, 1, lip->yyLength() -1);
882
892
if (c == quote_char)
883
893
lip->yySkip(); // Skip end `
884
894
lip->next_state= MY_LEX_START;
885
895
lip->body_utf8_append(lip->m_cpp_text_start);
886
lip->body_utf8_append_literal(yylval->lex_str, lip->m_cpp_text_end);
896
lip->body_utf8_append_literal(&yylval->lex_str, lip->m_cpp_text_end);
897
return(IDENT_QUOTED);
889
899
case MY_LEX_INT_OR_REAL: // Complete int or incomplete real
891
901
{ // Found complete integer number.
892
902
yylval->lex_str=get_token(lip, 0, lip->yyLength());
893
return int_token(yylval->lex_str.data(), yylval->lex_str.size());
903
return int_token(yylval->lex_str.str,yylval->lex_str.length);
896
906
case MY_LEX_REAL: // Incomplete real number
897
while (cs->isdigit(c = lip->yyGet())) ;
907
while (my_isdigit(cs,c = lip->yyGet())) ;
899
909
if (c == 'e' || c == 'E')
901
911
c = lip->yyGet();
902
912
if (c == '-' || c == '+')
903
913
c = lip->yyGet(); // Skip sign
914
if (!my_isdigit(cs,c))
905
915
{ // No digit after sign
906
916
state= MY_LEX_CHAR;
909
while (cs->isdigit(lip->yyGet())) ;
919
while (my_isdigit(cs,lip->yyGet())) ;
910
920
yylval->lex_str=get_token(lip, 0, lip->yyLength());
911
921
return(FLOAT_NUM);
916
926
case MY_LEX_HEX_NUMBER: // Found x'hexstring'
917
927
lip->yySkip(); // Accept opening '
918
while (cs->isxdigit((c= lip->yyGet()))) ;
928
while (my_isxdigit(cs, (c= lip->yyGet()))) ;
920
930
return(ABORT_SYM); // Illegal hex constant
921
931
lip->yySkip(); // Accept closing '
922
932
length= lip->yyLength(); // Length of hexnum+3
924
return ABORT_SYM; // odd number of hex digits
933
if ((length % 2) == 0)
934
return(ABORT_SYM); // odd number of hex digits
925
935
yylval->lex_str=get_token(lip,
927
937
length-3); // don't count x' and last '
987
997
/* " used for strings */
988
998
case MY_LEX_STRING: // Incomplete text string
989
if (!(yylval->lex_str.str_ = get_text(lip, 1, 1)))
999
if (!(yylval->lex_str.str = get_text(lip, 1, 1)))
991
1001
state= MY_LEX_CHAR; // Read char by char
994
yylval->lex_str.assign(yylval->lex_str.data(), lip->yytoklen);
1004
yylval->lex_str.length=lip->yytoklen;
996
1006
lip->body_utf8_append(lip->m_cpp_text_start);
997
lip->body_utf8_append_literal(yylval->lex_str, lip->m_cpp_text_end);
1008
lip->body_utf8_append_literal(&yylval->lex_str, lip->m_cpp_text_end);
999
1010
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
1000
1011
return(TEXT_STRING);
1162
1173
/* Actually real shouldn't start with . but allow them anyhow */
1164
1175
case MY_LEX_REAL_OR_POINT:
1165
if (cs->isdigit(lip->yyPeek()))
1176
if (my_isdigit(cs,lip->yyPeek()))
1166
1177
state= MY_LEX_REAL; // Real
1184
1195
lip->next_state=MY_LEX_HOSTNAME;
1187
yylval->lex_str.assign(lip->get_ptr(), 1);
1198
yylval->lex_str.str=(char*) lip->get_ptr();
1199
yylval->lex_str.length=1;
1190
1202
case MY_LEX_HOSTNAME: // end '@' of user@hostname
1191
1203
for (c=lip->yyGet() ;
1192
cs->isalnum(c) || c == '.' || c == '_' || c == '$';
1204
my_isalnum(cs,c) || c == '.' || c == '_' || c == '$';
1193
1205
c= lip->yyGet()) ;
1194
1206
yylval->lex_str=get_token(lip, 0, lip->yyLength());
1195
1207
return(LEX_HOSTNAME);
1197
1209
case MY_LEX_SYSTEM_VAR:
1198
yylval->lex_str.assign(lip->get_ptr(), 1);
1210
yylval->lex_str.str=(char*) lip->get_ptr();
1211
yylval->lex_str.length=1;
1199
1212
lip->yySkip(); // Skip '@'
1200
1213
lip->next_state= (state_map[(uint8_t)lip->yyPeek()] ==
1201
1214
MY_LEX_USER_VARIABLE_DELIMITER ?
1229
1242
yylval->lex_str=get_token(lip, 0, length);
1231
1244
lip->body_utf8_append(lip->m_cpp_text_start);
1232
lip->body_utf8_append_literal(yylval->lex_str, lip->m_cpp_text_end);
1234
return result_state;
1246
lip->body_utf8_append_literal(&yylval->lex_str, lip->m_cpp_text_end);
1248
return(result_state);