in sql/sql_lex.cc [1416:2156]
/**
  Scan one token from the session's parser input stream.

  The scanner is a state machine driven by the local variable `state`. It
  starts in lip->next_state (the state requested by the previous call),
  resets lip->next_state to MY_LEX_START, and then loops over the switch
  below until one of the cases returns a token.

  @param[out] yylval  Semantic value of the token. Cases that return a token
                      with text fill yylval->lex_str; the character set
                      introducer case also sets yylval->charset.
  @param      thd     Session. Provides the input stream
                      (thd->m_parser_state->m_lip), the character set
                      (thd->charset()) and sql_mode (MODE_ANSI_QUOTES).

  @return The token code. Single-character tokens are returned as the
          character itself. ABORT_SYM is returned for malformed input
          (unterminated quoted identifier, bad x'..' / b'..' literal,
          unclosed comment, empty name after "@@"). END_OF_INPUT is returned
          at end of input, and 0 on any later call.
*/
static int lex_one_token(YYSTYPE *yylval, THD *thd)
{
uchar c= 0;
bool comment_closed;
int tokval, result_state;
uint length;
enum my_lex_states state;
Lex_input_stream *lip= & thd->m_parser_state->m_lip;
const CHARSET_INFO *cs= thd->charset();
// state_map[byte] is the lexer state that handles a token starting with byte.
const my_lex_states *state_map= cs->state_maps->main_map;
// ident_map[byte] is non-zero for bytes that may continue an identifier.
const uchar *ident_map= cs->ident_map;
lip->yylval=yylval; // The global state
lip->start_token();
// Resume in the state requested by the previous call; unless a case below
// says otherwise, the next call starts a fresh token.
state=lip->next_state;
lip->next_state=MY_LEX_START;
for (;;)
{
switch (state) {
case MY_LEX_START: // Start of token
// Skip starting whitespace
while(state_map[c= lip->yyPeek()] == MY_LEX_SKIP)
{
if (c == '\n')
lip->yylineno++;
lip->yySkip();
}
/* Start of real token */
lip->restart_token();
c= lip->yyGet();
// Dispatch on the first character of the token.
state= state_map[c];
break;
case MY_LEX_CHAR: // Unknown or single char token
case MY_LEX_SKIP: // This should not happen
// "--" followed by whitespace or a control character starts a comment
// that runs to the end of the line (see MY_LEX_COMMENT).
if (c == '-' && lip->yyPeek() == '-' &&
(my_isspace(cs,lip->yyPeekn(1)) ||
my_iscntrl(cs,lip->yyPeekn(1))))
{
state=MY_LEX_COMMENT;
break;
}
// "->" and "->>" are returned as dedicated JSON operator tokens.
if (c == '-' && lip->yyPeek() == '>') // '->'
{
lip->yySkip();
lip->next_state= MY_LEX_START;
if (lip->yyPeek() == '>')
{
lip->yySkip();
return JSON_UNQUOTED_SEPARATOR_SYM;
}
return JSON_SEPARATOR_SYM;
}
if (c != ')')
lip->next_state= MY_LEX_START; // Allow signed numbers
if (c == ',')
{
/*
Warning:
This is a work around, to make the "remember_name" rule in
sql/sql_yacc.yy work properly.
The problem is that, when parsing "select expr1, expr2",
the code generated by bison executes the *pre* action
remember_name (see select_item) *before* actually parsing the
first token of expr2.
*/
lip->restart_token();
}
else
{
/*
Check for a placeholder: it should not precede a possible identifier
because of binlogging: when a placeholder is replaced with
its value in a query for the binlog, the query must stay
grammatically correct.
*/
if (c == '?' && lip->stmt_prepare_mode && !ident_map[lip->yyPeek()])
return(PARAM_MARKER);
}
// Any other character is its own token.
return((int) c);
case MY_LEX_IDENT_OR_NCHAR:
// Without a following quote this is an ordinary identifier.
if (lip->yyPeek() != '\'')
{
state= MY_LEX_IDENT;
break;
}
/* Found N'string' */
lip->yySkip(); // Skip '
// NOTE(review): the 2 and 1 passed to get_text() look like the number of
// leading (N') and trailing (') bytes to drop -- confirm against get_text().
if (!(yylval->lex_str.str = get_text(lip, 2, 1)))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
yylval->lex_str.length= lip->yytoklen;
return(NCHAR_STRING);
case MY_LEX_IDENT_OR_HEX:
if (lip->yyPeek() == '\'')
{ // Found x'hex-number'
state= MY_LEX_HEX_NUMBER;
break;
}
// Fall through.
case MY_LEX_IDENT_OR_BIN:
if (lip->yyPeek() == '\'')
{ // Found b'bin-number'
state= MY_LEX_BIN_NUMBER;
break;
}
// Fall through.
case MY_LEX_IDENT:
const char *start;
if (use_mb(cs))
{
// Multi-byte character set: the result is always IDENT_QUOTED.
result_state= IDENT_QUOTED;
// Handle the first character of the identifier, which was already read.
switch (my_mbcharlen(cs, lip->yyGetLast()))
{
case 1:
break;
case 0:
if (my_mbmaxlenlen(cs) < 2)
break;
/* else fall through */
default:
int l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query());
// Not a valid multi-byte character: re-scan it as a single character.
if (l == 0) {
state = MY_LEX_CHAR;
continue;
}
// The lead byte is already consumed; skip the remaining l - 1 bytes.
lip->skip_binary(l - 1);
}
// Consume the rest of the identifier, skipping whole multi-byte characters.
while (ident_map[c=lip->yyGet()])
{
switch (my_mbcharlen(cs, c))
{
case 1:
break;
case 0:
if (my_mbmaxlenlen(cs) < 2)
break;
/* else fall through */
default:
int l;
if ((l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query())) == 0)
break;
lip->skip_binary(l-1);
}
}
}
else
{
// OR all identifier bytes together; bit 0x80 of the result is set if
// any of them was non-ASCII.
for (result_state= c; ident_map[c= lip->yyGet()]; result_state|= c) ;
/* If there were non-ASCII characters, mark that we must convert */
result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
}
// Remember the token length and the stream position before any whitespace
// is skipped below.
length= lip->yyLength();
start= lip->get_ptr();
if (lip->ignore_space)
{
/*
If we find a space then this can't be an identifier. We notice this
below by checking start != lip->get_ptr().
*/
for (; state_map[c] == MY_LEX_SKIP ; c= lip->yyGet())
{
if (c == '\n')
lip->yylineno++;
}
}
// An identifier directly followed by '.' and an identifier character:
// the next call returns the '.' (MY_LEX_IDENT_SEP).
if (start == lip->get_ptr() && c == '.' && ident_map[lip->yyPeek()])
lip->next_state=MY_LEX_IDENT_SEP;
else
{ // '(' must follow directly if function
lip->yyUnget();
// The third argument tells find_keyword() whether '(' follows the word.
if ((tokval = find_keyword(lip, length, c == '(')))
{
lip->next_state= MY_LEX_START; // Allow signed numbers
return(tokval); // Was keyword
}
lip->yySkip(); // next state does a unget
}
yylval->lex_str=get_token(lip, 0, length);
/*
Note: "SELECT _bla AS 'alias'"
_bla should be considered as a IDENT if charset haven't been found.
So we don't use MYF(MY_WME) with get_charset_by_csname to avoid
producing an error.
*/
if (yylval->lex_str.str[0] == '_')
{
// Note: this local `cs` shadows the session character set declared at the
// top of the function for the rest of this block.
CHARSET_INFO *cs= get_charset_by_csname(yylval->lex_str.str + 1,
MY_CS_PRIMARY, MYF(0));
if (cs)
{
yylval->charset= cs;
// Remembered so that the string literal that follows is appended to the
// UTF-8 body using this character set (see MY_LEX_STRING).
lip->m_underscore_cs= cs;
lip->body_utf8_append(lip->m_cpp_text_start,
lip->get_cpp_tok_start() + length);
return(UNDERSCORE_CHARSET);
}
}
lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
lip->m_cpp_text_end);
return(result_state); // IDENT or IDENT_QUOTED
case MY_LEX_IDENT_SEP: // Found ident and now '.'
yylval->lex_str.str= (char*) lip->get_ptr();
yylval->lex_str.length= 1;
c= lip->yyGet(); // should be '.'
lip->next_state= MY_LEX_IDENT_START;// Next is an ident (not a keyword)
if (!ident_map[lip->yyPeek()]) // Probably ` or "
lip->next_state= MY_LEX_START;
return((int) c);
case MY_LEX_NUMBER_IDENT: // number or ident which num-start
// A leading '0' may introduce a 0x... or 0b... literal.
if (lip->yyGetLast() == '0')
{
c= lip->yyGet();
if (c == 'x')
{
while (my_isxdigit(cs,(c = lip->yyGet()))) ;
// Accept as a hex literal only if yyLength() >= 3 and the terminating
// character cannot continue an identifier; otherwise scan an identifier.
if ((lip->yyLength() >= 3) && !ident_map[c])
{
/* skip '0x' */
yylval->lex_str=get_token(lip, 2, lip->yyLength()-2);
return (HEX_NUM);
}
lip->yyUnget();
state= MY_LEX_IDENT_START;
break;
}
else if (c == 'b')
{
while ((c= lip->yyGet()) == '0' || c == '1') ;
// Same acceptance rule as for the hex literal above.
if ((lip->yyLength() >= 3) && !ident_map[c])
{
/* Skip '0b' */
yylval->lex_str= get_token(lip, 2, lip->yyLength()-2);
return (BIN_NUM);
}
lip->yyUnget();
state= MY_LEX_IDENT_START;
break;
}
lip->yyUnget();
}
// Consume the run of decimal digits.
while (my_isdigit(cs, (c = lip->yyGet()))) ;
if (!ident_map[c])
{ // Can't be identifier
state=MY_LEX_INT_OR_REAL;
break;
}
if (c == 'e' || c == 'E')
{
// The following test is written this way to allow numbers of type 1e1
if (my_isdigit(cs,lip->yyPeek()) ||
(c=(lip->yyGet())) == '+' || c == '-')
{ // Allow 1E+10
if (my_isdigit(cs,lip->yyPeek())) // Number must have digit after sign
{
lip->yySkip();
while (my_isdigit(cs,lip->yyGet())) ;
yylval->lex_str=get_token(lip, 0, lip->yyLength());
return(FLOAT_NUM);
}
}
lip->yyUnget();
}
// Digits followed by an identifier character: treat the whole thing as an
// identifier.
// fall through
case MY_LEX_IDENT_START: // We come here after '.'
result_state= IDENT;
if (use_mb(cs))
{
result_state= IDENT_QUOTED;
while (ident_map[c=lip->yyGet()])
{
switch (my_mbcharlen(cs, c))
{
case 1:
break;
case 0:
if (my_mbmaxlenlen(cs) < 2)
break;
/* else fall through */
default:
int l;
if ((l = my_ismbchar(cs,
lip->get_ptr() -1,
lip->get_end_of_query())) == 0)
break;
lip->skip_binary(l-1);
}
}
}
else
{
for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c) ;
/* If there were non-ASCII characters, mark that we must convert */
result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
}
if (c == '.' && ident_map[lip->yyPeek()])
lip->next_state=MY_LEX_IDENT_SEP;// Next is '.'
// No keyword lookup here: the text is always returned as an identifier.
yylval->lex_str= get_token(lip, 0, lip->yyLength());
lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
lip->m_cpp_text_end);
return(result_state);
case MY_LEX_USER_VARIABLE_DELIMITER: // Found quote char
{
// Number of doubled (escaped) quote characters seen inside the identifier.
uint double_quotes= 0;
char quote_char= c; // Used char
for(;;)
{
c= lip->yyGet();
if (c == 0)
{
lip->yyUnget();
return ABORT_SYM; // Unmatched quotes
}
int var_length;
if ((var_length= my_mbcharlen(cs, c)) == 1)
{
if (c == quote_char)
{
// A single quote character ends the identifier; a doubled one stands
// for a literal quote character inside it.
if (lip->yyPeek() != quote_char)
break;
c=lip->yyGet();
double_quotes++;
continue;
}
}
else if (use_mb(cs))
{
// Skip the trailing bytes of a multi-byte character so that they are
// never compared against quote_char.
if ((var_length= my_ismbchar(cs, lip->get_ptr() - 1,
lip->get_end_of_query())))
lip->skip_binary(var_length-1);
}
}
// With doubled quotes present, get_quoted_token() is used and the length
// is reduced by the number of doubled quotes.
if (double_quotes)
yylval->lex_str=get_quoted_token(lip, 1,
lip->yyLength() - double_quotes -1,
quote_char);
else
yylval->lex_str=get_token(lip, 1, lip->yyLength() -1);
if (c == quote_char)
lip->yySkip(); // Skip end `
lip->next_state= MY_LEX_START;
lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
lip->m_cpp_text_end);
return(IDENT_QUOTED);
}
case MY_LEX_INT_OR_REAL: // Complete int or incomplete real
if (c != '.')
{ // Found complete integer number.
yylval->lex_str=get_token(lip, 0, lip->yyLength());
return int_token(yylval->lex_str.str, (uint) yylval->lex_str.length);
}
// fall through
case MY_LEX_REAL: // Incomplete real number
// Consume the fraction digits, then an optional exponent.
while (my_isdigit(cs,c = lip->yyGet())) ;
if (c == 'e' || c == 'E')
{
c = lip->yyGet();
if (c == '-' || c == '+')
c = lip->yyGet(); // Skip sign
if (!my_isdigit(cs,c))
{ // No digit after sign
state= MY_LEX_CHAR;
break;
}
while (my_isdigit(cs,lip->yyGet())) ;
yylval->lex_str=get_token(lip, 0, lip->yyLength());
return(FLOAT_NUM);
}
// No exponent: the number is a decimal.
yylval->lex_str=get_token(lip, 0, lip->yyLength());
return(DECIMAL_NUM);
case MY_LEX_HEX_NUMBER: // Found x'hexstring'
lip->yySkip(); // Accept opening '
while (my_isxdigit(cs, (c= lip->yyGet()))) ;
if (c != '\'')
return(ABORT_SYM); // Illegal hex constant
lip->yySkip(); // Accept closing '
length= lip->yyLength(); // Length of hexnum+3
// length includes 3 extra characters, so an even length means an odd
// number of hex digits.
if ((length % 2) == 0)
return(ABORT_SYM); // odd number of hex digits
yylval->lex_str=get_token(lip,
2, // skip x'
length-3); // don't count x' and last '
return (HEX_NUM);
case MY_LEX_BIN_NUMBER: // Found b'bin-string'
lip->yySkip(); // Accept opening '
while ((c= lip->yyGet()) == '0' || c == '1') ;
if (c != '\'')
return(ABORT_SYM); // Illegal binary constant
lip->yySkip(); // Accept closing '
length= lip->yyLength(); // Length of bin-num + 3
yylval->lex_str= get_token(lip,
2, // skip b'
length-3); // don't count b' and last '
return (BIN_NUM);
case MY_LEX_CMP_OP: // Incomplete comparison operator
// Take at most one more operator character, then look the operator up in
// the keyword table.
if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
lip->yySkip();
if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
{
lip->next_state= MY_LEX_START; // Allow signed numbers
return(tokval);
}
state = MY_LEX_CHAR; // Something fishy found
break;
case MY_LEX_LONG_CMP_OP: // Incomplete comparison operator
// Like MY_LEX_CMP_OP, but the operator may be up to three characters long.
if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
{
lip->yySkip();
if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP)
lip->yySkip();
}
if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
{
lip->next_state= MY_LEX_START; // Found long op
return(tokval);
}
state = MY_LEX_CHAR; // Something fishy found
break;
case MY_LEX_BOOL:
// Only a doubled character forms a boolean operator; a single one is
// returned as a plain character.
if (c != lip->yyPeek())
{
state=MY_LEX_CHAR;
break;
}
lip->yySkip();
tokval = find_keyword(lip,2,0); // Is a bool operator
lip->next_state= MY_LEX_START; // Allow signed numbers
return(tokval);
case MY_LEX_STRING_OR_DELIMITER:
// With MODE_ANSI_QUOTES the character quotes an identifier, not a string.
if (thd->variables.sql_mode & MODE_ANSI_QUOTES)
{
state= MY_LEX_USER_VARIABLE_DELIMITER;
break;
}
/* " used for strings */
// Fall through.
case MY_LEX_STRING: // Incomplete text string
if (!(yylval->lex_str.str = get_text(lip, 1, 1)))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
yylval->lex_str.length=lip->yytoklen;
lip->body_utf8_append(lip->m_cpp_text_start);
// Use the character set of a preceding introducer (if any) for this
// literal, then clear it so that it applies to this literal only.
lip->body_utf8_append_literal(thd, &yylval->lex_str,
lip->m_underscore_cs ? lip->m_underscore_cs : cs,
lip->m_cpp_text_end);
lip->m_underscore_cs= NULL;
return(TEXT_STRING);
case MY_LEX_COMMENT: // Comment
thd->m_parser_state->add_comment();
// Skip to the end of the line or to the terminating 0 byte.
while ((c = lip->yyGet()) != '\n' && c) ;
lip->yyUnget(); // Safety against eof
state = MY_LEX_START; // Try again
break;
case MY_LEX_LONG_COMMENT: /* Long C comment? */
if (lip->yyPeek() != '*')
{
state=MY_LEX_CHAR; // Probable division
break;
}
thd->m_parser_state->add_comment();
/* Reject '/' '*', since we might need to turn off the echo */
lip->yyUnget();
lip->save_in_comment_state();
// After the unget the stream is positioned at '/', so offset 2 is the
// character that follows '/' '*'.
if (lip->yyPeekn(2) == '!')
{
lip->in_comment= DISCARD_COMMENT;
/* Accept '/' '*' '!', but do not keep this marker. */
lip->set_echo(FALSE);
lip->yySkip();
lip->yySkip();
lip->yySkip();
/*
The special comment format is very strict:
'/' '*' '!', followed by exactly
1 digit (major), 2 digits (minor), then 2 digits (dot).
32302 -> 3.23.02
50032 -> 5.0.32
50114 -> 5.1.14
*/
char version_str[6];
if ( my_isdigit(cs, (version_str[0]= lip->yyPeekn(0)))
&& my_isdigit(cs, (version_str[1]= lip->yyPeekn(1)))
&& my_isdigit(cs, (version_str[2]= lip->yyPeekn(2)))
&& my_isdigit(cs, (version_str[3]= lip->yyPeekn(3)))
&& my_isdigit(cs, (version_str[4]= lip->yyPeekn(4)))
)
{
version_str[5]= 0;
ulong version;
version=strtol(version_str, NULL, 10);
if (version <= MYSQL_VERSION_ID)
{
/* Accept 'M' 'm' 'm' 'd' 'd' */
lip->yySkipn(5);
/* Expand the content of the special comment as real code */
lip->set_echo(TRUE);
state=MY_LEX_START;
break; /* Do not treat contents as a comment. */
}
else
{
/*
Patch and skip the conditional comment to avoid it
being propagated infinitely (eg. to a slave).
*/
char *pcom= lip->yyUnput(' ');
comment_closed= ! consume_comment(lip, 1);
// If the comment was not closed, undo the patch made by yyUnput().
if (! comment_closed)
{
*pcom= '!';
}
/* version allowed to have one level of comment inside. */
}
}
else
{
/* Not a version comment. */
state=MY_LEX_START;
lip->set_echo(TRUE);
break;
}
}
else
{
lip->in_comment= PRESERVE_COMMENT;
lip->yySkip(); // Accept /
lip->yySkip(); // Accept *
comment_closed= ! consume_comment(lip, 0);
/* regular comments can have zero comments inside. */
}
/*
Discard:
- regular '/' '*' comments,
- special comments '/' '*' '!' for a future version,
by scanning until we find a closing '*' '/' marker.
Nesting regular comments isn't allowed. The first
'*' '/' returns the parser to the previous state.
/#!VERSI oned containing /# regular #/ is allowed #/
Inside one versioned comment, another versioned comment
is treated as a regular discardable comment. It gets
no special parsing.
*/
/* Unbalanced comments with a missing '*' '/' are a syntax error */
if (! comment_closed)
return (ABORT_SYM);
state = MY_LEX_START; // Try again
lip->restore_in_comment_state();
break;
case MY_LEX_END_LONG_COMMENT:
// '*' '/' ends a comment only while inside one; otherwise '*' is returned
// as a plain character.
if ((lip->in_comment != NO_COMMENT) && lip->yyPeek() == '/')
{
/* Reject '*' '/' */
lip->yyUnget();
/* Accept '*' '/', with the proper echo */
lip->set_echo(lip->in_comment == PRESERVE_COMMENT);
lip->yySkipn(2);
/* And start recording the tokens again */
lip->set_echo(TRUE);
/*
C-style comments are replaced with a single space (as it
is in C and C++). If there is already a whitespace
character at this point in the stream, the space is
not inserted.
See also ISO/IEC 9899:1999 §5.1.1.2
("Programming languages — C")
*/
if (!my_isspace(cs, lip->yyPeek()) &&
lip->get_cpp_ptr() != lip->get_cpp_buf() &&
!my_isspace(cs, *(lip->get_cpp_ptr() - 1)))
lip->cpp_inject(' ');
lip->in_comment=NO_COMMENT;
state=MY_LEX_START;
}
else
state=MY_LEX_CHAR; // Return '*'
break;
case MY_LEX_SET_VAR: // Check if ':='
if (lip->yyPeek() != '=')
{
state=MY_LEX_CHAR; // Return ':'
break;
}
lip->yySkip();
return (SET_VAR);
case MY_LEX_SEMICOLON: // optional line terminator
state= MY_LEX_CHAR; // Return ';'
break;
case MY_LEX_EOL:
// If this is not the real end of input, the character is returned as a
// plain character.
if (lip->eof())
{
lip->yyUnget(); // Reject the last '\0'
// Consume the terminating '\0' again, this time with echo turned off.
lip->set_echo(FALSE);
lip->yySkip();
lip->set_echo(TRUE);
/* Unbalanced comments with a missing '*' '/' are a syntax error */
if (lip->in_comment != NO_COMMENT)
return (ABORT_SYM);
lip->next_state=MY_LEX_END; // Mark for next loop
return(END_OF_INPUT);
}
state=MY_LEX_CHAR;
break;
case MY_LEX_END:
lip->next_state=MY_LEX_END;
return(0); // We found end of input last time
/* Actually real shouldn't start with . but allow them anyhow */
case MY_LEX_REAL_OR_POINT:
if (my_isdigit(cs,lip->yyPeek()))
state = MY_LEX_REAL; // Real
else
{
state= MY_LEX_IDENT_SEP; // return '.'
lip->yyUnget(); // Put back '.'
}
break;
case MY_LEX_USER_END: // end '@' of user@hostname
// Decide from the character after '@' how the next token is scanned:
// a quoting character leaves next_state unchanged, a second '@' selects
// MY_LEX_SYSTEM_VAR, anything else is scanned as a hostname.
switch (state_map[lip->yyPeek()]) {
case MY_LEX_STRING:
case MY_LEX_USER_VARIABLE_DELIMITER:
case MY_LEX_STRING_OR_DELIMITER:
break;
case MY_LEX_USER_END:
lip->next_state=MY_LEX_SYSTEM_VAR;
break;
default:
lip->next_state=MY_LEX_HOSTNAME;
break;
}
yylval->lex_str.str=(char*) lip->get_ptr();
yylval->lex_str.length=1;
return((int) '@');
case MY_LEX_HOSTNAME: // hostname after '@' in user@hostname
// Consume alphanumeric characters, '.', '_' and '$'.
for (c=lip->yyGet() ;
my_isalnum(cs,c) || c == '.' || c == '_' || c == '$';
c= lip->yyGet()) ;
yylval->lex_str=get_token(lip, 0, lip->yyLength());
return(LEX_HOSTNAME);
case MY_LEX_SYSTEM_VAR:
yylval->lex_str.str=(char*) lip->get_ptr();
yylval->lex_str.length=1;
lip->yySkip(); // Skip '@'
// If a quote character follows, scan the name as a normal token; otherwise
// scan it in MY_LEX_IDENT_OR_KEYWORD.
lip->next_state= (state_map[lip->yyPeek()] ==
MY_LEX_USER_VARIABLE_DELIMITER ?
MY_LEX_START :
MY_LEX_IDENT_OR_KEYWORD);
return((int) '@');
case MY_LEX_IDENT_OR_KEYWORD:
/*
We come here when we have found two '@' in a row.
We should now be able to handle:
[(global | local | session) .]variable_name
*/
for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c) ;
/* If there were non-ASCII characters, mark that we must convert */
result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
if (c == '.')
lip->next_state=MY_LEX_IDENT_SEP;
length= lip->yyLength();
if (length == 0)
return(ABORT_SYM); // Names must be nonempty.
if ((tokval= find_keyword(lip, length,0)))
{
lip->yyUnget(); // Put back 'c'
return(tokval); // Was keyword
}
yylval->lex_str=get_token(lip, 0, length);
lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
lip->m_cpp_text_end);
return(result_state);
}
}
}