in mysqlshdk/libs/db/mysqlx/tokenizer.cc [300:503]
void Tokenizer::get_tokens() {
bool arrow_last = false;
bool inside_arrow = false;
for (size_t i = 0; i < _input.size(); ++i) {
char c = _input[i];
if (std::isspace(c)) {
// do nothing
continue;
} else if (std::isdigit(c)) {
// numerical literal
const int start = i;
// floating grammar is
// float -> int '.' (int | (int expo[sign] int))
// int -> digit +
// expo -> 'E' | 'e'
// sign -> '-' | '+'
while (i < _input.size() && std::isdigit(c = _input[i])) ++i;
if (i < _input.size() && _input[i] == '.') {
++i;
while (i < _input.size() && std::isdigit(_input[i])) ++i;
if (i < _input.size() && std::toupper(_input[i]) == 'E') {
++i;
if (i < _input.size() && (((c = _input[i]) == '-') || (c == '+')))
++i;
size_t j = i;
while (i < _input.size() && std::isdigit(_input[i])) i++;
if (i == j)
throw Parser_error(
"Missing exponential value for floating point at position " +
std::to_string(start));
}
_tokens.push_back(Token(Token::Type::LNUM,
std::string(_input, start, i - start), start));
} else {
_tokens.push_back(Token(Token::Type::LINTEGER,
std::string(_input, start, i - start), start));
}
if (i < _input.size()) --i;
} else if (!std::isalpha(c) && c != '_') {
// # non-identifier, e.g. operator or quoted literal
if (c == '?') {
_tokens.push_back(
Token(Token::Type::PLACEHOLDER, std::string(1, c), i));
} else if (c == '+') {
_tokens.push_back(Token(Token::Type::PLUS, std::string(1, c), i));
} else if (c == '-') {
if (!arrow_last && next_char_is(i, '>')) {
if (next_char_is(i + 1, '>')) {
_tokens.push_back(Token(Token::Type::TWOHEADARROW, "->>", i));
i += 2;
} else {
_tokens.push_back(Token(Token::Type::ARROW, "->", i++));
}
arrow_last = true;
continue;
} else {
_tokens.push_back(Token(Token::Type::MINUS, std::string(1, c), i));
}
} else if (c == '*') {
if (next_char_is(i, '*')) {
_tokens.push_back(
Token(Token::Type::DOUBLESTAR, std::string("**"), i++));
} else {
_tokens.push_back(Token(Token::Type::MUL, std::string(1, c), i));
}
} else if (c == '/') {
_tokens.push_back(Token(Token::Type::DIV, std::string(1, c), i));
} else if (c == '$') {
_tokens.push_back(Token(Token::Type::DOLLAR, std::string(1, c), i));
} else if (c == '%') {
_tokens.push_back(Token(Token::Type::MOD, std::string(1, c), i));
} else if (c == '=') {
_tokens.push_back(Token(Token::Type::EQ, std::string(1, c), i));
} else if (c == '&') {
_tokens.push_back(Token(Token::Type::BITAND, std::string(1, c), i));
} else if (c == '|') {
_tokens.push_back(Token(Token::Type::BITOR, std::string(1, c), i));
} else if (c == '(') {
_tokens.push_back(Token(Token::Type::LPAREN, std::string(1, c), i));
} else if (c == ')') {
_tokens.push_back(Token(Token::Type::RPAREN, std::string(1, c), i));
} else if (c == '[') {
_tokens.push_back(Token(Token::Type::LSQBRACKET, std::string(1, c), i));
} else if (c == ']') {
_tokens.push_back(Token(Token::Type::RSQBRACKET, std::string(1, c), i));
} else if (c == '{') {
_tokens.push_back(Token(Token::Type::LCURLY, std::string(1, c), i));
} else if (c == '}') {
_tokens.push_back(Token(Token::Type::RCURLY, std::string(1, c), i));
} else if (c == '~') {
_tokens.push_back(Token(Token::Type::NEG, std::string(1, c), i));
} else if (c == ',') {
_tokens.push_back(Token(Token::Type::COMMA, std::string(1, c), i));
} else if (c == ':') {
_tokens.push_back(Token(Token::Type::COLON, std::string(1, c), i));
} else if (c == '!') {
if (next_char_is(i, '=')) {
_tokens.push_back(Token(Token::Type::NE, std::string("!="), i++));
} else {
_tokens.push_back(Token(Token::Type::BANG, std::string(1, c), i));
}
} else if (c == '<') {
if (next_char_is(i, '<')) {
_tokens.push_back(Token(Token::Type::LSHIFT, std::string("<<"), i++));
} else if (next_char_is(i, '=')) {
_tokens.push_back(Token(Token::Type::LE, std::string("<="), i++));
} else if (next_char_is(i, '>')) {
_tokens.push_back(Token(Token::Type::NE, std::string("!="), i++));
} else {
_tokens.push_back(Token(Token::Type::LT, std::string("<"), i));
}
} else if (c == '>') {
if (next_char_is(i, '>')) {
_tokens.push_back(Token(Token::Type::RSHIFT, std::string(">>"), i++));
} else if (next_char_is(i, '=')) {
_tokens.push_back(Token(Token::Type::GE, std::string(">="), i++));
} else {
_tokens.push_back(Token(Token::Type::GT, std::string(1, c), i));
}
} else if (c == '.') {
if ((i + 1) < _input.size() && std::isdigit(_input[i + 1])) {
const size_t start = i;
++i;
// floating grammar is
// float -> '.' (int | (int expo[sign] int))
// nint->digit +
// expo -> 'E' | 'e'
// sign -> '-' | '+'
while (i < _input.size() && std::isdigit(_input[i])) ++i;
if (i < _input.size() && std::toupper(_input[i]) == 'E') {
++i;
if (i < _input.size() && (((c = _input[i]) == '+') || (c == '-')))
++i;
size_t j = i;
while (i < _input.size() && std::isdigit(_input[i])) ++i;
if (i == j)
throw Parser_error(
"Missing exponential value for floating point at position " +
std::to_string(start));
}
_tokens.push_back(Token(
Token::Type::LNUM, std::string(_input, start, i - start), start));
if (i < _input.size()) --i;
} else {
_tokens.push_back(Token(Token::Type::DOT, std::string(1, c), i));
}
} else if (c == '\'' && arrow_last) {
_tokens.push_back(Token(Token::Type::QUOTE, "'", i));
if (!inside_arrow)
inside_arrow = true;
else {
arrow_last = false;
inside_arrow = false;
}
} else if (c == '"' || c == '\'' || c == '`') {
char quote_char = c;
std::string val;
size_t start = ++i;
while (i < _input.size()) {
c = _input[i];
if ((c == quote_char) && ((i + 1) < _input.size()) &&
(_input[i + 1] != quote_char)) {
// break if we have a quote char that's not double
break;
} else if ((c == quote_char) || (c == '\\' && quote_char != '`')) {
// && quote_char != '`'
// this quote char has to be doubled
if ((i + 1) >= _input.size()) break;
val.append(1, _input[++i]);
} else {
val.append(1, c);
}
++i;
}
if ((i >= _input.size()) && (_input[i] != quote_char)) {
throw Parser_error(
"Unterminated quoted string starting at position " +
std::to_string(start));
}
if (quote_char == '`') {
_tokens.push_back(Token(Token::Type::IDENT, val, start));
} else {
_tokens.push_back(Token(Token::Type::LSTRING, val, start));
}
} else {
throw Parser_error("Unknown character at position " +
std::to_string(i));
}
} else {
size_t start = i;
while (i < _input.size() && (std::isalnum(_input[i]) || _input[i] == '_'))
++i;
std::string val(_input, start, i - start);
Maps::reserved_words_t::const_iterator it = map.reserved_words.find(val);
if (it != map.reserved_words.end()) {
_tokens.push_back(Token(it->second, val, start));
} else {
_tokens.push_back(Token(Token::Type::IDENT, val, start));
}
--i;
}
}
}