in thrift/compiler/parse/lexer.cc [366:495]
// Scans the input at ptr_ and returns the next token.
//
// After whitespace and comments are consumed, the first character of the
// token selects what is lexed:
//   - letter or '_'      -> a keyword if the text is found in `keywords`,
//                           otherwise an identifier
//   - '.'                -> a floating-point constant, when
//                           lex_float_constant() accepts what follows
//   - decimal digit      -> a hexadecimal, binary, floating-point, decimal or
//                           octal constant
//   - '"' or '\''        -> a string literal closed by the same quote
//   - '\0' beyond end()  -> the EOF token
//   - anything else      -> a single-character operator or punctuator
// A doc comment reported by lex_whitespace_or_comment() is returned as an
// inline-doc token before any of the above. Input that matches nothing
// produces unexpected_token().
parser::symbol_type lexer::get_next_token() {
  // Never let the line counter sit below 1.
  lineno_ = std::max(lineno_, 1);

  const bool saw_doc_comment =
      lex_whitespace_or_comment() == comment_lex_result::doc_comment;
  if (saw_doc_comment) {
    return parser::make_tok_inline_doc(token_text(), make_location());
  }

  start_token();
  const char first = *ptr_++;

  // Identifier or keyword: consume every following identifier character.
  if (is_letter(first) || first == '_') {
    for (; is_identifier_char(*ptr_); ++ptr_) {
    }
    auto word = token_text();
    const auto entry = keywords.find(word);
    if (entry == keywords.end()) {
      return parser::make_tok_identifier(word, make_location());
    }
    return entry->second(make_location());
  }

  if (first == '.') {
    // A leading '.' is only meaningful as the start of a float; when
    // lex_float_constant() rejects it, control drops to the operator switch.
    if (const char* float_end = lex_float_constant(ptr_)) {
      ptr_ = float_end;
      return make_float_constant();
    }
  } else if (is_dec_digit(first)) {
    if (first == '0') {
      const char radix_mark = *ptr_;
      if (radix_mark == 'x' || radix_mark == 'X') {
        // Hexadecimal constant: the prefix must be followed by a hex digit.
        if (!is_hex_digit(ptr_[1])) {
          return unexpected_token();
        }
        for (ptr_ += 2; is_hex_digit(*ptr_); ++ptr_) {
        }
        // NOTE(review): arguments look like (prefix length, radix) — confirm
        // against make_int_constant().
        return make_int_constant(2, 16);
      }
      if (radix_mark == 'b' || radix_mark == 'B') {
        // Binary constant: the prefix must be followed by a binary digit.
        if (!is_bin_digit(ptr_[1])) {
          return unexpected_token();
        }
        for (ptr_ += 2; is_bin_digit(*ptr_); ++ptr_) {
        }
        return make_int_constant(2, 2);
      }
    }

    // Decimal, octal or floating-point constant.
    ptr_ = lex_dec_constant(ptr_, ptr_);
    const char follower = *ptr_;
    if (follower == '.') {
      if (const char* float_end = lex_float_constant(ptr_ + 1)) {
        ptr_ = float_end;
        return make_float_constant();
      }
    } else if (follower == 'e' || follower == 'E') {
      if (const char* float_end = lex_float_exponent(ptr_)) {
        ptr_ = float_end;
        return make_float_constant();
      }
    }

    if (first != '0') {
      // Decimal constant.
      return make_int_constant(0, 10);
    }

    // Octal constant: every character lexed so far has to be an octal digit.
    const bool all_octal = std::all_of(
        token_start_, ptr_, [](char ch) -> bool { return is_oct_digit(ch); });
    if (!all_octal) {
      return unexpected_token();
    }
    return make_int_constant(1, 8);
  } else if (first == '"' || first == '\'') {
    // String literal: search for the matching quote. If the search stops on
    // a '\0' instead, no token is produced here and control drops to the
    // operator switch below.
    const char* closing = std::find(ptr_, end(), first);
    if (*closing != '\0') {
      ptr_ = closing + 1;
      update_line();
      // The literal's value is the text from token_start_ + 1 up to, but not
      // including, the closing quote.
      return parser::make_tok_literal(
          std::string(token_start_ + 1, closing), make_location());
    }
  } else if (first == '\0' && ptr_ > end()) {
    --ptr_; // Put '\0' back in case get_next_token() is called again.
    return parser::make_tok_eof(make_location());
  }

  // Operators and punctuators.
  switch (first) {
    // Brackets.
    case '(':
      return parser::make_tok_char_bracket_round_l(make_location());
    case ')':
      return parser::make_tok_char_bracket_round_r(make_location());
    case '[':
      return parser::make_tok_char_bracket_square_l(make_location());
    case ']':
      return parser::make_tok_char_bracket_square_r(make_location());
    case '{':
      return parser::make_tok_char_bracket_curly_l(make_location());
    case '}':
      return parser::make_tok_char_bracket_curly_r(make_location());
    case '<':
      return parser::make_tok_char_bracket_angle_l(make_location());
    case '>':
      return parser::make_tok_char_bracket_angle_r(make_location());
    // Separators.
    case ',':
      return parser::make_tok_char_comma(make_location());
    case ';':
      return parser::make_tok_char_semicolon(make_location());
    case ':':
      return parser::make_tok_char_colon(make_location());
    // Remaining single-character tokens.
    case '=':
      return parser::make_tok_char_equal(make_location());
    case '@':
      return parser::make_tok_char_at_sign(make_location());
    case '+':
      return parser::make_tok_char_plus(make_location());
    case '-':
      return parser::make_tok_char_minus(make_location());
  }
  return unexpected_token();
}