parser::symbol_type lexer::get_next_token()

in thrift/compiler/parse/lexer.cc [366:495]


// Scans the next token starting at ptr_ and returns it, leaving ptr_ just
// past the consumed text.
//
// Whitespace and comments are skipped first; a doc comment is returned as an
// inline-doc token. Otherwise the first character selects what is lexed: an
// identifier or keyword, a numeric constant, a string literal, end of input,
// or a single-character operator/punctuator. Any input that matches none of
// these is reported through unexpected_token().
parser::symbol_type lexer::get_next_token() {
  lineno_ = std::max(lineno_, 1);
  const bool at_doc_comment =
      lex_whitespace_or_comment() == comment_lex_result::doc_comment;
  if (at_doc_comment) {
    return parser::make_tok_inline_doc(token_text(), make_location());
  }

  start_token();

  const char first = *ptr_++;

  if (is_letter(first) || first == '_') {
    // Identifier or keyword: swallow the remaining identifier characters,
    // then check whether the spelling is a registered keyword.
    for (; is_identifier_char(*ptr_); ++ptr_) {
    }
    auto word = token_text();
    auto keyword = keywords.find(word);
    if (keyword == keywords.end()) {
      return parser::make_tok_identifier(word, make_location());
    }
    return keyword->second(make_location());
  }

  if (first == '.') {
    // A leading '.' is only accepted as the start of a floating-point
    // constant; otherwise it falls through and ends up as an unexpected token.
    if (const char* float_end = lex_float_constant(ptr_)) {
      ptr_ = float_end;
      return make_float_constant();
    }
  } else if (is_dec_digit(first)) {
    if (first == '0') {
      const char radix_mark = *ptr_;
      if (radix_mark == 'x' || radix_mark == 'X') {
        // Hexadecimal constant: the prefix must be followed by a hex digit.
        if (!is_hex_digit(ptr_[1])) {
          return unexpected_token();
        }
        for (ptr_ += 2; is_hex_digit(*ptr_); ++ptr_) {
        }
        return make_int_constant(2, 16);
      }
      if (radix_mark == 'b' || radix_mark == 'B') {
        // Binary constant: the prefix must be followed by a binary digit.
        if (!is_bin_digit(ptr_[1])) {
          return unexpected_token();
        }
        for (ptr_ += 2; is_bin_digit(*ptr_); ++ptr_) {
        }
        return make_int_constant(2, 2);
      }
    }

    // Decimal, octal or floating-point constant. Consume the digit run first
    // and then look at what follows it to detect a float.
    ptr_ = lex_dec_constant(ptr_, ptr_);
    const char after_digits = *ptr_;
    if (after_digits == '.') {
      if (const char* float_end = lex_float_constant(ptr_ + 1)) {
        ptr_ = float_end;
        return make_float_constant();
      }
    } else if (after_digits == 'e' || after_digits == 'E') {
      if (const char* float_end = lex_float_exponent(ptr_)) {
        ptr_ = float_end;
        return make_float_constant();
      }
    }

    if (first != '0') {
      // Plain decimal constant.
      return make_int_constant(0, 10);
    }

    // A leading '0' makes this an octal constant, so every character of the
    // token has to be an octal digit.
    const bool all_octal = std::all_of(
        token_start_, ptr_, [](char digit) { return is_oct_digit(digit); });
    if (!all_octal) {
      return unexpected_token();
    }
    return make_int_constant(1, 8);
  } else if (first == '"' || first == '\'') {
    // String literal: search for the matching closing quote. When none is
    // found the search stops at end(), where a '\0' is expected, and the
    // opening quote is left to be reported as an unexpected token below.
    const char* closing_quote = std::find(ptr_, end(), first);
    if (*closing_quote) {
      ptr_ = closing_quote + 1;
      update_line();
      return parser::make_tok_literal(
          std::string(token_start_ + 1, closing_quote), make_location());
    }
  } else if (!first && ptr_ > end()) {
    // The terminating '\0' was consumed: step back onto it so that a repeated
    // call to get_next_token() sees the end of input again.
    --ptr_;
    return parser::make_tok_eof(make_location());
  }

  // Single-character operators and punctuators.
  switch (first) {
    case '(':
      return parser::make_tok_char_bracket_round_l(make_location());
    case ')':
      return parser::make_tok_char_bracket_round_r(make_location());
    case '[':
      return parser::make_tok_char_bracket_square_l(make_location());
    case ']':
      return parser::make_tok_char_bracket_square_r(make_location());
    case '{':
      return parser::make_tok_char_bracket_curly_l(make_location());
    case '}':
      return parser::make_tok_char_bracket_curly_r(make_location());
    case '<':
      return parser::make_tok_char_bracket_angle_l(make_location());
    case '>':
      return parser::make_tok_char_bracket_angle_r(make_location());
    case ',':
      return parser::make_tok_char_comma(make_location());
    case ';':
      return parser::make_tok_char_semicolon(make_location());
    case ':':
      return parser::make_tok_char_colon(make_location());
    case '=':
      return parser::make_tok_char_equal(make_location());
    case '@':
      return parser::make_tok_char_at_sign(make_location());
    case '+':
      return parser::make_tok_char_plus(make_location());
    case '-':
      return parser::make_tok_char_minus(make_location());
    default:
      break;
  }

  return unexpected_token();
}