inline Token TokenizeOnce()

in src/parser/tokenizer.h [336:548]


  inline Token TokenizeOnce() {
    int line = this->line;
    int col = this->col;
    auto next = Peek();
    DLOG(INFO) << "tvm::parser::TokenizeOnce: next=" << next;
    if (next == '\n') {
      auto token = NewToken(TokenType::kNewline);
      Next();
      return token;
    } else if (next == '\r') {
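      // A carriage return is only accepted as part of a CRLF pair; a lone '\r'
      // is reported as a fatal error below.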
      Next();
      if (More() && Peek() == '\n') {
        auto token = NewToken(TokenType::kNewline);
        return token;
      } else {
        auto span = SpanFrom(line, col);
        this->diag_ctx.EmitFatal(
            Diagnostic::Error(span)
            << "\\r carriage returns must be followed by a \\n in the TVM text format");
        return Token();
      }
    } else if (next == '"') {
      // TODO(@jroesch): Properly tokenize escape sequences in strings.
      // see https://github.com/apache/tvm/issues/6153.
      Next();
      std::stringstream string_content;
      while (More() && Peek() != '"') {
        string_content << Next();
      }
      Next();
      return NewToken(TokenType::kStringLiteral, tvm::String(string_content.str()));
    } else if (IsWhitespace(next)) {
      auto token = NewToken(TokenType::kWhitespace);
      Next();
      return token;
    } else if (next == '-') {
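      // Collapse a run of leading minus signs and track the resulting sign,
      // so that e.g. "--3" lexes as a positive number.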
      int negs = 0;
      while (More() && Peek() == '-') {
        Next();
        negs++;
      }
      bool is_neg = negs % 2 == 1;
      if (More() && IsDigit(Peek())) {
        return ParseNumber(!is_neg);
      } else if (More() && MatchString("inff")) {
        return ParseNumber(!is_neg, true, "inff");
      } else {
        // If neither a number nor "inff" follows the run of minus signs,
        // rewind so that only the first minus is consumed and emit a single
        // minus token. Re-lexing like this is slow; a multi-token return
        // would be a better fix.
        pos = pos - (negs - 1);
        return NewToken(TokenType::kMinus);
      }
    } else if (IsDigit(next)) {
      return ParseNumber(true);
    } else if (MatchString("inff")) {
      return ParseNumber(true, true, "inff");
    } else if (next == '.') {
      auto token = NewToken(TokenType::kPeriod);
      Next();
      return token;
    } else if (next == ',') {
      auto token = NewToken(TokenType::kComma);
      Next();
      return token;
    } else if (next == '=') {
      auto token = NewToken(TokenType::kEqual);
      Next();
      return token;
    } else if (next == ';') {
      auto token = NewToken(TokenType::kSemicolon);
      Next();
      return token;
    } else if (next == ':') {
      auto token = NewToken(TokenType::kColon);
      Next();
      return token;
    } else if (next == '(') {
      auto token = NewToken(TokenType::kOpenParen);
      Next();
      return token;
    } else if (next == ')') {
      auto token = NewToken(TokenType::kCloseParen);
      Next();
      return token;
    } else if (next == '+') {
      auto token = NewToken(TokenType::kPlus);
      Next();
      return token;
    } else if (next == '*') {
      auto token = NewToken(TokenType::kStar);
      Next();
      return token;
    } else if (next == '<') {
      auto token = NewToken(TokenType::kLAngle);
      Next();
      return token;
    } else if (next == '>') {
      auto token = NewToken(TokenType::kRAngle);
      Next();
      return token;
    } else if (next == '{') {
      auto token = NewToken(TokenType::kLCurly);
      Next();
      return token;
    } else if (next == '}') {
      auto token = NewToken(TokenType::kRCurly);
      Next();
      return token;
    } else if (next == '[') {
      auto token = NewToken(TokenType::kLSquare);
      Next();
      return token;
    } else if (next == ']') {
      auto token = NewToken(TokenType::kRSquare);
      Next();
      return token;
    } else if (next == '!') {
      auto token = NewToken(TokenType::kBang);
      Next();
      return token;
    } else if (next == '@') {
      auto token = NewToken(TokenType::kAt);
      Next();
      return token;
    } else if (next == '?') {
      auto token = NewToken(TokenType::kQuestion);
      Next();
      return token;
    } else if (MatchString("meta")) {
      return TokenizeMetaRef();
    } else if (next == '#') {
      return TokenizeAttr();
    } else if (next == '%') {
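      // '%' is either a bare percent token or, when digits follow, the start of
      // a graph variable reference such as %0, which is re-tagged as kGraph below.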
      auto token = NewToken(TokenType::kPercent);
      Next();

      std::stringstream number;
      while (More() && IsDigit(Peek())) {
        number << Next();
      }

      auto number_str = number.str();
      if (number_str.size()) {
        auto num_tok = ParseNumber(true, false, number_str);
        auto span = SpanFrom(token->span->line, token->span->column);
        token = Token(span, TokenType::kGraph, num_tok->data);
      }

      return token;
    } else if (next == '/') {
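      // '/' may begin a line comment ("//"), a block comment ("/* ... */"),
      // or the division operator.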
      Next();
      if (Peek() == '/') {
        auto token = NewToken(TokenType::kLineComment);
        // Consume the second '/' of the "//" that starts the line comment.
        Next();
        std::stringstream comment;
        while (More() && Peek() != '\n') {
          comment << Next();
        }
        token->data = tvm::String(comment.str());
        return token;
      } else if (Peek() == '*') {
        // The leading '/' was consumed above; eat the '*' so MatchComment
        // starts inside the "/* ... */" body.
        Next();
        std::string comment;
        MatchComment(&comment);
        auto token = NewToken(TokenType::kComment, tvm::String(comment));
        return token;
      } else {
        return NewToken(TokenType::kDivision);
      }
    } else if (IsIdentLetter(next)) {
      std::stringstream ss;
      // Capture the line/col before the loop below consumes the identifier,
      // so the token's span points at its first character.
      int line = this->line;
      int col = this->col;

      while (More() && IsIdent(Peek())) {
        ss << Next();
      }

      std::string keyword = ss.str();
      auto it = KEYWORD_TABLE.find(keyword);

      TokenType token_type;
      if (it != KEYWORD_TABLE.end()) {
        token_type = it->second;

        if (token_type == TokenType::kMatch) {
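          // "match" immediately followed by '?' lexes as the partial-match
          // keyword (kPartialMatch).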
          if (More() && Peek() == '?') {
            Next();
            token_type = TokenType::kPartialMatch;
          }
        }
      } else {
        token_type = TokenType::kIdentifier;
      }

      auto span = SpanFrom(line, col);
      return Token(span, token_type, tvm::String(ss.str()));
    } else {
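      // Anything unrecognized is consumed up to the next whitespace and
      // returned as a single kUnknown token.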
      std::stringstream ss;
      while (More() && !IsWhitespace(Peek())) {
        ss << Next();
      }
      auto token = NewToken(TokenType::kUnknown);
      token->data = tvm::String(ss.str());
      return token;
    }
  }
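
TokenizeOnce() consumes exactly one token per call and advances the cursor just past it, so tokenizing a whole buffer reduces to a loop around it. The sketch below is illustrative only: the TokenizeAll name and the local tokens vector are assumptions for this example and are not part of tokenizer.h; the real driver in the header may differ (for instance, by appending an end-of-file token).

  // Hypothetical driver sketch (not from tokenizer.h): pull tokens until the
  // input is exhausted. Assumes <vector> is available.
  inline std::vector<Token> TokenizeAll() {
    std::vector<Token> tokens;
    while (More()) {
      Token token = TokenizeOnce();
      // Every branch of TokenizeOnce returns a token; the single error path
      // calls diag_ctx.EmitFatal, so a defined token is expected here.
      tokens.push_back(token);
    }
    return tokens;
  }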