in src/parser/tokenizer.h [336:548]
/*!
 * \brief Lex exactly one token starting at the current cursor position.
 *
 * The first character decides which kind of token is produced. The checks are
 * tried in a fixed order: newline, carriage return, string literal, whitespace,
 * minus / negative number, number, "inff", single-character punctuation,
 * "meta" reference, attribute ('#'), graph variable ('%'), comment / division
 * ('/'), identifier or keyword, and finally an unknown token.
 *
 * The function calls Peek() immediately, so the caller is expected to have
 * verified that input remains (presumably via More() -- TODO confirm).
 *
 * \return The token that was recognised. After a fatal diagnostic has been
 *         emitted (lone '\r', unterminated string literal) a default-constructed
 *         Token is returned.
 */
inline Token TokenizeOnce() {
  // Position of the first character of the token, used for diagnostics.
  int line = this->line;
  int col = this->col;
  auto next = Peek();
  DLOG(INFO) << "tvm::parser::TokenizeOnce: next=" << next;

  if (next == '\n') {
    auto token = NewToken(TokenType::kNewline);
    Next();
    return token;
  }

  if (next == '\r') {
    Next();
    if (More() && Peek() == '\n') {
      // Only the '\r' has been consumed; the '\n' is left in the input, so the
      // following call to TokenizeOnce sees it and emits another kNewline.
      // NOTE(review): confirm that two newline tokens per CRLF is intended.
      return NewToken(TokenType::kNewline);
    }
    auto span = SpanFrom(line, col);
    this->diag_ctx.EmitFatal(
        Diagnostic::Error(span)
        << "\\r carriage returns must be followed by a \\n in the TVM text format");
    return Token();
  }

  if (next == '"') {
    // TODO(@jroesch): Properly tokenize escape sequences in strings.
    // see https://github.com/apache/tvm/issues/6153.
    Next();  // opening quote
    std::stringstream string_content;
    while (More() && Peek() != '"') {
      string_content << Next();
    }
    if (!More()) {
      // The loop stopped because the input ran out, not because a closing
      // quote was found. Report it instead of consuming past the end.
      auto span = SpanFrom(line, col);
      this->diag_ctx.EmitFatal(Diagnostic::Error(span) << "unterminated string literal");
      return Token();
    }
    Next();  // closing quote
    return NewToken(TokenType::kStringLiteral, tvm::String(string_content.str()));
  }

  if (IsWhitespace(next)) {
    auto token = NewToken(TokenType::kWhitespace);
    Next();
    return token;
  }

  if (next == '-') {
    // Consume the whole run of '-' characters; an odd count means the number
    // that may follow is negative.
    int negs = 0;
    while (More() && Peek() == '-') {
      Next();
      negs++;
    }
    bool is_neg = negs % 2 == 1;
    if (More() && IsDigit(Peek())) {
      return ParseNumber(!is_neg);
    }
    if (More() && MatchString("inff")) {
      return ParseNumber(!is_neg, true, "inff");
    }
    // If there isn't a number right after either,
    // this is really slow for lexing, should replace
    // with multi-token return or something.
    // Rewind so that only a single '-' has been consumed; the remaining ones
    // are lexed by subsequent calls.
    // NOTE(review): only `pos` is rewound. If Next() also advances this->col,
    // the column bookkeeping stays ahead by (negs - 1) -- confirm.
    pos = pos - (negs - 1);
    return NewToken(TokenType::kMinus);
  }

  if (IsDigit(next)) {
    return ParseNumber(true);
  }

  if (MatchString("inff")) {
    return ParseNumber(true, true, "inff");
  }

  // Single-character punctuation: every case builds the token at the current
  // position and then consumes exactly one character.
  bool is_punct = true;
  TokenType punct_type = TokenType::kUnknown;  // placeholder, overwritten below
  switch (next) {
    case '.':
      punct_type = TokenType::kPeriod;
      break;
    case ',':
      punct_type = TokenType::kComma;
      break;
    case '=':
      punct_type = TokenType::kEqual;
      break;
    case ';':
      punct_type = TokenType::kSemicolon;
      break;
    case ':':
      punct_type = TokenType::kColon;
      break;
    case '(':
      punct_type = TokenType::kOpenParen;
      break;
    case ')':
      punct_type = TokenType::kCloseParen;
      break;
    case '+':
      punct_type = TokenType::kPlus;
      break;
    case '*':
      punct_type = TokenType::kStar;
      break;
    case '<':
      punct_type = TokenType::kLAngle;
      break;
    case '>':
      punct_type = TokenType::kRAngle;
      break;
    case '{':
      punct_type = TokenType::kLCurly;
      break;
    case '}':
      punct_type = TokenType::kRCurly;
      break;
    case '[':
      punct_type = TokenType::kLSquare;
      break;
    case ']':
      punct_type = TokenType::kRSquare;
      break;
    case '!':
      punct_type = TokenType::kBang;
      break;
    case '@':
      punct_type = TokenType::kAt;
      break;
    case '?':
      punct_type = TokenType::kQuestion;
      break;
    default:
      is_punct = false;
      break;
  }
  if (is_punct) {
    auto token = NewToken(punct_type);
    Next();
    return token;
  }

  if (MatchString("meta")) {
    return TokenizeMetaRef();
  }

  if (next == '#') {
    return TokenizeAttr();
  }

  if (next == '%') {
    auto token = NewToken(TokenType::kPercent);
    Next();
    // A '%' directly followed by digits is a graph variable reference; a bare
    // '%' stays a kPercent token.
    std::stringstream number;
    while (More() && IsDigit(Peek())) {
      number << Next();
    }
    auto number_str = number.str();
    if (number_str.size()) {
      auto num_tok = ParseNumber(true, false, number_str);
      // Start the span at the '%' rather than at the digits.
      auto span = SpanFrom(token->span->line, token->span->column);
      token = Token(span, TokenType::kGraph, num_tok->data);
    }
    return token;
  }

  if (next == '/') {
    Next();
    // Guard the look-ahead with More(): the '/' may be the last character of
    // the input, in which case it is a plain division token.
    if (More() && Peek() == '/') {
      auto token = NewToken(TokenType::kLineComment);
      // Consume the /
      Next();
      // The comment text runs up to, but does not include, the newline.
      std::stringstream comment;
      while (More() && Peek() != '\n') {
        comment << Next();
      }
      token->data = tvm::String(comment.str());
      return token;
    }
    if (More() && Peek() == '*') {
      // Eat the first /* pair before entering the state machine.
      Next();
      std::string comment;
      MatchComment(&comment);
      return NewToken(TokenType::kComment, tvm::String(comment));
    }
    return NewToken(TokenType::kDivision);
  }

  if (IsIdentLetter(next)) {
    // Record the position before consuming so the span built with SpanFrom
    // starts at the first character of the identifier.
    // NOTE(review): these are re-read here rather than reusing `line`/`col`
    // from the top of the function to keep the existing behaviour; they are
    // expected to be equal unless the MatchString calls above alter the
    // line/column bookkeeping -- confirm.
    int ident_line = this->line;
    int ident_col = this->col;
    std::stringstream ss;
    while (More() && IsIdent(Peek())) {
      ss << Next();
    }
    std::string keyword = ss.str();

    // Anything that is not in the keyword table is a plain identifier.
    TokenType token_type = TokenType::kIdentifier;
    auto it = KEYWORD_TABLE.find(keyword);
    if (it != KEYWORD_TABLE.end()) {
      token_type = it->second;
      // "match?" is lexed as a single partial-match token; the '?' is consumed
      // but not added to the token text.
      if (token_type == TokenType::kMatch && More() && Peek() == '?') {
        Next();
        token_type = TokenType::kPartialMatch;
      }
    }
    auto span = SpanFrom(ident_line, ident_col);
    return Token(span, token_type, tvm::String(keyword));
  }

  // Nothing matched: collect everything up to the next whitespace character
  // into a single unknown token.
  std::stringstream ss;
  while (More() && !IsWhitespace(Peek())) {
    ss << Next();
  }
  auto token = NewToken(TokenType::kUnknown);
  token->data = tvm::String(ss.str());
  return token;
}