in tensorflow/tensorflow/compiler/xla/service/hlo_lexer.cc [93:222]
// Scans forward from current_ptr_ and returns the kind of the next token.
//
// Whitespace and both comment forms ("/* ... */" and "// ...") yield no token;
// the scan simply restarts after them. token_state_.token_start is reset to
// the cursor position at the start of every restart, so on return it records
// where the scan of the returned token began.
//
// Returns TokKind::kEof at end of input and TokKind::kError for any character
// sequence that does not start a recognized token.
TokKind HloLexer::LexToken() {
  for (;;) {
    token_state_.token_start = current_ptr_;
    const int ch = GetNextChar();

    switch (ch) {
      // --- Sentinels reported by GetNextChar() -----------------------------
      case kEOF:
        // Nothing left in the input buffer.
        return TokKind::kEof;
      case kError:
        // The input buffer contains an invalid character.
        return TokKind::kError;

      // --- Whitespace: produces no token, restart the scan -----------------
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        continue;

      // --- Single-character punctuation ------------------------------------
      case '=':
        return TokKind::kEqual;
      case ',':
        return TokKind::kComma;
      case ':':
        return TokKind::kColon;
      case '*':
        return TokKind::kAsterisk;
      case '[':
        return TokKind::kLsquare;
      case ']':
        return TokKind::kRsquare;
      case '{':
        return TokKind::kLbrace;
      case '}':
        return TokKind::kRbrace;
      case '(':
        return TokKind::kLparen;
      case ')':
        return TokKind::kRparen;

      // --- Tokens handled by dedicated sub-lexers --------------------------
      case '%':
        return LexPercent();
      case '"':
        return LexString();

      // --- '-' is either the arrow "->" or the start of a number/pattern ---
      case '-':
        if (PeekCurrentChar() == '>') {
          ++current_ptr_;
          return TokKind::kArrow;
        }
        return LexNumberOrPattern();

      // --- A leading digit always starts a number/pattern ------------------
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return LexNumberOrPattern();

      // --- '<' is only accepted as part of "<=" -----------------------------
      case '<':
        if (PeekCurrentChar() != '=') {
          return TokKind::kError;
        }
        ++current_ptr_;
        return TokKind::kLeq;

      // --- '.' is only accepted as part of "..." ----------------------------
      case '.':
        // Each matching dot is consumed as it is seen, so a failed match
        // leaves the cursor after the last dot that did match.
        if (PeekCurrentChar() != '.') {
          return TokKind::kError;
        }
        ++current_ptr_;
        if (PeekCurrentChar() != '.') {
          return TokKind::kError;
        }
        ++current_ptr_;
        return TokKind::kDots;

      // --- '/' must introduce a comment -------------------------------------
      case '/': {
        const int lookahead = PeekCurrentChar();

        if (lookahead == '*') {
          // Block comment. Remember the cursor position (just past the '/')
          // so that it can be restored if the comment is never closed.
          const char* const unterminated_pos = current_ptr_;
          ++current_ptr_;  // Consume the '*' of the opening "/*".

          // Consume characters until the closing "*/" has been read.
          for (;;) {
            const int c = GetNextChar();
            if (c == kEOF) {
              // Ran off the end of the input: unterminated comment.
              current_ptr_ = unterminated_pos;
              return TokKind::kError;
            }
            if (c == kError) {
              return TokKind::kError;
            }
            if (c == '*' && PeekCurrentChar() == '/') {
              ++current_ptr_;  // Consume the '/' of the closing "*/".
              break;
            }
          }
          // The comment yields no token; resume scanning after it.
          continue;
        }

        if (lookahead == '/') {
          // Line comment. Discard everything up to, but not including, the
          // end-of-line character(s) or the end of input. The line terminator
          // is deliberately left in the buffer: the whitespace cases above
          // skip it on the next pass, which keeps this loop independent of
          // the end-of-line encoding in use.
          int next = PeekCurrentChar();
          while (next != kEOF && next != '\n' && next != '\r') {
            if (next == kError) {
              return TokKind::kError;
            }
            ++current_ptr_;
            next = PeekCurrentChar();
          }
          continue;
        }

        // A '/' that does not open a comment is not a valid token.
        return TokKind::kError;
      }

      // --- Anything else: identifier start [a-zA-Z_] or an error -----------
      default: {
        const bool starts_identifier =
            ch == '_' ||
            absl::ascii_isalpha(static_cast<unsigned char>(ch));
        return starts_identifier ? LexIdentifier() : TokKind::kError;
      }
    }
  }
}