TokKind HloLexer::LexToken()

in tensorflow/tensorflow/compiler/xla/service/hlo_lexer.cc [93:222]


// Lexes the next token from the input buffer and returns its kind.
//
// Whitespace and both comment forms (`/* ... */` and `// ...`) yield no token:
// after skipping them the scan restarts, so `token_state_.token_start` always
// ends up at the position where the returned token attempt began.
//
// Returns TokKind::kEof once the end of the buffer is reached and
// TokKind::kError for any character sequence that does not start a valid
// token (including an unterminated block comment, a lone '/', a lone '<', and
// fewer than three consecutive dots).
TokKind HloLexer::LexToken() {
  for (;;) {
    // Each pass is a fresh token attempt; record where it starts.
    token_state_.token_start = current_ptr_;

    const int ch = GetNextChar();
    switch (ch) {
      // --- Sentinels reported by GetNextChar(). ---
      case kEOF:
        // End of the input buffer.
        return TokKind::kEof;
      case kError:
        // Invalid character in the input buffer.
        return TokKind::kError;

      // --- Whitespace produces no token; restart the scan. ---
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        continue;

      // --- Tokens fully determined by their first character. ---
      case '=':
        return TokKind::kEqual;
      case ',':
        return TokKind::kComma;
      case ':':
        return TokKind::kColon;
      case '*':
        return TokKind::kAsterisk;
      case '[':
        return TokKind::kLsquare;
      case ']':
        return TokKind::kRsquare;
      case '{':
        return TokKind::kLbrace;
      case '}':
        return TokKind::kRbrace;
      case '(':
        return TokKind::kLparen;
      case ')':
        return TokKind::kRparen;

      // --- Tokens handed off to dedicated sub-lexers. ---
      case '%':
        return LexPercent();
      case '"':
        return LexString();

      // --- '-' is either the arrow "->" or the start of a number/pattern. ---
      case '-':
        if (PeekCurrentChar() == '>') {
          current_ptr_++;
          return TokKind::kArrow;
        }
        return LexNumberOrPattern();

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return LexNumberOrPattern();

      // --- '<' is only valid as the first half of "<=". ---
      case '<':
        if (PeekCurrentChar() != '=') {
          return TokKind::kError;
        }
        current_ptr_++;
        return TokKind::kLeq;

      // --- '.' is only valid as the first of exactly three dots "...". ---
      case '.':
        if (PeekCurrentChar() != '.') {
          return TokKind::kError;
        }
        current_ptr_++;
        if (PeekCurrentChar() != '.') {
          return TokKind::kError;
        }
        current_ptr_++;
        return TokKind::kDots;

      // --- '/' must open a comment; anything else is an error. ---
      case '/': {
        const int next = PeekCurrentChar();

        if (next == '*') {
          // Block comment. Keep the current position so that, if the comment
          // never terminates, the lexer can be rewound and the error message
          // will point to the beginning of the comment.
          const char* const rewind_pos = current_ptr_;
          current_ptr_++;

          // Consume characters until the closing "*/".
          for (;;) {
            const int c = GetNextChar();
            if (c == kEOF) {
              // Ran off the end of the buffer: unterminated comment.
              current_ptr_ = rewind_pos;
              return TokKind::kError;
            }
            if (c == kError) {
              return TokKind::kError;
            }
            if (c == '*' && PeekCurrentChar() == '/') {
              // Step over the '/' that closes the comment.
              current_ptr_++;
              break;
            }
          }
          // A comment is not a token; go back and lex whatever follows it.
          continue;
        }

        if (next == '/') {
          // Line comment. Discard everything up to, but not including, the
          // end of the line (or the end of the buffer). The end-of-line
          // character(s) stay in the buffer and are dropped by the whitespace
          // case above on the next pass, which keeps this independent of the
          // end-of-line encoding in use.
          for (int c = PeekCurrentChar();
               c != kEOF && c != '\n' && c != '\r'; c = PeekCurrentChar()) {
            if (c == kError) {
              return TokKind::kError;
            }
            current_ptr_++;
          }
          continue;
        }

        // A lone '/' is an error.
        return TokKind::kError;
      }

      // --- Everything else: an identifier start ([a-zA-Z_]) or an error. ---
      default:
        if (ch == '_' ||
            absl::ascii_isalpha(static_cast<unsigned char>(ch))) {
          return LexIdentifier();
        }
        return TokKind::kError;
    }
  }
}