in base/src/com/google/idea/blaze/base/lang/buildfile/lexer/BuildLexerBase.java [669:806]
/**
 * Scans {@code buffer} from the current {@code pos} to its end and emits a token for each lexeme
 * it recognizes.
 *
 * <p>On every iteration {@code tokenizeTwoChars()} is tried first; when it reports a match the
 * position is advanced by two characters (presumably the helper has already emitted the token;
 * confirm against its definition). Otherwise the character at {@code pos} is consumed and
 * dispatched on:
 *
 * <ul>
 *   <li>opening brackets emit their token and then increment {@code openParenStackDepth}; closing
 *       brackets emit their token and then call {@code popParen()};
 *   <li>a backslash immediately followed by a line terminator ({@code \n} or {@code \r\n}) is
 *       emitted as a single {@code WHITESPACE} token; any other backslash is {@code ILLEGAL};
 *   <li>{@code #} starts a comment that runs up to, but not including, the next {@code '\n'};
 *   <li>quotes (optionally preceded by {@code r}) are handed to {@code addStringLiteral};
 *   <li>digits go to {@code addInteger()}, identifier-start characters other than {@code '$'} go
 *       to {@code addIdentifierOrKeyword()};
 *   <li>anything else is emitted as {@code ILLEGAL} and reported through {@code error(...)}.
 * </ul>
 */
private void tokenize() {
  while (pos < buffer.length) {
    if (tokenizeTwoChars()) {
      pos += 2;
      continue;
    }
    char c = buffer[pos];
    pos++;
    // From here on pos is one past c, so c itself occupies the range [pos - 1, pos).
    switch (c) {
      case '{':
        addToken(TokenKind.LBRACE, pos - 1, pos);
        openParenStackDepth++;
        break;
      case '}':
        addToken(TokenKind.RBRACE, pos - 1, pos);
        popParen();
        break;
      case '(':
        addToken(TokenKind.LPAREN, pos - 1, pos);
        openParenStackDepth++;
        break;
      case ')':
        addToken(TokenKind.RPAREN, pos - 1, pos);
        popParen();
        break;
      case '[':
        addToken(TokenKind.LBRACKET, pos - 1, pos);
        openParenStackDepth++;
        break;
      case ']':
        addToken(TokenKind.RBRACKET, pos - 1, pos);
        popParen();
        break;
      case '>':
        addToken(TokenKind.GREATER, pos - 1, pos);
        break;
      case '<':
        addToken(TokenKind.LESS, pos - 1, pos);
        break;
      case ':':
        addToken(TokenKind.COLON, pos - 1, pos);
        break;
      case ',':
        addToken(TokenKind.COMMA, pos - 1, pos);
        break;
      case '+':
        addToken(TokenKind.PLUS, pos - 1, pos);
        break;
      case '-':
        addToken(TokenKind.MINUS, pos - 1, pos);
        break;
      case '|':
        addToken(TokenKind.PIPE, pos - 1, pos);
        break;
      case '=':
        addToken(TokenKind.EQUALS, pos - 1, pos);
        break;
      case '%':
        addToken(TokenKind.PERCENT, pos - 1, pos);
        break;
      case '/':
        // Longest match first: "//=" before "//" before "/".
        if (lookaheadIs(0, '/') && lookaheadIs(1, '=')) {
          addToken(TokenKind.SLASH_SLASH_EQUALS, pos - 1, pos + 2);
          pos += 2;
        } else if (lookaheadIs(0, '/')) {
          addToken(TokenKind.SLASH_SLASH, pos - 1, pos + 1);
          pos++;
        } else {
          // /= is handled by tokenizeTwoChars.
          addToken(TokenKind.SLASH, pos - 1, pos);
        }
        break;
      case ';':
        addToken(TokenKind.SEMI, pos - 1, pos);
        break;
      case '.':
        addToken(TokenKind.DOT, pos - 1, pos);
        break;
      case '*':
        addToken(TokenKind.STAR, pos - 1, pos);
        break;
      case ' ':
      case '\t':
      case '\r':
        addWhitespace();
        break;
      case '\\':
        // Backslash character is valid only at the end of a line (or in a string).
        if (lookaheadIs(0, '\n')) {
          // Treat the end-of-line backslash and the newline char as whitespace
          // (they're ignored by the parser).
          pos++;
          addToken(TokenKind.WHITESPACE, pos - 2, pos, Character.toString(c));
        } else if (lookaheadIs(0, '\r') && lookaheadIs(1, '\n')) {
          // Same line continuation, but the line ends with CRLF: fold the backslash and
          // both terminator characters into one whitespace token instead of flagging the
          // backslash as illegal.
          pos += 2;
          addToken(TokenKind.WHITESPACE, pos - 3, pos, Character.toString(c));
        } else {
          addToken(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c));
        }
        break;
      case '\n':
        newline();
        break;
      case '#': {
        // The comment extends to the end of the buffer or to the next '\n', whichever comes
        // first; the '\n' itself is left for the next iteration.
        int commentStart = pos - 1;
        while (pos < buffer.length && buffer[pos] != '\n') {
          pos++;
        }
        addToken(TokenKind.COMMENT, commentStart, pos, bufferSlice(commentStart, pos));
        break;
      }
      case '\'':
      case '\"':
        addStringLiteral(c, false);
        break;
      default:
        // detect raw strings, e.g. r"str"
        if (c == 'r' && (lookaheadIs(0, '\'') || lookaheadIs(0, '\"'))) {
          // Consume the quote that follows the 'r' prefix and pass it as the delimiter.
          char quote = buffer[pos];
          pos++;
          addStringLiteral(quote, true);
          break;
        }
        if (Character.isDigit(c)) {
          addInteger();
        } else if (Character.isJavaIdentifierStart(c) && c != '$') {
          addIdentifierOrKeyword();
        } else {
          // Some characters in Python are not recognized in Blaze syntax (e.g. '!')
          addToken(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c));
          error("invalid character: '" + c + "'");
        }
        break;
    } // switch
  } // while
}