in enterprise/el.lexer/src/org/netbeans/modules/el/lexer/ELLexer.java [146:841]
public Token<ELTokenId> nextToken() {
    // Character-driven state machine. Each pass of the loop reads one character
    // and, depending on lexerState, either moves to another state, or returns a
    // token (optionally pushing the current character back with input.backup(1)
    // so that it becomes the first character of the following token).
    // Returns null when nothing but the end-of-input marker could be read.
    //
    // States named ISI_BRACKET_* mirror the plain states; they differ only in
    // the state that is restored once the token is complete (ISI_BRACKET
    // instead of INIT).
    int actChar;
    while (true) {
        actChar = input.read();
        if (actChar == EOF) {
            if (input.readLengthEOF() == 1) {
                return null; // only the EOF marker was read
            } else {
                // There is pending text in front of EOF: push the EOF marker back
                // and leave the loop so the pending text is returned as the last
                // token; null is returned by the next nextToken() call.
                input.backup(1);
                break;
            }
        }
        switch (lexerState) { // dispatch on the current internal state
            case INIT:
                switch (actChar) {
                    case '"':
                        lexerState = ISI_STRING;
                        break;
                    case '\'':
                        lexerState = ISI_CHAR;
                        break;
                    case '/':
                        return token(ELTokenId.DIV);
                    case '=':
                        lexerState = ISA_EQ;
                        break;
                    case '>':
                        lexerState = ISA_GT;
                        break;
                    case '<':
                        lexerState = ISA_LT;
                        break;
                    case '+':
                        lexerState = ISA_PLUS;
                        break;
                    case '-':
                        lexerState = ISA_MINUS;
                        break;
                    case '*':
                        return token(ELTokenId.MUL);
                    case '|':
                        lexerState = ISA_PIPE;
                        break;
                    case '&':
                        lexerState = ISA_AND;
                        break;
                    case '[':
                        return token(ELTokenId.LBRACKET);
                    case ']':
                        return token(ELTokenId.RBRACKET);
                    case '%':
                        return token(ELTokenId.MOD);
                    case ':':
                        // closes one pending '?' of a conditional expression
                        conditionalOperatorCount--;
                        return token(ELTokenId.COLON);
                    case ';':
                        return token(ELTokenId.SEMICOLON);
                    case '!':
                        lexerState = ISA_EXCLAMATION;
                        break;
                    case '(':
                        return token(ELTokenId.LPAREN);
                    case ')':
                        return token(ELTokenId.RPAREN);
                    case ',':
                        return token(ELTokenId.COMMA);
                    case '?':
                        // remembered so that "identifier:" inside a conditional
                        // is not mistaken for a tag library prefix
                        conditionalOperatorCount++;
                        return token(ELTokenId.QUESTION);
                    case '\n':
                        return token(ELTokenId.EOL);
                    case '0':
                        lexerState = ISA_ZERO;
                        break;
                    case '.':
                        lexerState = ISA_DOT;
                        break;
                    case '\\':
                        // issue #242361 - coloring in case of EL inside quoted JSP attr_value
                        // Peek at the following character without consuming it.
                        int nextChar = input.read();
                        input.backup(1);
                        if (nextChar == '"') {
                            return token(ELTokenId.STRING_LITERAL);
                        }
                        // NOTE(review): otherwise the backslash stays in the pending
                        // text and scanning continues in INIT - confirm intended.
                        break;
                    default:
                        if (Character.isWhitespace(actChar)) {
                            lexerState = ISI_WHITESPACE;
                            break;
                        }
                        if (Character.isJavaIdentifierStart(actChar)) {
                            lexerState = ISI_IDENTIFIER;
                            break;
                        }
                        if (Character.isDigit(actChar)) {
                            lexerState = ISI_INT;
                            break;
                        }
                        return token(ELTokenId.INVALID_CHAR);
                }
                break;

            case ISI_WHITESPACE:
                if (!Character.isWhitespace(actChar)) {
                    lexerState = INIT;
                    input.backup(1);
                    return token(ELTokenId.WHITESPACE);
                }
                break;

            case ISI_BRACKET:
                switch (actChar) {
                    case ']':
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.IDENTIFIER);
                    case '"':
                        return token(ELTokenId.LBRACKET);
                    case '\'':
                        return token(ELTokenId.LBRACKET);
                    case '/':
                        return token(ELTokenId.DIV);
                    case '+':
                        lexerState = ISI_BRACKET_ISA_PLUS;
                        break;
                    case '-':
                        lexerState = ISI_BRACKET_ISA_MINUS;
                        break;
                    case '*':
                        return token(ELTokenId.MUL);
                    case '[':
                        return token(ELTokenId.LBRACKET);
                    case '%':
                        return token(ELTokenId.MOD);
                    case ':':
                        return token(ELTokenId.COLON);
                    case ';':
                        return token(ELTokenId.SEMICOLON);
                    case '(':
                        return token(ELTokenId.LPAREN);
                    case ')':
                        return token(ELTokenId.RPAREN);
                    case ',':
                        return token(ELTokenId.COMMA);
                    case '?':
                        return token(ELTokenId.QUESTION);
                    case '=':
                        lexerState = ISI_BRACKET_ISA_EQ;
                        break;
                    case '>':
                        lexerState = ISI_BRACKET_ISA_GT;
                        break;
                    case '<':
                        lexerState = ISI_BRACKET_ISA_LT;
                        break;
                    case '|':
                        lexerState = ISI_BRACKET_ISA_PIPE;
                        break;
                    case '&':
                        lexerState = ISI_BRACKET_ISA_AND;
                        break;
                    case '0':
                        lexerState = ISI_BRACKET_ISA_ZERO;
                        break;
                    case '.':
                        lexerState = ISI_BRACKET_ISA_DOT;
                        break;
                    default:
                        if (Character.isWhitespace(actChar)) {
                            lexerState = ISI_BRACKET_A_WHITESPACE;
                            break;
                        }
                        if (Character.isJavaIdentifierStart(actChar)) {
                            lexerState = ISI_BRACKET_A_IDENTIFIER;
                            break;
                        }
                        if (Character.isDigit(actChar)) {
                            lexerState = ISI_BRACKET_ISI_INT;
                            break;
                        }
                        return token(ELTokenId.INVALID_CHAR);
                }
                break;

            case ISI_BRACKET_A_WHITESPACE:
                if (!Character.isWhitespace(actChar)) {
                    lexerState = ISI_BRACKET;
                    input.backup(1);
                    return token(ELTokenId.WHITESPACE);
                }
                break;

            // --- operators that may be one or two characters long ---
            // NOTE(review): the two-character forms below always reset the state
            // to INIT, even when entered from an ISI_BRACKET_* state.
            case ISI_BRACKET_ISA_EQ:
            case ISA_EQ:
                switch (actChar) {
                    case '=':
                        lexerState = INIT;
                        return token(ELTokenId.EQ_EQ);
                    default:
                        lexerState = (lexerState == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.EQ);
                }
            case ISI_BRACKET_ISA_MINUS:
            case ISA_MINUS:
                switch (actChar) {
                    case '>':
                        lexerState = INIT;
                        return token(ELTokenId.LAMBDA);
                    default:
                        lexerState = (lexerState == ISI_BRACKET_ISA_MINUS) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.MINUS);
                }
            case ISI_BRACKET_ISA_PLUS:
            case ISA_PLUS:
                switch (actChar) {
                    case '=':
                        lexerState = INIT;
                        return token(ELTokenId.CONCAT);
                    default:
                        lexerState = (lexerState == ISI_BRACKET_ISA_PLUS) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.PLUS);
                }
            case ISI_BRACKET_ISA_GT:
            case ISA_GT:
                switch (actChar) {
                    case '=':
                        lexerState = INIT;
                        return token(ELTokenId.GT_EQ);
                    default:
                        lexerState = (lexerState == ISI_BRACKET_ISA_GT) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.GT);
                }
            case ISI_BRACKET_ISA_LT:
            case ISA_LT:
                switch (actChar) {
                    case '=':
                        lexerState = INIT;
                        return token(ELTokenId.LT_EQ);
                    default:
                        lexerState = (lexerState == ISI_BRACKET_ISA_LT) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.LT);
                }
            case ISI_BRACKET_ISA_PIPE:
            case ISA_PIPE:
                switch (actChar) {
                    case '|':
                        lexerState = INIT;
                        return token(ELTokenId.OR_OR);
                    default:
                        // NOTE(review): a lone '|' yields no token of its own here;
                        // it stays in the pending text while scanning resumes in
                        // the base state - confirm this is intended.
                        lexerState = (lexerState == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        break;
                }
                break;
            case ISI_BRACKET_ISA_AND:
            case ISA_AND:
                switch (actChar) {
                    case '&':
                        lexerState = INIT;
                        return token(ELTokenId.AND_AND);
                    default:
                        // NOTE(review): same as the lone '|' case above.
                        lexerState = (lexerState == ISI_BRACKET_ISA_AND) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        break;
                }
                break;
            case ISA_EXCLAMATION:
                switch (actChar) {
                    case '=':
                        lexerState = INIT;
                        return token(ELTokenId.NOT_EQ);
                    default:
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.NOT);
                }

            // --- double-quoted string ---
            case ISI_STRING:
                switch (actChar) {
                    case '\\':
                        lexerState = ISI_STRING_A_BSLASH;
                        break;
                    case '\n':
                        // unterminated string: end the token in front of the newline
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.STRING_LITERAL);
                    case '"': // NOI18N
                        lexerState = INIT;
                        return token(ELTokenId.STRING_LITERAL);
                }
                break;
            case ISI_STRING_A_BSLASH:
                // the character following a backslash is consumed unconditionally
                lexerState = ISI_STRING;
                break;

            // --- identifier / keyword / tag library prefix ---
            case ISI_BRACKET_A_IDENTIFIER:
            case ISI_IDENTIFIER:
                if (!Character.isJavaIdentifierPart(actChar)) {
                    lexerState = (lexerState == ISI_BRACKET_A_IDENTIFIER) ? ISI_BRACKET : INIT;
                    input.backup(1);
                    Token<ELTokenId> tid = matchKeyword(input);
                    if (tid == null) {
                        // "name:" is a tag library prefix unless the ':' can belong
                        // to a pending "?:" conditional.
                        if (actChar == ':' && conditionalOperatorCount <= 0) {
                            tid = token(ELTokenId.TAG_LIB_PREFIX);
                        } else {
                            tid = token(ELTokenId.IDENTIFIER);
                        }
                    }
                    return tid;
                }
                break;

            // --- single-quoted literal ---
            case ISI_CHAR:
                switch (actChar) {
                    case '\\':
                        lexerState = ISI_CHAR_A_BSLASH;
                        break;
                    case '\n':
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.CHAR_LITERAL);
                    case '\'':
                        lexerState = INIT;
                        return token(ELTokenId.CHAR_LITERAL);
                    default:
                        // NOTE(review): index readLength() - 1 looks like the character
                        // just read rather than the one before it, which would make
                        // this condition always true in this branch - confirm intent.
                        char lastRead = input.readText().charAt(input.readLength() - 1);
                        if (lastRead != '\'' && lastRead != '\\') {
                            lexerState = ISI_CHAR_STRING;
                        }
                }
                break;
            case ISI_CHAR_A_BSLASH:
                // only \' and \\ consume the escaped character
                if (actChar != '\'' && actChar != '\\') {
                    input.backup(1);
                }
                lexerState = ISI_CHAR;
                break;
            case ISI_CHAR_STRING:
                switch (actChar) {
                    case '\\':
                        lexerState = ISI_CHAR_STRING_A_BSLASH;
                        break;
                    case '\n':
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.STRING_LITERAL);
                    case '\'':
                        lexerState = INIT;
                        return token(ELTokenId.STRING_LITERAL);
                }
                break;
            case ISI_CHAR_STRING_A_BSLASH:
                // only \' and \\ consume the escaped character
                if (actChar != '\'' && actChar != '\\') {
                    input.backup(1);
                }
                lexerState = ISI_CHAR_STRING;
                break;

            // --- numeric literals ---
            case ISI_BRACKET_ISA_ZERO:
            case ISA_ZERO:
                switch (actChar) {
                    case '.':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                        break;
                    case 'x':
                    case 'X':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX : ISI_HEX;
                        break;
                    case 'l':
                    case 'L':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.LONG_LITERAL);
                    case 'f':
                    case 'F':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.FLOAT_LITERAL);
                    case 'd':
                    case 'D':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.DOUBLE_LITERAL);
                    case '8': // it's error to have '8' and '9' in octal number
                    case '9':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.INVALID_OCTAL_LITERAL);
                    case 'e':
                    case 'E':
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
                        break;
                    default:
                        if (Character.isDigit(actChar)) { // '8' and '9' already handled
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL : ISI_OCTAL;
                            break;
                        }
                        lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.INT_LITERAL);
                }
                break;
            case ISI_BRACKET_ISI_INT:
            case ISI_INT:
                switch (actChar) {
                    case 'l':
                    case 'L':
                        lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.LONG_LITERAL);
                    case '.':
                        lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                        break;
                    case 'f':
                    case 'F':
                        lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.FLOAT_LITERAL);
                    case 'd':
                    case 'D':
                        lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.DOUBLE_LITERAL);
                    case 'e':
                    case 'E':
                        // keep the bracket context, like the other exponent transitions
                        lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
                        break;
                    default:
                        if (!(actChar >= '0' && actChar <= '9')) {
                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.INT_LITERAL);
                        }
                }
                break;
            case ISI_BRACKET_ISI_OCTAL:
            case ISI_OCTAL:
                if (!(actChar >= '0' && actChar <= '7')) {
                    lexerState = (lexerState == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET : INIT;
                    input.backup(1);
                    return token(ELTokenId.OCTAL_LITERAL);
                }
                break;
            case ISI_BRACKET_ISI_DOUBLE:
            case ISI_DOUBLE:
                switch (actChar) {
                    case 'f':
                    case 'F':
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.FLOAT_LITERAL);
                    case 'd':
                    case 'D':
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.DOUBLE_LITERAL);
                    case 'e':
                    case 'E':
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
                        break;
                    default:
                        if (!((actChar >= '0' && actChar <= '9') || actChar == '.')) {
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.DOUBLE_LITERAL);
                        }
                }
                break;
            case ISI_DOUBLE_EXP:
            case ISI_BRACKET_ISI_DOUBLE_EXP:
                switch (actChar) {
                    case 'f':
                    case 'F':
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.FLOAT_LITERAL);
                    case 'd':
                    case 'D':
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                        return token(ELTokenId.DOUBLE_LITERAL);
                    case '-':
                    case '+':
                        // keep the bracket context so the matching *_ISA_SIGN state is used
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP)
                                ? ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN
                                : ISI_DOULE_EXP_ISA_SIGN;
                        break;
                    default:
                        if (!Character.isDigit(actChar)) {
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.DOUBLE_LITERAL);
                        }
                }
                break;
            case ISI_DOULE_EXP_ISA_SIGN:
            case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
                if (!Character.isDigit(actChar)) {
                    lexerState = (lexerState == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET : INIT;
                    switch (actChar) {
                        case 'f':
                        case 'F':
                            return token(ELTokenId.FLOAT_LITERAL);
                        case 'd':
                        case 'D':
                            return token(ELTokenId.DOUBLE_LITERAL);
                        default:
                            input.backup(1);
                            return token(ELTokenId.DOUBLE_LITERAL);
                    }
                }
                break;
            case ISI_BRACKET_ISI_HEX:
            case ISI_HEX:
                if (!((actChar >= 'a' && actChar <= 'f')
                        || (actChar >= 'A' && actChar <= 'F')
                        || Character.isDigit(actChar))) {
                    lexerState = (lexerState == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET : INIT;
                    input.backup(1);
                    return token(ELTokenId.HEX_LITERAL);
                }
                break;
            case ISI_BRACKET_ISA_DOT:
            case ISA_DOT:
                if (Character.isDigit(actChar)) {
                    // ".5" style literal
                    lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                } else { // only single dot
                    lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET : INIT;
                    input.backup(1);
                    return token(ELTokenId.DOT);
                }
                break;
        } // end of switch(state)
    } // end of big while

    // End of input was reached while some text is still pending: turn that text
    // into a final token according to the state the scanner stopped in.
    switch (lexerState) {
        case INIT:
            if (input.readLength() == 0) {
                return null;
            }
            break;
        case ISI_WHITESPACE:
            lexerState = INIT;
            return token(ELTokenId.WHITESPACE);
        case ISI_IDENTIFIER:
            lexerState = INIT;
            Token<ELTokenId> kwd = matchKeyword(input);
            return (kwd != null) ? kwd : token(ELTokenId.IDENTIFIER);
        case ISI_STRING:
        case ISI_STRING_A_BSLASH:
            return token(ELTokenId.STRING_LITERAL); // hold the state
        case ISI_CHAR:
        case ISI_CHAR_A_BSLASH:
            return token(ELTokenId.CHAR_LITERAL);
        case ISI_CHAR_STRING:
        case ISI_CHAR_STRING_A_BSLASH:
            return token(ELTokenId.STRING_LITERAL);
        case ISA_ZERO:
        case ISI_INT:
            lexerState = INIT;
            return token(ELTokenId.INT_LITERAL);
        case ISI_OCTAL:
            lexerState = INIT;
            return token(ELTokenId.OCTAL_LITERAL);
        case ISI_DOUBLE:
        case ISI_DOUBLE_EXP:
        case ISI_DOULE_EXP_ISA_SIGN:
            lexerState = INIT;
            return token(ELTokenId.DOUBLE_LITERAL);
        case ISI_HEX:
            lexerState = INIT;
            return token(ELTokenId.HEX_LITERAL);
        case ISA_DOT:
            lexerState = INIT;
            return token(ELTokenId.DOT);
        case ISA_EQ:
            lexerState = INIT;
            return token(ELTokenId.EQ);
        case ISA_MINUS:
            lexerState = INIT;
            return token(ELTokenId.MINUS);
        case ISA_PLUS:
            lexerState = INIT;
            return token(ELTokenId.PLUS);
        case ISA_GT:
            lexerState = INIT;
            return token(ELTokenId.GT);
        case ISA_LT:
            lexerState = INIT;
            return token(ELTokenId.LT);
        case ISA_PIPE:
            lexerState = INIT;
            return token(ELTokenId.OR_OR);
        case ISA_AND:
            lexerState = INIT;
            return token(ELTokenId.AND_AND);
        case ISA_EXCLAMATION:
            lexerState = INIT;
            return token(ELTokenId.NOT);
        case ISI_BRACKET:
        case ISI_BRACKET_A_IDENTIFIER:
            lexerState = INIT;
            return token(ELTokenId.IDENTIFIER);
        case ISI_BRACKET_A_WHITESPACE:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.WHITESPACE);
        case ISI_BRACKET_ISA_EQ:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.EQ);
        case ISI_BRACKET_ISA_MINUS:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.MINUS);
        case ISI_BRACKET_ISA_PLUS:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.PLUS);
        case ISI_BRACKET_ISA_GT:
            // only a single '>' has been read, so this is GT (not GT_EQ)
            lexerState = ISI_BRACKET;
            return token(ELTokenId.GT);
        case ISI_BRACKET_ISA_LT:
            // only a single '<' has been read, so this is LT (not LT_EQ)
            lexerState = ISI_BRACKET;
            return token(ELTokenId.LT);
        case ISI_BRACKET_ISA_AND:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.AND_AND);
        case ISI_BRACKET_ISA_PIPE:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.OR_OR);
        case ISI_BRACKET_ISA_DOT:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.DOT);
        case ISI_BRACKET_ISA_ZERO:
        case ISI_BRACKET_ISI_INT:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.INT_LITERAL);
        // Bracket-context numeric states: return the pending literal instead of
        // falling through to "return null" with text still read.
        case ISI_BRACKET_ISI_OCTAL:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.OCTAL_LITERAL);
        case ISI_BRACKET_ISI_DOUBLE:
        case ISI_BRACKET_ISI_DOUBLE_EXP:
        case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.DOUBLE_LITERAL);
        case ISI_BRACKET_ISI_HEX:
            lexerState = ISI_BRACKET;
            return token(ELTokenId.HEX_LITERAL);
    }
    return null;
}