in ide/html.lexer/src/org/netbeans/lib/html/lexer/HtmlLexer.java [492:1432]
public Token<HTMLTokenId> nextToken() {
    // Recognizes and returns the next HTML token from 'input'.
    //
    // The method is a character driven state machine: each character read from
    // 'input' is dispatched on 'lexerState'. A state either consumes the character
    // and stays in the loop, or backs the character up and returns the token
    // recognized so far. The state is kept outside of this method (lexerState,
    // lexerSubState, lexerEmbeddingState, customELIndex, tag, attribute, scriptType,
    // quoteType) so the following call continues where this one stopped.
    //
    // Many states contain a "lexer restart check" (input.readLength() > 1): a token
    // is only returned if something more than the just read character is buffered.
    //
    // Returns null when only EOF could be read; otherwise whatever is buffered when
    // EOF is reached is flushed as a token chosen by the current state (see the
    // second switch below the main loop).
    int actChar;
    main: while (true) {
        actChar = input.read();
        if (actChar == EOF) {
            if(input.readLengthEOF() == 1) {
                return null; //just EOF is read
            } else {
                //there is something else in the buffer except EOF
                //we will return last token now
                input.backup(1); //backup the EOF, we will return null in next nextToken() call
                break;
            }
        }

        switch( lexerState ) {
            case INIT:              // DONE
                switch( actChar ) {
                    case '<':
                        lexerState = ISA_LT;
                        continue main;
                    case '&':
                        lexerState = ISA_REF;
                        lexerSubState = ISI_TEXT;
                        continue main;
                    default:
                        lexerState = ISI_TEXT;
                        break;
                }
                //fall through to ISI_TEXT
            case ISI_TEXT:        // DONE
                switch( actChar ) {
                    case '<':
                    case '&':
                        lexerState = INIT;
                        input.backup(1);
                        if(input.readLength() > 0) { //is there any text before & or < ???
                            return token(HTMLTokenId.TEXT);
                        }
                        break;
                }
                //custom EL support
                delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                    String openDelimiter = customELQuery.getOpenDelimiters()[delimiterIndex];
                    if(openDelimiter == null) {
                        continue;
                    }
                    int alreadyRead = input.readLength();
                    char read = (char)actChar; //first char is already read
                    for(int i = 0; i < openDelimiter.length(); i++) {
                        char delimChar = openDelimiter.charAt(i);
                        if(read != delimChar) {
                            //no match
                            //undo the lookahead reads; readLengthEOF() also counts an EOF possibly read above
                            input.backup(input.readLengthEOF() - alreadyRead); //backup text
                            continue delimiters; //and try next one
                        }
                        if((i+1) < openDelimiter.length()) {
                            //will be next loop, read char
                            read = (char)input.read();
                        }
                    }
                    //we've found an open delimiter
                    //check if there was already something read before checking the delimiter,
                    //if so then return it and re-run this step again so then we can return
                    //clean token for the delimiter
                    if(input.readLength() > openDelimiter.length()) {
                        input.backup(openDelimiter.length());
                        return token(HTMLTokenId.TEXT);
                    } else {
                        //return the open symbol token and switch to "in el" state
                        lexerState = ISI_EL;
                        customELIndex = (byte)(delimiterIndex + 1); //0 is reserved for "no delimiter", 1 means delimiter with index 0
                        //save the provider's index in the delimiter token's property so one can recognize what should be
                        //the delimiters' content if it is empty
                        //TODO "contentMimetype" INTO API???
                        return token(HTMLTokenId.EL_OPEN_DELIMITER,
                                new HtmlTokenPropertyProvider(EL_EXPRESSION_CONTENT_MIMETYPE_TOKEN_PROPERTY_KEY, customELQuery.getMimeTypes()[delimiterIndex]));
                    }
                }
                break;

            case ISI_EL:
                delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                    String closeDelimiter = customELQuery.getCloseDelimiters()[delimiterIndex];
                    if(closeDelimiter == null) {
                        continue;
                    }
                    int alreadyRead = input.readLength();
                    char read = (char)actChar; //first char is already read
                    for(int i = 0; i < closeDelimiter.length(); i++) {
                        char delimChar = closeDelimiter.charAt(i);
                        if(read != delimChar) {
                            //no match
                            //undo the lookahead reads; readLengthEOF() also counts an EOF possibly read above
                            input.backup(input.readLengthEOF() - alreadyRead); //backup text
                            continue delimiters; //and try next one
                        }
                        if((i+1) < closeDelimiter.length()) {
                            //will be next loop, read char
                            read = (char)input.read();
                        }
                    }
                    //we've found a close delimiter
                    //check if there was already something read before checking the delimiter,
                    //if so then return it and re-run this step again so then we can return
                    //clean token for the delimiter
                    if(input.readLength() > closeDelimiter.length()) {
                        input.backup(closeDelimiter.length());
                        //save the provider's index in the token's property so we can set the corresponding embedding in HTMLTokenId.language()
                        return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));
                    } else {
                        //return the close symbol token and leave the "in el" state
                        lexerState = INIT;
                        customELIndex = INIT;
                        return token(HTMLTokenId.EL_CLOSE_DELIMITER);
                    }
                }
                break;

            case ISI_ERROR:      // DONE
                lexerState = INIT;
                tag = null;
                return token(HTMLTokenId.ERROR);

            case ISA_LT:         // PENDING other transitions - e.g '<?'
                if( isAZ( actChar ) ) {   // <'a..Z'
                    lexerState = ISI_TAG;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.TAG_OPEN_SYMBOL);
                    }
                    break;
                }
                switch( actChar ) {
                    case '/':               // ETAGO - </
                        lexerState = ISA_SLASH;
                        return token(HTMLTokenId.TAG_OPEN_SYMBOL);
                    case '>':               // Empty start tag <>, RELAXED
                        lexerState = INIT;
                        return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                    case '!':
                        lexerState = ISA_SGML_ESCAPE;
                        break;
                    case '?':
                        lexerState = ISI_XML_PI;
                        break;
                    default:
                        //not a tag start - the '<' becomes a part of a text
                        input.backup(1);
                        lexerState = ISI_TEXT;
                        break;
                }
                break;

            case ISI_XML_PI:
                if(actChar == '?') {
                    lexerState = ISI_XML_PI_QM;
                    break;
                }
                //else stay in XML PI
                break;

            case ISI_XML_PI_QM:
                if(actChar == '>') {
                    //XML PI token
                    lexerState = INIT;
                    return token(HTMLTokenId.XML_PI);
                } else {
                    lexerState = ISI_XML_PI;
                    break;
                }

            case ISA_SLASH:        // DONE
                if( isAZ( actChar ) ) {   // </'a..Z'
                    lexerState = ISI_ENDTAG;
                    break;
                }
                switch( actChar ) {
                    case '>':               // Empty end tag </>, RELAXED
                        lexerState = INIT;
                        return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                    default:                // Part of text, e.g. </3, </'\n', RELAXED
                        lexerState = ISI_TEXT;
                        input.backup(1);
                        break;
                }
                break;

            case ISI_ENDTAG:        // DONE
                if( isName( actChar ) ) break;    // Still in endtag identifier, eat next char
                lexerState = ISP_ENDTAG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.TAG_CLOSE);
                }
                break;

            case ISP_ENDTAG_X:      // DONE
                if( isWS( actChar ) ) {
                    lexerState = ISP_ENDTAG_WS;
                    break;
                }
                tag = null;
                switch( actChar ) {
                    case '>':               // Closing of endtag, e.g. </H6 _>_
                        lexerState = INIT;
                        return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                    case '<':               // next tag, e.g. </H6 _<_, RELAXED
                        lexerState = INIT;
                        input.backup(1);
                        break;
                    default:
                        lexerState = ISI_ERROR;
                        input.backup(1);
                        break;
                }
                break;

            case ISP_ENDTAG_WS:      // DONE
                if( isWS( actChar ) ) break;  // eat all WS
                lexerState = ISP_ENDTAG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.WS);
                }
                break;

            case ISI_TAG:        // DONE
                if( isName( actChar ) ) break;    // Still in tag identifier, eat next char
                lexerState = ISP_TAG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    //remember the tag name and test if it is SCRIPT or STYLE
                    tag = input.readText().toString();
                    if(equals(SCRIPT, tag, true, true)) {
                        lexerEmbeddingState = ISI_SCRIPT;
                    }
                    if(equals(STYLE, tag, true, true)) {
                        lexerEmbeddingState = ISI_STYLE;
                    }
                    return token(HTMLTokenId.TAG_OPEN);
                }
                break;

            case ISP_TAG_X:     // DONE
                if( isWS( actChar ) ) {
                    lexerState = ISP_TAG_WS;
                    break;
                }
                if( isAttributeName(actChar) ) {
                    lexerState = ISI_ARG;
                    break;
                }
                switch( actChar ) {
                    case '/':
                        lexerState = ISI_TAG_SLASH;
                        break;
                    case '>':
                        //the state following the open tag depends on the pending embedding
                        switch (lexerEmbeddingState) {
                            case INIT:
                                lexerState = INIT;
                                break;
                            case ISI_SCRIPT:
                                //script w/ "text/html" content type workaround
                                //do lex the script content as normal html code
                                if(scriptType != null && "text/html".equalsIgnoreCase(scriptType)) { //NOI18N
                                    lexerEmbeddingState = INIT;
                                    scriptType = null;
                                    lexerState = INIT;
                                } else {
                                    lexerState = ISI_SCRIPT_CONTENT;
                                }
                                break;
                            case ISI_STYLE:
                                lexerState = ISI_STYLE_CONTENT;
                                break;
                        }
                        tag = null;
                        return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                    case '<':
                        tag = null;
                        lexerState = INIT;
                        input.backup(1);
                        break;
                    default:
                        lexerState = ISP_TAG_X_ERROR;
                        break;
                }
                break;

            case ISP_TAG_X_ERROR:
                if(isWS(actChar)) {
                    lexerState = tag == null ? INIT : ISP_TAG_X;
                    input.backup(1); //backup the WS
                    return token(HTMLTokenId.ERROR);
                }
                switch(actChar) {
                    case '/':
                    case '>':
                        lexerState = tag == null ? INIT : ISP_TAG_X;
                        input.backup(1); //lets reread the token again
                        return token(HTMLTokenId.ERROR);
                }
                //stay in error
                break;

            case ISP_TAG_WS:        // DONE
                if( isWS( actChar ) ) break;    // eat all WS
                lexerState = ISP_TAG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.WS);
                }
                //Do not fall through into ISI_TAG_SLASH: that state clears the tag and
                //backs up one char before returning an ERROR token, which is wrong here.
                //Behave like ISP_ENDTAG_WS, ISP_ARG_WS and ISP_EQ_WS.
                break;

            case ISI_TAG_SLASH:
                tag = null;
                switch( actChar ) {
                    case '>':
                        lexerEmbeddingState = INIT; //possibly cancel 'in script' if empty tag found
                        lexerState = INIT;
                        return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                    default:
                        //note: tag has just been cleared above so this always resolves to INIT
                        lexerState = tag == null ? INIT : ISP_TAG_X;
                        input.backup(1);
                        return token(HTMLTokenId.ERROR);
                }

            case ISI_SCRIPT_CONTENT:
                switch( actChar ) {
                    case '<' :
                        lexerState = ISI_SCRIPT_CONTENT_AFTER_LT;
                        break;
                    default:
                        break;
                }
                break;

            case ISI_SCRIPT_CONTENT_AFTER_LT:
                if (actChar == '/') {
                    if (followsCloseTag(SCRIPT)) {
                        //end of script section found
                        lexerEmbeddingState = INIT;
                        lexerState = INIT;
                        tag = null;
                        String type = scriptType;
                        scriptType = null;
                        input.backup(input.readLength() > 2 ? 2 : input.readLength()); //backup the '</', we will read it again
                        if (input.readLength() > 0) {
                            //the script has a body
                            return token(HTMLTokenId.SCRIPT, new HtmlTokenPropertyProvider(HTMLTokenId.SCRIPT_TYPE_TOKEN_PROPERTY, type)); //NOI18N
                        } else {
                            break;
                        }
                    }
                }
                //not a script close tag, continue lexing the script content
                lexerState = ISI_SCRIPT_CONTENT;
                break;

            case ISI_STYLE_CONTENT:
                switch( actChar ) {
                    case '<' :
                        lexerState = ISI_STYLE_CONTENT_AFTER_LT;
                        break;
                    default:
                        break;
                }
                break;

            case ISI_STYLE_CONTENT_AFTER_LT:
                if (actChar == '/') {
                    if (followsCloseTag(STYLE)) {
                        //end of style section found
                        lexerEmbeddingState = INIT;
                        lexerState = INIT;
                        tag = null;
                        input.backup(input.readLength() > 2 ? 2 : input.readLength()); //backup the '</', we will read it again
                        if (input.readLength() > 0) {
                            //the style has a body
                            return token(HTMLTokenId.STYLE);
                        } else {
                            break;
                        }
                    }
                }
                //not a style close tag, continue lexing the style content
                lexerState = ISI_STYLE_CONTENT;
                break;

            case ISI_ARG:           // DONE
                if( isAttributeName(actChar) ) break; // eat next char
                lexerState = ISP_ARG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    attribute = input.readText().toString();
                    return token(HTMLTokenId.ARGUMENT);
                }
                break;

            case ISP_ARG_X:
                if( isWS( actChar ) ) {
                    lexerState = ISP_ARG_WS;
                    break;
                }
                if( isAttributeName(actChar) ) {
                    lexerState = ISI_ARG;
                    break;
                }
                switch( actChar ) {
                    case '/':
                    case '>':
                        input.backup(1);
                        lexerState = ISP_TAG_X;
                        break;
                    case '<':
                        lexerState = INIT;
                        input.backup(1);
                        break;
                    case '=':
                        lexerState = ISP_EQ;
                        return token(HTMLTokenId.OPERATOR);
                    default:
                        lexerState = ISI_ERROR;
                        input.backup(1);
                        break;
                }
                break;

            case ISP_ARG_WS:
                if( isWS( actChar ) ) break;    // Eat all WhiteSpace
                lexerState = ISP_ARG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.WS);
                }
                break;

            case ISP_EQ:
                if( isWS( actChar ) ) {
                    lexerState = ISP_EQ_WS;
                    break;
                }
                switch( actChar ) {
                    case '\'':
                        quoteType = false;
                        lexerState = ISI_VAL_QUOT;
                        break;
                    case '"':
                        quoteType = true;
                        lexerState = ISI_VAL_QUOT;
                        break;
                    case '/':
                    case '>':
                    case '<':
                        input.backup(1);
                        lexerState = ISP_TAG_X;
                        break;
                    default:
                        lexerState = ISI_VAL; //everything else is attribute value
                        break;
                }
                break;

            case ISP_EQ_WS:
                if( isWS( actChar ) ) break;    // Consume all WS
                lexerState = ISP_EQ;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.WS);
                }
                break;

            case ISI_VAL:
                if(actChar == '/') {
                    //slash in unquoted value -- may be there but not followed by >.
                    //In such case IMO the value should be closed
                    char next = (char)input.read();
                    input.backup(1); //backup the next char
                    if(next != '>') {
                        //continue lexing the value
                        break;
                    }
                } else if(!isWS(actChar) && actChar != '>' && actChar != '<') {
                    break; //continue lexing the attribute value
                }
                //finish lexing the value
                lexerState = ISP_TAG_X;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    Token<HTMLTokenId> resolveValueToken = resolveValueToken();
                    attribute = null;
                    return resolveValueToken;
                }
                break;

            case ISI_VAL_QUOT:
                //custom EL support
                delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                    String openDelimiter = customELQuery.getOpenDelimiters()[delimiterIndex];
                    if(openDelimiter == null) {
                        continue;
                    }
                    int alreadyRead = input.readLength();
                    char read = (char)actChar; //first char is already read
                    for(int i = 0; i < openDelimiter.length(); i++) {
                        char delimChar = openDelimiter.charAt(i);
                        if(read != delimChar) {
                            //no match
                            //undo the lookahead reads; readLengthEOF() also counts an EOF possibly read above
                            input.backup(input.readLengthEOF() - alreadyRead); //backup text
                            continue delimiters; //and try next one
                        }
                        if((i+1) < openDelimiter.length()) {
                            //will be next loop, read char
                            read = (char)input.read();
                        }
                    }
                    //we've found an open delimiter
                    //check if there was already something read before checking the delimiter,
                    //if so then return it and re-run this step again so then we can return
                    //clean token for the delimiter
                    if(input.readLength() > openDelimiter.length()) {
                        input.backup(openDelimiter.length());
                        return resolveValueToken();
                    } else {
                        //return the open symbol token and switch to "in el" state
                        lexerState = ISI_VAL_QUOT_EL;
                        customELIndex = (byte)(delimiterIndex + 1); //0 is reserved for "no delimiter", 1 means delimiter with index 0
                        //save the provider's index in the delimiter token's property so one can recognize what should be
                        //the delimiters' content if it is empty
                        //TODO "contentMimetype" INTO API???
                        return token(HTMLTokenId.EL_OPEN_DELIMITER,
                                new HtmlTokenPropertyProvider(EL_EXPRESSION_CONTENT_MIMETYPE_TOKEN_PROPERTY_KEY, customELQuery.getMimeTypes()[delimiterIndex]));
                    }
                }
                switch (actChar) {
                    case '\\':
                        //may be escaped quote
                        lexerState = ISI_VAL_QUOT_ESC;
                        break;
                    case '\'':
                    case '"':
                        //only the quote char which opened the value closes it
                        if(actChar == '\'' && !quoteType || actChar == '"' && quoteType) {
                            //reset the 'script embedding will follow state' if the value represents a
                            //type attribute value of a script tag
                            if(equals(SCRIPT, tag, true, true) && equals("type", attribute, true, true)) { //NOI18N
                                //inside script tag
                                scriptType = getScriptType(input.readText(), true).toString();
                            }
                            lexerState = ISP_TAG_X;
                            Token<HTMLTokenId> resolveValueToken = resolveValueToken();
                            attribute = null;
                            return resolveValueToken;
                        }
                }
                break;  // else simply consume next char of VALUE

            case ISI_VAL_QUOT_EL:
                delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                    String closeDelimiter = customELQuery.getCloseDelimiters()[delimiterIndex];
                    if(closeDelimiter == null) {
                        continue;
                    }
                    int alreadyRead = input.readLength();
                    char read = (char)actChar; //first char is already read
                    for(int i = 0; i < closeDelimiter.length(); i++) {
                        char delimChar = closeDelimiter.charAt(i);
                        if(read != delimChar) {
                            //no match
                            //undo the lookahead reads; readLengthEOF() also counts an EOF possibly read above
                            input.backup(input.readLengthEOF() - alreadyRead); //backup text
                            continue delimiters; //and try next one
                        }
                        if((i+1) < closeDelimiter.length()) {
                            //will be next loop, read char
                            read = (char)input.read();
                        }
                    }
                    //we've found a close delimiter
                    //check if there was already something read before checking the delimiter,
                    //if so then return it and re-run this step again so then we can return
                    //clean token for the delimiter
                    if(input.readLength() > closeDelimiter.length()) {
                        input.backup(closeDelimiter.length());
                        //save the provider's index in the token's property so we can set the corresponding embedding in HTMLTokenId.language()
                        return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));
                    } else {
                        //return the close symbol token and switch to "in value" state
                        lexerState = ISI_VAL_QUOT;
                        customELIndex = INIT;
                        return token(HTMLTokenId.EL_CLOSE_DELIMITER);
                    }
                }
                break;

            case ISI_VAL_QUOT_ESC:
                //Just consume the escaped char.
                //The state prevents the quoted value
                //to be finished by an escaped quote.
                lexerState = ISI_VAL_QUOT;
                break;

            case ISA_SGML_ESCAPE:       // DONE
                if( isAZ(actChar) ) {
                    lexerState = ISI_SGML_DECL;
                    break;
                }
                switch( actChar ) {
                    case '-':
                        lexerState = ISA_SGML_DASH;
                        break;
                    default:
                        lexerState = ISI_TEXT;
                        input.backup(1);
                        continue;
                }
                break;

            case ISA_SGML_DASH:       // DONE
                switch( actChar ) {
                    case '-':
                        lexerState = ISI_HTML_COMMENT;
                        break;
                    default:
                        lexerState = ISI_TEXT;
                        input.backup(1);
                        continue;
                }
                break;

            case ISI_HTML_COMMENT:        // DONE
                switch( actChar ) {
                    case '-':
                        lexerState = ISA_HTML_COMMENT_DASH;
                        break;
                        //create an HTML comment token for each line of the comment - a performance fix for #43532
                    case '\n':
                        //leave the same state - we are still in an HTML comment,
                        //we just need to create a token for each line.
                        return token(HTMLTokenId.BLOCK_COMMENT);
                }
                break;

            case ISA_HTML_COMMENT_DASH:
                switch( actChar ) {
                    case '-':
                        lexerState = ISI_HTML_COMMENT_WS;
                        break;
                    default:
                        lexerState = ISI_HTML_COMMENT;
                        continue;
                }
                break;

            case ISI_HTML_COMMENT_WS:       // DONE
                switch( actChar ) {
                    case '>':
                        lexerState = INIT;
                        return token(HTMLTokenId.BLOCK_COMMENT);
                    default:
                        lexerState = ISI_HTML_COMMENT;
                        input.backup(2); //backup everything except the first dash
                        break;
                }
                break;

            case ISI_SGML_DECL:
                if(Character.isWhitespace(actChar)) {
                    lexerState = ISI_SGML_DECL_WS;
                    if(input.readLength() > 1) {
                        input.backup(1); //backup the whitespace
                        return token(HTMLTokenId.DECLARATION);
                    }
                    break;
                }
                switch( actChar ) {
                    case '>':
                        if(input.readLength() > 1) {
                            input.backup(1); //backup the '>' char
                            return token(HTMLTokenId.DECLARATION);
                        } else {
                            //just the symbol read - return it as a part of declaration
                            lexerState = INIT;
                            return token(HTMLTokenId.DECLARATION);
                        }
                }
                break;

            case ISI_SGML_DECL_WS:
                if(actChar == '-') {
                    if( input.readLength() == 1 ) {
                        lexerState = ISA_SGML_DECL_DASH;
                        break;
                    } else {
                        if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                            input.backup(1);
                            return token(HTMLTokenId.DECLARATION);
                        }
                    }
                } else if(!Character.isWhitespace(actChar)) {
                    lexerState = ISI_SGML_DECL;
                    input.backup(1);
                    return token(HTMLTokenId.WS);
                }
                break;

            case ISA_SGML_DECL_DASH:
                if( actChar == '-' ) {
                    lexerState = ISI_SGML_COMMENT;
                    break;
                } else {
                    lexerState = ISI_SGML_DECL;
                    input.backup(1);
                    continue;
                }

            case ISI_SGML_COMMENT:
                switch( actChar ) {
                    case '-':
                        lexerState = ISA_SGML_COMMENT_DASH;
                        break;
                }
                break;

            case ISA_SGML_COMMENT_DASH:
                if( actChar == '-' ) {
                    lexerState = ISI_SGML_DECL;
                    return token(HTMLTokenId.SGML_COMMENT);
                } else {
                    lexerState = ISI_SGML_COMMENT;
                    input.backup(1);
                    continue;
                }

            case ISA_REF:
                if( isAZ( actChar ) ) {
                    lexerState = ISI_REF_NAME;
                    break;
                }
                if( actChar == '#' ) {
                    lexerState = ISA_REF_HASH;
                    break;
                }
                //not a reference - return to the state the '&' was found in
                lexerState = lexerSubState;
                input.backup(1);
                continue;

            case ISI_REF_NAME:
                if( isName( actChar ) ) break;
                lexerState = lexerSubState;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    if( actChar != ';' ) {
                        //reference name not terminated by a semicolon
                        input.backup(1);
                        return token(HTMLTokenId.TEXT);
                    }
                    return token(HTMLTokenId.CHARACTER);
                }
                break;

            case ISA_REF_HASH:
                if( actChar >= '0' && actChar <= '9' ) {
                    lexerState = ISI_REF_DEC;
                    break;
                }
                if( actChar == 'x' || actChar == 'X' ) {
                    lexerState = ISA_REF_X;
                    break;
                }
                if( isAZ( actChar ) ) {
                    lexerState = lexerSubState;
                    return token(HTMLTokenId.ERROR);
                }
                lexerState = lexerSubState;
                input.backup(1);
                continue;

            case ISI_REF_DEC:
                if( actChar >= '0' && actChar <= '9' ) break;
                lexerState = lexerSubState;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    if( actChar != ';' )
                        input.backup(1);
                    return token(HTMLTokenId.CHARACTER);
                }
                break;

            case ISA_REF_X:
                if( (actChar >= '0' && actChar <= '9') ||
                        (actChar >= 'a' && actChar <= 'f') ||
                        (actChar >= 'A' && actChar <= 'F')
                        ) {
                    lexerState = ISI_REF_HEX;
                    break;
                }
                lexerState = lexerSubState;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    input.backup(1);
                    return token(HTMLTokenId.ERROR);       // error on previous "&#x" sequence
                }
                break;

            case ISI_REF_HEX:
                if( (actChar >= '0' && actChar <= '9') ||
                        (actChar >= 'a' && actChar <= 'f') ||
                        (actChar >= 'A' && actChar <= 'F')
                        ) break;
                lexerState = lexerSubState;
                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                    if( actChar != ';' )
                        input.backup(1);
                    return token(HTMLTokenId.CHARACTER);
                }
                break;
        }
    } // end of main: while (true)

    // At this stage EOF has been reached and backed up, so the buffer holds
    // the not yet returned remainder of the input. Flush it as a token whose
    // type is determined by the state the lexer ended in.
    switch( lexerState ) {
        case INIT:
            if (input.readLength() == 0) {
                return null;
            }
            break;
        case ISI_TEXT:
        case ISA_LT:
        case ISA_SLASH:
        case ISA_SGML_ESCAPE:
        case ISA_SGML_DASH:
        case ISI_TAG_SLASH:
            return token(HTMLTokenId.TEXT);

        case ISI_XML_PI:
        case ISI_XML_PI_QM:
            return token(HTMLTokenId.XML_PI);

        case ISA_REF:
        case ISA_REF_HASH:
            if( lexerSubState == ISI_TEXT ) return token(HTMLTokenId.TEXT);
            else return token(HTMLTokenId.VALUE);

        case ISI_HTML_COMMENT:
        case ISA_HTML_COMMENT_DASH:
        case ISI_HTML_COMMENT_WS:
            return token(HTMLTokenId.BLOCK_COMMENT);

        case ISI_TAG:
            lexerState = ISP_TAG_X;
            //test if the tagname is SCRIPT or STYLE
            if(equals(SCRIPT, input.readText(), true, true)) {
                lexerEmbeddingState = ISI_SCRIPT;
            }
            if(equals(STYLE, input.readText(), true, true)) {
                lexerEmbeddingState = ISI_STYLE;
            }
            return token(HTMLTokenId.TAG_OPEN);

        case ISI_ENDTAG:
            return token(HTMLTokenId.TAG_CLOSE);

        case ISI_ARG:
            return token(HTMLTokenId.ARGUMENT);

        case ISI_ERROR:
        case ISP_TAG_X_ERROR:
            return token(HTMLTokenId.ERROR);

        case ISP_ARG_WS:
        case ISP_TAG_WS:
        case ISP_ENDTAG_WS:
        case ISP_EQ_WS:
            return token(HTMLTokenId.WS);

        case ISP_ARG_X:
        case ISP_TAG_X:
        case ISP_ENDTAG_X:
        case ISP_EQ:
            return token(HTMLTokenId.WS);

        case ISI_VAL:
        case ISI_VAL_QUOT:
        case ISI_VAL_QUOT_ESC:
            return resolveValueToken();

        case ISI_SGML_DECL:
        case ISA_SGML_DECL_DASH:
        case ISI_SGML_DECL_WS:
            return token(HTMLTokenId.DECLARATION);

        case ISI_SGML_COMMENT:
        case ISA_SGML_COMMENT_DASH:
            return token(HTMLTokenId.SGML_COMMENT);

        case ISI_REF_NAME:
        case ISI_REF_DEC:
        case ISA_REF_X:
        case ISI_REF_HEX:
            return token(HTMLTokenId.TEXT);

        case ISI_SCRIPT_CONTENT:
        case ISI_SCRIPT_CONTENT_ENDTAG:
        case ISI_SCRIPT_CONTENT_AFTER_LT:
            return token(HTMLTokenId.SCRIPT);

        case ISI_STYLE_CONTENT:
        case ISI_STYLE_CONTENT_ENDTAG:
        case ISI_STYLE_CONTENT_AFTER_LT:
            return token(HTMLTokenId.STYLE);

        case ISI_EL:
        case ISI_VAL_QUOT_EL:
            return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));
    }

    //only reached when no state above produced a token; nothing may be left unread then
    assert input.readLength() == 0 : "Returning null even if some chars still needs to be tokenized! " +
            "lexer state=" + lexerState + "; " +
            "lexer substate=" + lexerSubState + "; " +
            "lexer embedding state=" + lexerEmbeddingState + "; " +
            "readtext='" + input.readText() + "'";

    return null;
}