in ide/xml.text.obsolete90/src/org/netbeans/modules/xml/text/syntax/XMLDefaultSyntax.java [122:813]
/**
 * Scans {@code buffer} from the current {@code offset} towards {@code stopOffset},
 * driving a character-by-character state machine kept in {@code state}
 * (with {@code subState} remembering where to resume after a reference), and
 * returns the ID of the next recognized token.
 *
 * <p>Conventions used inside the scanning loop:
 * <ul>
 * <li>leaving the state switch normally reaches the shared {@code offset++} at the
 *     bottom of the loop, i.e. the current character is consumed;</li>
 * <li>{@code continue} skips that {@code offset++}, so the same character is examined
 *     again in the newly selected state (unless the branch incremented
 *     {@code offset} itself);</li>
 * <li>{@code return} leaves {@code offset} where it is; branches that want the
 *     current character to be passed over before returning execute
 *     {@code offset++} first.</li>
 * </ul>
 *
 * <p>When the loop runs out of characters and {@code lastBuffer} is set, the
 * in-progress state is mapped to a token ID for the remaining text.
 *
 * @return the ID of the recognized token, or {@code null} when the end of the
 *         buffer was reached and {@code lastBuffer} is not set (the caller is
 *         expected to supply the next buffer)
 * @throws IllegalStateException if a "pending" state sees a character other than
 *         the one it was entered for ({@code ISP_PI_CONTENT_QMARK},
 *         {@code ISP_DECL_STRING}, {@code ISP_DECL_CHARS}), or if the end of the
 *         last buffer is reached in a state the final switch does not handle
 */
protected TokenID parseToken() {
char actChar;
// Main scanning loop: one iteration per examined character.
while(offset < stopOffset) {
actChar = buffer[offset];
switch( state ) {
case INIT: // DONE
// Nothing recognized yet: choose the next state from the current character.
switch( actChar ) {
case '<':
state = ISA_LT;
break;
case '&':
// Outside the internal DTD '&' opens a reference; subState records that
// scanning resumes as text afterwards. Inside the internal DTD it is plain text.
if (isInternalDTD() == false) {
state = ISA_REF;
subState = ISI_TEXT;
} else {
state = ISI_TEXT;
}
break;
case '%':
// Mirror image of '&': '%' opens a reference only inside the internal DTD,
// and scanning resumes in INIT afterwards.
if (isInternalDTD()) {
state = ISA_REF;
subState = INIT;
} else {
state = ISI_TEXT;
}
break;
case ']':
// Inside the internal DTD a ']' may be the start of the closing "]>".
if (isInternalDTD()) {
state = ISA_INIT_BR;
} else {
state = ISI_TEXT;
}
break;
default:
state = ISI_TEXT;
break;
}
break;
case ISI_TEXT: // DONE
// Inside text: return TEXT (without consuming the current char) as soon as a
// character that may start another construct is seen.
switch( actChar ) {
case '<':
state = INIT;
return XMLDefaultTokenContext.TEXT;
case '&':
if (isInternalDTD() == false) {
state = INIT;
return XMLDefaultTokenContext.TEXT;
}
break;
case '%':
if (isInternalDTD()) {
state = INIT;
return XMLDefaultTokenContext.TEXT;
}
break;
case ']':
// NOTE(review): unlike the cases above, no TEXT token is returned here; the
// ']' is consumed and the state changes, so the text scanned so far is
// reported with whatever ISA_INIT_BR returns - confirm this is intended.
if (isInternalDTD()) {
state = ISA_INIT_BR;
}
break;
}
break;
case ISI_ERROR: // DONE
// Consume the offending character and report it as ERROR.
offset++;
state = INIT;
return XMLDefaultTokenContext.ERROR;
case ISA_LT: // DONE
// After '<'. A name start character begins a start tag, but only outside the
// internal DTD.
if( UnicodeClasses.isXMLNameStartChar( actChar ) && isInternalDTD() == false) {
state = ISI_TAG;
break;
}
switch( actChar ) {
case '/': // ETAGO - </
state = ISA_SLASH;
break;
case '!':
state = ISA_SGML_ESCAPE;
break;
case '?':
// "<?" - consume the '?' and report the processing instruction start.
state = ISI_PI;
offset++;
return XMLDefaultTokenContext.PI_START;
default:
state = ISI_TEXT; //RELAXED to allow editing in the middle of document
continue; // don't eat the char, maybe its '&'
}
break;
case ISI_PI:
// After "<?": a name start character begins the PI target; anything else is
// consumed and the scanner switches to ISI_ERROR.
if ( UnicodeClasses.isXMLNameStartChar( actChar )) {
state = ISI_PI_TARGET;
break;
}
state = ISI_ERROR;
break;
case ISI_PI_TARGET:
// Inside the PI target name; whitespace ends it (not consumed).
if ( UnicodeClasses.isXMLNameChar( actChar )) break;
if (isWS( actChar )) {
state = ISP_PI_TARGET_WS;
return XMLDefaultTokenContext.PI_TARGET;
}
state = ISI_ERROR;
break;
case ISP_PI_TARGET_WS:
// Whitespace between the PI target and its content.
if (isWS( actChar)) break;
state = ISI_PI_CONTENT;
return XMLDefaultTokenContext.WS;
case ISI_PI_CONTENT:
if (actChar != '?') break; // eat content
// '?' seen: report the content scanned so far and leave the '?' for the next state.
// NOTE(review): if the '?' is the very first char examined in this state, nothing
// has been consumed for this token - presumably what "may do extra break" refers to.
state = ISP_PI_CONTENT_QMARK;
return XMLDefaultTokenContext.PI_CONTENT; // may do extra break
case ISP_PI_CONTENT_QMARK:
// Only entered with the scanner positioned on the '?' left by ISI_PI_CONTENT.
if (actChar != '?') throw new IllegalStateException ("'?' expected in ISP_PI_CONTENT_QMARK");
state = ISA_PI_CONTENT_QMARK;
break;
case ISA_PI_CONTENT_QMARK:
// After '?': only '>' closes the PI, anything else is consumed as more PI content.
if (actChar != '>') {
state = ISI_PI_CONTENT;
break;
}
state = INIT;
offset++;
return XMLDefaultTokenContext.PI_END;
case ISA_SLASH: // DONE
// After "</": a name start character begins the end tag name.
if( UnicodeClasses.isXMLNameStartChar( actChar )){
state = ISI_ENDTAG;
break;
}
// NOTE(review): all four branches below do exactly the same thing (switch to
// ISI_TEXT and re-examine the character there).
switch( actChar ) {
case ' ':
state = ISI_TEXT;
continue;
case '\n':
state = ISI_TEXT;
continue;
case '\r':
state = ISI_TEXT;
continue;
default: // Part of text, e.g. </3, </'\n', RELAXED
state = ISI_TEXT;
continue; // don't eat the char
}
//break;
case ISI_ENDTAG: // DONE
if( UnicodeClasses.isXMLNameChar( actChar )){
break; // Still in endtag identifier, eat next char
}
// First non-name character ends the end tag name; it is not consumed.
state = ISP_ENDTAG_X;
return XMLDefaultTokenContext.TAG;
case ISP_ENDTAG_X: // DONE
// After the end tag name: whitespace or the closing '>' is expected.
if( isWS( actChar ) ) {
state = ISP_ENDTAG_WS;
break;
}
switch( actChar ) {
case '>': // Closing of endtag, e.g. </H6 _>_
offset++;
state = INIT;
return XMLDefaultTokenContext.TAG;
default:
state = ISI_ERROR;
continue; //don't eat
}
//break;
case ISP_ENDTAG_WS: // DONE
if( isWS( actChar ) ) break; // eat all WS
state = ISP_ENDTAG_X;
return XMLDefaultTokenContext.WS;
case ISI_TAG: // DONE
if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // Still in tag identifier, eat next char
// First non-name character ends the tag name; it is not consumed.
state = ISP_TAG_X;
return XMLDefaultTokenContext.TAG;
case ISP_TAG_X: // DONE
// Inside a start tag, between its parts: whitespace, an attribute name, '/',
// '?' or the closing '>' may follow.
if( isWS( actChar ) ) {
state = ISP_TAG_WS;
break;
}
if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
state = ISI_ARG;
break;
}
switch( actChar ) {
case '/':
// Consume the '/' and stay in ISP_TAG_X.
offset++;
continue;
case '?': //Prolog and PI's now similar to Tag
// Consume the '?' and stay in ISP_TAG_X.
offset++;
continue;
case '>':
offset++;
state = INIT;
return XMLDefaultTokenContext.TAG;
default:
// Anything else is handed, unconsumed, to ISI_ERROR.
state = ISI_ERROR;
continue;
}
//break;
case ISP_TAG_WS: // DONE
if( isWS( actChar ) ) break; // eat all WS
state = ISP_TAG_X;
return XMLDefaultTokenContext.WS;
case ISI_ARG: // DONE
if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // eat next char
// First non-name character ends the attribute name; it is not consumed.
state = ISP_ARG_X;
return XMLDefaultTokenContext.ARGUMENT;
case ISP_ARG_X:
// After an attribute name: whitespace or '=' is expected.
if( isWS( actChar ) ) {
state = ISP_ARG_WS;
break;
}
switch( actChar ) {
case '=':
offset++;
state = ISP_EQ;
return XMLDefaultTokenContext.OPERATOR;
default:
state = ISI_ERROR;
continue;
}
//break;
case ISP_ARG_WS:
if( isWS( actChar ) ) break; // Eat all WhiteSpace
state = ISP_ARG_X;
return XMLDefaultTokenContext.WS;
case ISP_EQ:
// After '=': whitespace or an opening quote of the attribute value is expected.
if( isWS( actChar ) ) {
state = ISP_EQ_WS;
break;
}
switch( actChar ) {
case '\'':
state = ISI_VAL_APOS;
break;
case '"':
state = ISI_VAL_QUOT;
break;
default:
state = ISI_ERROR;
continue;
}
break;
case ISP_EQ_WS:
if( isWS( actChar ) ) break; // Consume all WS
state = ISP_EQ;
return XMLDefaultTokenContext.WS;
case ISI_VAL_APOS:
// Inside an apostrophe-quoted attribute value.
switch( actChar ) {
case '\'':
// Closing apostrophe: consume it and report the value.
offset++;
state = ISP_TAG_X;
return XMLDefaultTokenContext.VALUE;
case '&':
// A reference inside the value: if value text precedes the '&', report that
// text first (state is unchanged, so the '&' is seen again with
// offset == tokenOffset); otherwise scan the reference and come back here.
if( offset == tokenOffset ) {
subState = state;
state = ISA_REF;
break;
} else {
return XMLDefaultTokenContext.VALUE;
}
}
break; // else simply consume next char of VALUE
case ISI_VAL_QUOT:
// Inside a double-quoted attribute value; same handling as ISI_VAL_APOS.
switch( actChar ) {
case '"':
offset++;
state = ISP_TAG_X;
return XMLDefaultTokenContext.VALUE;
case '&':
if( offset == tokenOffset ) {
subState = state;
state = ISA_REF;
break;
} else {
return XMLDefaultTokenContext.VALUE;
}
}
break; // else simply consume next char of VALUE
case ISA_SGML_ESCAPE: // DONE
// After "<!": '[' may start "<![CDATA[", a character accepted by isAZ starts a
// declaration, '-' may start a comment; anything else is treated as text.
if (actChar == '[') {
state = ISA_LTEXBR;
break;
} else if( isAZ(actChar) ) {
state = ISI_SGML_DECL;
break;
}
switch( actChar ) {
case '-':
state = ISA_SGML_DASH;
break;
default:
state = ISI_TEXT;
continue;
}
break;
// The ISA_LTEXBR* states match the literal "CDATA[" one character at a time
// after "<![". Any mismatch switches to ISI_TEXT without consuming the character.
case ISA_LTEXBR:
if (actChar == 'C') {
state = ISA_LTEXBRC;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISA_LTEXBRC:
if (actChar == 'D') {
state = ISA_LTEXBRCD;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISA_LTEXBRCD:
if (actChar == 'A') {
state = ISA_LTEXBRCDA;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISA_LTEXBRCDA:
if (actChar == 'T') {
state = ISA_LTEXBRCDAT;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISA_LTEXBRCDAT:
if (actChar == 'A') {
state = ISA_LTEXBRCDATA;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISA_LTEXBRCDATA:
if (actChar == '[') {
state = ISI_CDATA;
break;
} else {
state = ISI_TEXT;
continue;
}
case ISI_CDATA:
// Inside a CDATA section: look for the first ']' of the closing "]]>".
if (actChar == ']') {
state = ISA_CDATA_BR;
break;
}
// NOTE(review): there is no break here, so for any other character control
// falls through into ISA_CDATA_BR. Its else-branch sets state back to ISI_CDATA
// and breaks, so the net effect is to stay in ISI_CDATA and consume the char.
case ISA_CDATA_BR:
// After one ']' inside CDATA.
if (actChar == ']') {
state = ISA_CDATA_BRBR;
break;
} else {
state = ISI_CDATA;
break;
}
case ISA_CDATA_BRBR:
// After "]]" inside CDATA: '>' closes the section.
if (actChar == '>') {
state = ISI_TEXT; //It's allowed only in content
offset++;
return XMLTokenIDs.CDATA_SECTION;
} else if (actChar == ']') {
// stay in the same state
break;
} else {
state = ISI_CDATA;
break;
}
case ISA_SGML_DASH: // DONE
// After "<!-": a second '-' opens a comment, anything else is an error.
switch( actChar ) {
case '-':
state = ISI_XML_COMMENT;
break;
default:
state=ISI_ERROR;
continue;
}
break;
case ISI_XML_COMMENT: // DONE
switch( actChar ) {
case '-':
state = ISA_XML_COMMENT_DASH;
break;
//create an XML comment token for each line of the comment - a workaround fix for performance bug #39446
//this also causes a SyntaxElement to be created for each line of the comment - see XMLSyntaxSupport.createElement:277
//PENDING - this code can be removed after editor solve it somehow in their code
case '\n':
offset++;
//stay in the same state - we are still in an XML comment,
//we just need to create a token for each line.
return XMLDefaultTokenContext.BLOCK_COMMENT;
}
break;
case ISA_XML_COMMENT_DASH:
// After one '-' inside a comment: a second '-' may start the closing "-->",
// anything else is re-examined as ordinary comment content.
switch( actChar ) {
case '-':
state = ISI_XML_COMMENT_WS;
break;
default:
state = ISI_XML_COMMENT;
continue;
}
break;
case ISI_XML_COMMENT_WS: // DONE
// After "--" inside a comment: whitespace is skipped, then '>' closes the comment.
if( isWS( actChar ) ) break; // Consume all WS
switch( actChar ) {
case '>':
offset++;
state = INIT;
return XMLDefaultTokenContext.BLOCK_COMMENT;
default:
// Anything else: report the comment scanned so far and leave the current
// character, unconsumed, to ISI_ERROR.
state = ISI_ERROR;
return XMLDefaultTokenContext.BLOCK_COMMENT;
}
case ISP_DECL_STRING:
// Positioned on the '"' that ISI_SGML_DECL stopped at; consume it and scan the string.
if (actChar != '"') throw new IllegalStateException("Unexpected " + actChar);
state = ISI_DECL_STRING;
break;
case ISI_DECL_STRING:
// Inside a double-quoted literal of a declaration; the closing '"' is consumed
// and the literal is reported as VALUE.
if ( actChar == '"') {
state = ISI_SGML_DECL;
offset++;
return XMLDefaultTokenContext.VALUE;
}
break;
case ISP_DECL_CHARS:
// Positioned on the '\'' that ISI_SGML_DECL stopped at; consume it and scan the literal.
if (actChar != '\'') throw new IllegalStateException("Unexpected " + actChar);
state = ISI_DECL_CHARS;
break;
case ISI_DECL_CHARS:
// Inside an apostrophe-quoted literal of a declaration; the closing '\'' is
// consumed and the literal is reported as VALUE.
if ( actChar == '\'') {
state = ISI_SGML_DECL;
offset++;
return XMLDefaultTokenContext.VALUE;
}
break;
case ISI_SGML_DECL:
// Inside a "<!..." declaration: everything is consumed until a quote,
// '[' or '>' is found.
switch( actChar ) {
case '"':
// Report the declaration text so far; the quote itself is left for ISP_DECL_STRING.
state = ISP_DECL_STRING;
return XMLDefaultTokenContext.DECLARATION;
case '\'':
// Report the declaration text so far; the apostrophe is left for ISP_DECL_CHARS.
state = ISP_DECL_CHARS;
return XMLDefaultTokenContext.DECLARATION;
case '[':
// '[' is consumed and the scanner enters the internal DTD.
offset++;
state = INIT;
enterInternalDTD();
return XMLDefaultTokenContext.DECLARATION;
case '>':
offset++;
state = INIT;
return XMLDefaultTokenContext.DECLARATION;
}
break;
case ISA_INIT_BR:
// After ']' inside the internal DTD: skip whitespace, then '>' ends the
// internal DTD; any other character yields ERROR without being consumed.
if (isWS(actChar)) break;
if (actChar == '>') {
offset++;
state = INIT;
leaveInternalDTD();
return XMLDefaultTokenContext.DECLARATION;
} else {
state = INIT;
return XMLDefaultTokenContext.ERROR;
}
// NOTE(review): no transition into ISA_SGML_DECL_DASH is visible in this method;
// the state may only be reachable from code elsewhere, or may be dead.
case ISA_SGML_DECL_DASH:
if( actChar == '-' ) {
state = ISI_ERROR;
break;
} else {
if(isWS(actChar)){
state = ISI_ERROR;
continue;
} else {
state = ISI_SGML_DECL;
continue;
}
}
case ISA_REF:
// After '&' (or '%' inside the internal DTD): a name start character begins a
// named reference, '#' a numeric one; otherwise resume in subState and
// re-examine the current character there.
if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
state = ISI_REF_NAME;
break;
}
if( actChar == '#') {
state = ISA_REF_HASH;
break;
}
state = subState;
continue;
case ISI_REF_NAME:
// Inside a reference name; a terminating ';' is included in the token, any
// other terminator is left unconsumed.
if( UnicodeClasses.isXMLNameChar( actChar ) ) break;
if( actChar == ';' ) offset++;
state = subState;
return XMLDefaultTokenContext.CHARACTER;
case ISA_REF_HASH:
// After "&#": a digit starts a decimal reference, 'x'/'X' a hexadecimal one.
if( actChar >= '0' && actChar <= '9' ) {
state = ISI_REF_DEC;
break;
}
if( actChar == 'x' || actChar == 'X' ) {
state = ISA_REF_X;
break;
}
// Any other character accepted by isAZ is consumed and reported as ERROR.
if( isAZ( actChar ) ) {
offset++;
state = subState;
return XMLDefaultTokenContext.ERROR;
}
// Otherwise resume in subState and re-examine the current character there.
state = subState;
continue;
case ISI_REF_DEC:
// Inside a decimal character reference; a terminating ';' is included.
if( actChar >= '0' && actChar <= '9' ) break;
if( actChar == ';' ) offset++;
state = subState;
return XMLDefaultTokenContext.CHARACTER;
case ISA_REF_X:
// After "&#x": at least one hex digit is required.
if (isHex(actChar)) {
state = ISI_REF_HEX;
break;
}
state = subState;
return XMLDefaultTokenContext.ERROR; // error on previous "&#x" sequence
case ISI_REF_HEX:
// Inside a hexadecimal character reference; a terminating ';' is included.
if (isHex(actChar)) break;
if (actChar == ';' ) offset++;
state = subState;
return XMLDefaultTokenContext.CHARACTER;
}
// Shared "consume the current character" step for every branch that left the
// state switch without continue/return.
offset++;
} // end of while(offset...)
/** At this stage there's no more text in the scanned buffer.
* Scanner first checks whether this is completely the last
* available buffer.
*/
if( lastBuffer ) {
// No more input will follow: map the in-progress state to the token ID used
// for the text scanned so far.
switch( state ) {
case INIT:
case ISI_TEXT:
case ISA_LT:
case ISA_SLASH:
case ISA_SGML_ESCAPE:
case ISA_SGML_DASH:
return XMLDefaultTokenContext.TEXT;
case ISA_REF:
case ISA_REF_HASH:
// An unfinished reference start is reported according to where it occurred:
// TEXT when scanning resumes in ISI_TEXT, VALUE for every other subState.
if( subState == ISI_TEXT ) return XMLDefaultTokenContext.TEXT;
else return XMLDefaultTokenContext.VALUE;
case ISI_XML_COMMENT:
case ISA_XML_COMMENT_DASH:
case ISI_XML_COMMENT_WS:
return XMLDefaultTokenContext.BLOCK_COMMENT;
case ISI_TAG:
case ISI_ENDTAG:
return XMLDefaultTokenContext.TAG;
case ISI_ARG:
return XMLDefaultTokenContext.ARGUMENT;
case ISI_ERROR:
return XMLDefaultTokenContext.ERROR;
case ISP_ARG_WS:
case ISP_TAG_WS:
case ISP_ENDTAG_WS:
case ISP_EQ_WS:
return XMLDefaultTokenContext.WS;
// NOTE(review): the "pending" states below also report WS, same as the
// whitespace states above - confirm that is the intended token ID.
case ISP_ARG_X:
case ISP_TAG_X:
case ISP_ENDTAG_X:
case ISP_EQ:
return XMLDefaultTokenContext.WS;
case ISI_VAL_APOS:
case ISI_VAL_QUOT:
case ISI_DECL_CHARS:
case ISI_DECL_STRING:
return XMLDefaultTokenContext.VALUE;
case ISI_SGML_DECL:
case ISA_SGML_DECL_DASH:
case ISP_DECL_STRING:
case ISP_DECL_CHARS:
return XMLDefaultTokenContext.DECLARATION;
case ISI_REF_NAME:
case ISI_REF_DEC:
case ISA_REF_X:
case ISI_REF_HEX:
return XMLDefaultTokenContext.CHARACTER;
case ISI_PI:
return XMLDefaultTokenContext.PI_START;
case ISI_PI_TARGET:
return XMLDefaultTokenContext.PI_TARGET;
case ISP_PI_TARGET_WS:
return XMLDefaultTokenContext.WS;
case ISI_PI_CONTENT:
return XMLDefaultTokenContext.PI_CONTENT;
case ISA_PI_CONTENT_QMARK:
case ISP_PI_CONTENT_QMARK:
// we are at end of the last buffer and expect that next char will be '>'
return XMLDefaultTokenContext.PI_END;
// An incomplete "<![CDATA[" prefix is reported as plain text.
case ISA_LTEXBR:
case ISA_LTEXBRC:
case ISA_LTEXBRCD:
case ISA_LTEXBRCDA:
case ISA_LTEXBRCDAT:
case ISA_LTEXBRCDATA:
return XMLDefaultTokenContext.TEXT;
case ISI_CDATA:
case ISA_CDATA_BR:
case ISA_CDATA_BRBR:
return XMLTokenIDs.CDATA_SECTION;
case ISA_INIT_BR:
return XMLDefaultTokenContext.TEXT;
default:
throw new IllegalStateException("Last buffer does not handle state " + state + "!"); //NOI18N
}
}
return null; // ask for next buffer
}