protected TokenID parseToken()

in ide/xml.text.obsolete90/src/org/netbeans/modules/xml/text/syntax/XMLDefaultSyntax.java [122:813]


    /**
     * Recognizes the next token in the currently scanned buffer.
     * <p>
     * The method walks {@code buffer} from {@code offset} towards
     * {@code stopOffset}, driving a state machine kept in {@code state}.
     * As soon as a token boundary is recognized it returns the token id,
     * leaving {@code offset} just past the last character of the token and
     * {@code state} set to the state scanning resumes in on the next call.
     * {@code subState} remembers the state to return to after an entity or
     * character reference has been scanned.
     * <p>
     * Inside the loop, {@code break} out of the outer switch consumes the
     * current character (the shared {@code offset++} at the loop end), while
     * {@code continue} re-examines the same character in the new state.
     *
     * @return the id of the recognized token, or {@code null} when the buffer
     *         is exhausted and it is not the last buffer (i.e. the caller
     *         should supply more input)
     * @throws IllegalStateException if a state that expects a specific
     *         character sees a different one, or if the last buffer ends in a
     *         state that is not handled
     */
    protected TokenID parseToken() {

        char actChar;
        while( offset < stopOffset ) {
            actChar = buffer[offset];
            switch( state ) {
                case INIT:              // start of a new token
                    switch( actChar ) {
                        case '<':
                            state = ISA_LT;
                            break;
                        case '&':
                            // general entity references are recognized only outside the internal DTD
                            if( !isInternalDTD() ) {
                                state = ISA_REF;
                                subState = ISI_TEXT;
                            } else {
                                state = ISI_TEXT;
                            }
                            break;
                        case '%':
                            // parameter entity references are recognized only inside the internal DTD
                            if( isInternalDTD() ) {
                                state = ISA_REF;
                                subState = INIT;
                            } else {
                                state = ISI_TEXT;
                            }
                            break;
                        case ']':
                            // possible end of the internal DTD subset
                            if( isInternalDTD() ) {
                                state = ISA_INIT_BR;
                            } else {
                                state = ISI_TEXT;
                            }
                            break;
                        default:
                            state = ISI_TEXT;
                            break;
                    }
                    break;

                case ISI_TEXT:          // inside plain text
                    switch( actChar ) {
                        case '<':
                            state = INIT;
                            return XMLDefaultTokenContext.TEXT;
                        case '&':
                            if( !isInternalDTD() ) {
                                state = INIT;
                                return XMLDefaultTokenContext.TEXT;
                            }
                            break;
                        case '%':
                            if( isInternalDTD() ) {
                                state = INIT;
                                return XMLDefaultTokenContext.TEXT;
                            }
                            break;
                        case ']':
                            if( isInternalDTD() ) {
                                state = ISA_INIT_BR;
                            }
                            break;
                    }
                    break;

                case ISI_ERROR:         // the current char is reported as a one-char error token
                    offset++;
                    state = INIT;
                    return XMLDefaultTokenContext.ERROR;

                case ISA_LT:            // after '<'

                    if( UnicodeClasses.isXMLNameStartChar( actChar ) && !isInternalDTD() ) {
                        state = ISI_TAG;
                        break;
                    }
                    switch( actChar ) {
                        case '/':               // ETAGO - </
                            state = ISA_SLASH;
                            break;
                        case '!':
                            state = ISA_SGML_ESCAPE;
                            break;
                        case '?':
                            state = ISI_PI;
                            offset++;
                            return XMLDefaultTokenContext.PI_START;
                        default:
                            state = ISI_TEXT;   // RELAXED to allow editing in the middle of document
                            continue;           // don't eat the char, maybe it's '&'
                    }
                    break;

                case ISI_PI:            // after "<?", a PI target name must start here
                    if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
                        state = ISI_PI_TARGET;
                        break;
                    }
                    state = ISI_ERROR;
                    break;

                case ISI_PI_TARGET:     // inside the PI target name
                    if( UnicodeClasses.isXMLNameChar( actChar ) ) break;
                    if( isWS( actChar ) ) {
                        state = ISP_PI_TARGET_WS;
                        return XMLDefaultTokenContext.PI_TARGET;
                    }
                    state = ISI_ERROR;
                    break;

                case ISP_PI_TARGET_WS:  // whitespace separating PI target and content
                    if( isWS( actChar ) ) break;
                    state = ISI_PI_CONTENT;
                    return XMLDefaultTokenContext.WS;

                case ISI_PI_CONTENT:    // inside PI content, up to the next '?'
                    if( actChar != '?' ) break;  // eat content
                    state = ISP_PI_CONTENT_QMARK;
                    return XMLDefaultTokenContext.PI_CONTENT;  // may do extra break

                case ISP_PI_CONTENT_QMARK:  // positioned on the '?' that ended the content token
                    if( actChar != '?' ) throw new IllegalStateException ("'?' expected in ISP_PI_CONTENT_QMARK");
                    state = ISA_PI_CONTENT_QMARK;
                    break;

                case ISA_PI_CONTENT_QMARK:  // after '?' inside PI content
                    if( actChar != '>' ) {
                        // The '?' was ordinary content. Re-examine the current char in
                        // ISI_PI_CONTENT instead of eating it: it may itself be the '?'
                        // that starts the real "?>" terminator (content ending in '?').
                        state = ISI_PI_CONTENT;
                        continue;
                    }
                    state = INIT;
                    offset++;
                    return XMLDefaultTokenContext.PI_END;

                case ISA_SLASH:         // after "</"

                    if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
                        state = ISI_ENDTAG;
                        break;
                    }
                    // Anything else (whitespace, digits, ...) is part of text,
                    // e.g. </3, </'\n', RELAXED
                    state = ISI_TEXT;
                    continue;               // don't eat the char

                case ISI_ENDTAG:        // inside the end tag name
                    if( UnicodeClasses.isXMLNameChar( actChar ) ) {
                        break;    // Still in endtag identifier, eat next char
                    }

                    state = ISP_ENDTAG_X;
                    return XMLDefaultTokenContext.TAG;


                case ISP_ENDTAG_X:      // after the end tag name, expecting WS or '>'
                    if( isWS( actChar ) ) {
                        state = ISP_ENDTAG_WS;
                        break;
                    }
                    switch( actChar ) {
                        case '>':               // Closing of endtag, e.g. </H6 _>_
                            offset++;
                            state = INIT;
                            return XMLDefaultTokenContext.TAG;
                        default:
                            state = ISI_ERROR;
                            continue;           // don't eat
                    }
                    //break;

                case ISP_ENDTAG_WS:     // whitespace inside an end tag
                    if( isWS( actChar ) ) break;  // eat all WS
                    state = ISP_ENDTAG_X;
                    return XMLDefaultTokenContext.WS;


                case ISI_TAG:           // inside the start tag name
                    if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // Still in tag identifier, eat next char
                    state = ISP_TAG_X;
                    return XMLDefaultTokenContext.TAG;

                case ISP_TAG_X:         // inside a start tag, between its parts
                    if( isWS( actChar ) ) {
                        state = ISP_TAG_WS;
                        break;
                    }
                    if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
                        state = ISI_ARG;
                        break;
                    }
                    switch( actChar ) {
                        case '/':
                        case '?':               // Prolog and PI's now similar to Tag
                            // eat the char and stay in this state; it becomes part of the closing TAG token
                            offset++;
                            continue;
                        case '>':
                            offset++;
                            state = INIT;
                            return XMLDefaultTokenContext.TAG;
                        default:
                            state = ISI_ERROR;
                            continue;
                    }
                    //break;


                case ISP_TAG_WS:        // whitespace inside a start tag
                    if( isWS( actChar ) ) break;    // eat all WS
                    state = ISP_TAG_X;
                    return XMLDefaultTokenContext.WS;

                case ISI_ARG:           // inside an attribute name
                    if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // eat next char
                    state = ISP_ARG_X;
                    return XMLDefaultTokenContext.ARGUMENT;

                case ISP_ARG_X:         // after an attribute name, expecting WS or '='
                    if( isWS( actChar ) ) {
                        state = ISP_ARG_WS;
                        break;
                    }
                    switch( actChar ) {
                        case '=':
                            offset++;
                            state = ISP_EQ;
                            return XMLDefaultTokenContext.OPERATOR;
                        default:
                            state = ISI_ERROR;
                            continue;
                    }
                    //break;

                case ISP_ARG_WS:        // whitespace between attribute name and '='
                    if( isWS( actChar ) ) break;    // Eat all WhiteSpace
                    state = ISP_ARG_X;
                    return XMLDefaultTokenContext.WS;

                case ISP_EQ:            // after '=', expecting WS or an opening quote
                    if( isWS( actChar ) ) {
                        state = ISP_EQ_WS;
                        break;
                    }
                    switch( actChar ) {
                        case '\'':
                            state = ISI_VAL_APOS;
                            break;
                        case '"':
                            state = ISI_VAL_QUOT;
                            break;
                        default:
                            state = ISI_ERROR;
                            continue;
                    }
                    break;

                case ISP_EQ_WS:         // whitespace between '=' and the value
                    if( isWS( actChar ) ) break;    // Consume all WS
                    state = ISP_EQ;
                    return XMLDefaultTokenContext.WS;

                case ISI_VAL_APOS:      // inside an apostrophe-delimited attribute value
                    switch( actChar ) {
                        case '\'':
                            offset++;
                            state = ISP_TAG_X;
                            return XMLDefaultTokenContext.VALUE;
                        case '&':
                            if( offset == tokenOffset ) {
                                // token starts with '&': scan a reference, then come back here
                                subState = state;
                                state = ISA_REF;
                                break;
                            } else {
                                // finish the value scanned so far; the '&' starts the next token
                                return XMLDefaultTokenContext.VALUE;
                            }
                    }
                    break;  // else simply consume next char of VALUE

                case ISI_VAL_QUOT:      // inside a quote-delimited attribute value
                    switch( actChar ) {
                        case '"':
                            offset++;
                            state = ISP_TAG_X;
                            return XMLDefaultTokenContext.VALUE;
                        case '&':
                            if( offset == tokenOffset ) {
                                // token starts with '&': scan a reference, then come back here
                                subState = state;
                                state = ISA_REF;
                                break;
                            } else {
                                // finish the value scanned so far; the '&' starts the next token
                                return XMLDefaultTokenContext.VALUE;
                            }
                    }
                    break;  // else simply consume next char of VALUE


                case ISA_SGML_ESCAPE:   // after "<!"
                    if( actChar == '[' ) {
                        state = ISA_LTEXBR;
                        break;
                    } else if( isAZ( actChar ) ) {
                        state = ISI_SGML_DECL;
                        break;
                    }
                    switch( actChar ) {
                        case '-':
                            state = ISA_SGML_DASH;
                            break;
                        default:
                            state = ISI_TEXT;
                            continue;
                    }
                    break;

                // The following states match the "<![CDATA[" prefix one char at a time;
                // any mismatch turns what was scanned so far into text.
                case ISA_LTEXBR:        // after "<!["
                    if( actChar == 'C' ) {
                        state = ISA_LTEXBRC;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISA_LTEXBRC:       // after "<![C"
                    if( actChar == 'D' ) {
                        state = ISA_LTEXBRCD;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISA_LTEXBRCD:      // after "<![CD"
                    if( actChar == 'A' ) {
                        state = ISA_LTEXBRCDA;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISA_LTEXBRCDA:     // after "<![CDA"
                    if( actChar == 'T' ) {
                        state = ISA_LTEXBRCDAT;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISA_LTEXBRCDAT:    // after "<![CDAT"
                    if( actChar == 'A' ) {
                        state = ISA_LTEXBRCDATA;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISA_LTEXBRCDATA:   // after "<![CDATA"
                    if( actChar == '[' ) {
                        state = ISI_CDATA;
                        break;
                    } else {
                        state = ISI_TEXT;
                        continue;
                    }

                case ISI_CDATA:         // inside a CDATA section
                    if( actChar == ']' ) {
                        state = ISA_CDATA_BR;
                    }
                    // Explicit break: any other char is CDATA content. (Previously this
                    // fell through into ISA_CDATA_BR, which only by accident had the same effect.)
                    break;

                case ISA_CDATA_BR:      // after ']' inside a CDATA section
                    if( actChar == ']' ) {
                        state = ISA_CDATA_BRBR;
                        break;
                    } else {
                        state = ISI_CDATA;
                        break;
                    }

                case ISA_CDATA_BRBR:    // after "]]" inside a CDATA section
                    if( actChar == '>' ) {
                        state = ISI_TEXT;           // It's allowed only in content
                        offset++;
                        return XMLTokenIDs.CDATA_SECTION;
                    } else if( actChar == ']' ) {
                        // stay in the same state
                        break;
                    } else {
                        state = ISI_CDATA;
                        break;
                    }


                case ISA_SGML_DASH:     // after "<!-"
                    switch( actChar ) {
                        case '-':
                            state = ISI_XML_COMMENT;
                            break;
                        default:
                            state = ISI_ERROR;
                            continue;
                    }
                    break;

                case ISI_XML_COMMENT:   // inside a comment
                    switch( actChar ) {
                        case '-':
                            state = ISA_XML_COMMENT_DASH;
                            break;
                        // create an XML comment token for each line of the comment - a workaround fix for performance bug #39446
                        // this also causes a SyntaxElement to be created for each line of the comment - see XMLSyntaxSupport.createElement:277
                        // PENDING - this code can be removed after editor solve it somehow in their code
                        case '\n':
                            offset++;
                            // leave the same state - we are still in an XML comment,
                            // we just need to create a token for each line.
                            return XMLDefaultTokenContext.BLOCK_COMMENT;
                    }
                    break;

                case ISA_XML_COMMENT_DASH:  // after '-' inside a comment
                    switch( actChar ) {
                        case '-':
                            state = ISI_XML_COMMENT_WS;
                            break;
                        default:
                            state = ISI_XML_COMMENT;
                            continue;
                    }
                    break;

                case ISI_XML_COMMENT_WS:    // after "--" inside a comment, expecting WS or '>'
                    if( isWS( actChar ) ) break;  // Consume all WS
                    switch( actChar ) {
                        case '>':
                            offset++;
                            state = INIT;
                            return XMLDefaultTokenContext.BLOCK_COMMENT;
                        default:
                            // close the comment token here; the offending char becomes an error token
                            state = ISI_ERROR;
                            return XMLDefaultTokenContext.BLOCK_COMMENT;
                    }

                case ISP_DECL_STRING:   // positioned on the '"' opening a string in a declaration
                    if( actChar != '"' ) throw new IllegalStateException("Unexpected " + actChar);
                    state = ISI_DECL_STRING;
                    break;

                case ISI_DECL_STRING:   // inside a quote-delimited string in a declaration
                    if( actChar == '"' ) {
                        state = ISI_SGML_DECL;
                        offset++;
                        return XMLDefaultTokenContext.VALUE;
                    }
                    break;

                case ISP_DECL_CHARS:    // positioned on the '\'' opening a string in a declaration
                    if( actChar != '\'' ) throw new IllegalStateException("Unexpected " + actChar);
                    state = ISI_DECL_CHARS;
                    break;

                case ISI_DECL_CHARS:    // inside an apostrophe-delimited string in a declaration
                    if( actChar == '\'' ) {
                        state = ISI_SGML_DECL;
                        offset++;
                        return XMLDefaultTokenContext.VALUE;
                    }
                    break;

                case ISI_SGML_DECL:     // inside a "<!..." declaration
                    switch( actChar ) {
                        case '"':
                            state = ISP_DECL_STRING;
                            return XMLDefaultTokenContext.DECLARATION;
                        case '\'':
                            state = ISP_DECL_CHARS;
                            return XMLDefaultTokenContext.DECLARATION;
                        case '[':
                            // start of the internal DTD subset
                            offset++;
                            state = INIT;
                            enterInternalDTD();
                            return XMLDefaultTokenContext.DECLARATION;
                        case '>':
                            offset++;
                            state = INIT;
                            return XMLDefaultTokenContext.DECLARATION;
                    }
                    break;

                case ISA_INIT_BR:       // after ']' in the internal DTD, expecting WS or '>'
                    if( isWS( actChar ) ) break;
                    if( actChar == '>' ) {
                        offset++;
                        state = INIT;
                        leaveInternalDTD();
                        return XMLDefaultTokenContext.DECLARATION;
                    } else {
                        state = INIT;
                        return XMLDefaultTokenContext.ERROR;
                    }

                case ISA_SGML_DECL_DASH:
                    if( actChar == '-' ) {
                        state = ISI_ERROR;
                        break;
                    } else {
                        if( isWS( actChar ) ) {
                            state = ISI_ERROR;
                            continue;
                        } else {
                            state = ISI_SGML_DECL;
                            continue;
                        }
                    }

                case ISA_REF:           // after '&' (or '%' in the internal DTD)
                    if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
                        state = ISI_REF_NAME;
                        break;
                    }
                    if( actChar == '#' ) {
                        state = ISA_REF_HASH;
                        break;
                    }
                    // not a reference after all; resume in the remembered state
                    state = subState;
                    continue;

                case ISI_REF_NAME:      // inside the name of an entity reference
                    if( UnicodeClasses.isXMLNameChar( actChar ) ) break;
                    if( actChar == ';' ) offset++;  // the terminating ';' belongs to the token
                    state = subState;
                    return XMLDefaultTokenContext.CHARACTER;

                case ISA_REF_HASH:      // after "&#"
                    if( actChar >= '0' && actChar <= '9' ) {
                        state = ISI_REF_DEC;
                        break;
                    }
                    if( actChar == 'x' || actChar == 'X' ) {
                        state = ISA_REF_X;
                        break;
                    }
                    if( isAZ( actChar ) ) {
                        offset++;
                        state = subState;
                        return XMLDefaultTokenContext.ERROR;
                    }
                    state = subState;
                    continue;

                case ISI_REF_DEC:       // inside a decimal character reference
                    if( actChar >= '0' && actChar <= '9' ) break;
                    if( actChar == ';' ) offset++;
                    state = subState;
                    return XMLDefaultTokenContext.CHARACTER;

                case ISA_REF_X:         // after "&#x"
                    if( isHex( actChar ) ) {
                        state = ISI_REF_HEX;
                        break;
                    }
                    state = subState;
                    return XMLDefaultTokenContext.ERROR;       // error on previous "&#x" sequence

                case ISI_REF_HEX:       // inside a hexadecimal character reference
                    if( isHex( actChar ) ) break;
                    if( actChar == ';' ) offset++;
                    state = subState;
                    return XMLDefaultTokenContext.CHARACTER;
            }


            offset++;   // consume the current char (reached via 'break' from the state switch)
        } // end of while(offset...)

        /** At this stage there's no more text in the scanned buffer.
         * Scanner first checks whether this is completely the last
         * available buffer.
         */
        if( lastBuffer ) {
            // No more input will come: classify whatever has been scanned so far
            // according to the state the scanner stopped in.
            switch( state ) {
                case INIT:
                case ISI_TEXT:
                case ISA_LT:
                case ISA_SLASH:
                case ISA_SGML_ESCAPE:
                case ISA_SGML_DASH:
                    return XMLDefaultTokenContext.TEXT;

                case ISA_REF:
                case ISA_REF_HASH:
                    if( subState == ISI_TEXT ) return XMLDefaultTokenContext.TEXT;
                    else return XMLDefaultTokenContext.VALUE;

                case ISI_XML_COMMENT:
                case ISA_XML_COMMENT_DASH:
                case ISI_XML_COMMENT_WS:
                    return XMLDefaultTokenContext.BLOCK_COMMENT;

                case ISI_TAG:
                case ISI_ENDTAG:
                    return XMLDefaultTokenContext.TAG;

                case ISI_ARG:
                    return XMLDefaultTokenContext.ARGUMENT;

                case ISI_ERROR:
                    return XMLDefaultTokenContext.ERROR;

                case ISP_ARG_WS:
                case ISP_TAG_WS:
                case ISP_ENDTAG_WS:
                case ISP_EQ_WS:
                case ISP_ARG_X:
                case ISP_TAG_X:
                case ISP_ENDTAG_X:
                case ISP_EQ:
                    return XMLDefaultTokenContext.WS;

                case ISI_VAL_APOS:
                case ISI_VAL_QUOT:
                case ISI_DECL_CHARS:
                case ISI_DECL_STRING:
                    return XMLDefaultTokenContext.VALUE;

                case ISI_SGML_DECL:
                case ISA_SGML_DECL_DASH:
                case ISP_DECL_STRING:
                case ISP_DECL_CHARS:
                    return XMLDefaultTokenContext.DECLARATION;

                case ISI_REF_NAME:
                case ISI_REF_DEC:
                case ISA_REF_X:
                case ISI_REF_HEX:
                    return XMLDefaultTokenContext.CHARACTER;

                case ISI_PI:
                    return XMLDefaultTokenContext.PI_START;
                case ISI_PI_TARGET:
                    return XMLDefaultTokenContext.PI_TARGET;
                case ISP_PI_TARGET_WS:
                    return XMLDefaultTokenContext.WS;
                case ISI_PI_CONTENT:
                    return XMLDefaultTokenContext.PI_CONTENT;
                case ISA_PI_CONTENT_QMARK:
                case ISP_PI_CONTENT_QMARK:
                    // we are at end of the last buffer and expect that next char will be '>'
                    return XMLDefaultTokenContext.PI_END;

                case ISA_LTEXBR:
                case ISA_LTEXBRC:
                case ISA_LTEXBRCD:
                case ISA_LTEXBRCDA:
                case ISA_LTEXBRCDAT:
                case ISA_LTEXBRCDATA:
                    return XMLDefaultTokenContext.TEXT;

                case ISI_CDATA:
                case ISA_CDATA_BR:
                case ISA_CDATA_BRBR:
                    return XMLTokenIDs.CDATA_SECTION;

                case ISA_INIT_BR:
                    return XMLDefaultTokenContext.TEXT;

                default:
                    throw new IllegalStateException("Last buffer does not handle state " + state + "!");    //NOI18N
            }
        }

        return null;  // ask for next buffer

    }