private TokenID doParseToken()

in netbeansintegration/tags/3.0.0.beta/src/org/openoffice/extensions/util/UnoIdlSyntax.java [106:254]


    /**
     * Scans forward from the current {@code offset} and reports which kind of
     * token was found.
     * <p>
     * The scan starts by asking {@code getNextState} for a lookahead result:
     * element 0 of the returned array is added to {@code offset} and element 1
     * becomes the new {@code state}. After that the buffer is walked one
     * character at a time until the active state decides that its token is
     * complete, or until {@code stopOffset} is reached.
     * <p>
     * Side effects visible in this method: {@code startOffset}, {@code token},
     * {@code offset} and {@code state} are all updated.
     *
     * @return the id of the recognized token; {@code null} when the scan ends
     *         exactly at {@code stopOffset} without a state having returned a
     *         token, or when the remaining state maps to no token id
     */
    private TokenID doParseToken() {
        startOffset = offset;

        // Lookahead: [0] = number of chars consumed by the lookahead,
        //            [1] = state to continue scanning in.
        final int[] lookahead = getNextState(buffer, offset, stopOffset, state);
        final int consumed = lookahead[0];
        // kept only for debugging purposes
        token = new String(buffer, offset, consumed);
        offset += consumed;
        state = lookahead[1];

        // Walk the buffer until the current state yields a token. Every
        // "break" out of the switch advances by one char via the loop header.
        for (; offset < stopOffset; offset++) {
            final char current = buffer[offset];
            switch (state) {
                case ISI_LINE_COMMENT:
                    // a line comment ends with (and includes) the line break
                    if (current == '\n') { // NOI18N
                        offset++;
                        return TokenID.COMMENT;
                    }
                    break;

                case ISI_BLOCK_COMMENT:
                    // a star may be the first half of the closing "*/"
                    if (current == '*') { // NOI18N
                        state = ISI_STAR_IN_BLOCK_COMMENT;
                    }
                    break;

                case ISI_STAR_IN_BLOCK_COMMENT:
                    if (current == '/') { // NOI18N
                        // comment closed; the slash belongs to the token
                        offset++;
                        return TokenID.COMMENT;
                    }
                    if (current != '*') { // NOI18N
                        // anything but another star: plain comment text again
                        state = ISI_BLOCK_COMMENT;
                    }
                    break;

                case ISI_PRECOMPILER:
                    // one more char is consumed, then the token is reported
                    offset++;
                    return TokenID.PRECOMPILE;

                case ISI_KEYWORD:
                    // nothing to consume here: the keyword length was already
                    // added to offset by the lookahead above
                    return TokenID.KEYWORD;

                case ISI_TEXT: {
                    // Only the state of the lookahead matters here. Text and
                    // whitespace toggle into each other inside one token; any
                    // other upcoming state closes the token.
                    final int upcoming =
                            getNextState(buffer, offset, stopOffset, state)[1];
                    if (upcoming == ISI_WHITESPACE) {
                        state = ISI_WHITESPACE;
                    } else if (upcoming != ISI_TEXT) {
                        return TokenID.TEXT;
                    }
                    break;
                }

                case ISI_WHITESPACE: {
                    // mirror image of the ISI_TEXT handling
                    final int upcoming =
                            getNextState(buffer, offset, stopOffset, state)[1];
                    if (upcoming == ISI_TEXT) {
                        state = ISI_TEXT;
                    } else if (upcoming != ISI_WHITESPACE) {
                        return TokenID.TEXT;
                    }
                    break;
                }

                case ISI_OPENING_BRACKET:
                    // the bracket char is reported as text; the following
                    // token is scanned as bracket content
                    state = ISI_TEXT_IN_BRACKETS;
                    offset++;
                    return TokenID.TEXT;

                case ISI_TEXT_IN_BRACKETS:
                    if (current == ']') { // NOI18N
                        // ']' is not consumed: the token is only the text
                        // between the brackets
                        state = ISI_TEXT;
                        return TokenID.BRACKET;
                    }
                    break;

                case ISI_DOUBLE_QUOTE:
                    if (current == '\"') { // NOI18N
                        // closing quote is part of the string token
                        offset++;
                        return TokenID.STRING;
                    }
                    if (current == '\\') { // NOI18N
                        state = ISI_BACKSLASH_IN_DOUBLE_QUOTE;
                    }
                    break;

                case ISI_BACKSLASH_IN_DOUBLE_QUOTE:
                    // the char after a backslash is skipped unconditionally
                    state = ISI_DOUBLE_QUOTE;
                    break;

                case ISI_SINGLE_QUOTE:
                    if (current == '\'') { // NOI18N
                        // closing quote is part of the char token
                        offset++;
                        return TokenID.CHAR;
                    }
                    if (current == '\\') { // NOI18N
                        state = ISI_BACKSLASH_IN_SINGLE_QUOTE;
                    }
                    break;

                case ISI_BACKSLASH_IN_SINGLE_QUOTE:
                    // the char after a backslash is skipped unconditionally
                    state = ISI_SINGLE_QUOTE;
                    break;

                case ISI_NUMBER: {
                    final boolean digit = current >= '0' && current <= '9';
                    if (!digit && current != '.') {
                        // first char after the number: it is not consumed
                        return TokenID.NUMBER;
                    }
                    break;
                }

                default:
                    // states without handling here just advance by one char
                    break;
            }
        }

        // The loop ran out of input. When the scan stopped exactly at
        // stopOffset, no token is reported and the current state is kept.
        // NOTE(review): because the loop advances one char at a time, the
        // mapping below is reached only when the initial lookahead advance
        // moved offset beyond stopOffset - confirm that this is intended.
        if (offset == stopOffset) {
            return null;
        }

        // A token was started but its end was not found: report it by state.
        switch (state) {
            case ISI_LINE_COMMENT:
            case ISI_BLOCK_COMMENT:
                return TokenID.COMMENT;
            case ISI_TEXT:
            case ISI_WHITESPACE:
            case ISI_FUNCTION:
                return TokenID.TEXT;
            case ISI_KEYWORD:
                return TokenID.KEYWORD;
            case ISI_PRECOMPILER:
                return TokenID.PRECOMPILE;
            case ISI_TEXT_IN_BRACKETS:
                return TokenID.BRACKET;
            case ISI_DOUBLE_QUOTE:
                return TokenID.STRING;
            case ISI_SINGLE_QUOTE:
                return TokenID.CHAR;
            case ISI_NUMBER:
                return TokenID.NUMBER;
            default:
                return null;
        }
    }