public Token next()

in modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java [77:216]


        /**
         * Reads the next token from {@code input} into {@code reusableToken}.
         * <p>
         * Splitting rules, as implemented below:
         * <ul>
         * <li>{@code PATH_START} and {@code PATH_SEPARATOR} are each returned as
         * a one-character token and reset the per-path-element flags
         * {@code typeCharFound} and {@code uriCharFound};</li>
         * <li>the first {@code TYPE_SEPARATOR} and the first
         * {@code URI_SEPARATOR} seen after a path separator discard whatever
         * was accumulated so far and are not emitted themselves; later
         * occurrences fall through to the generic branch and act as plain
         * delimiters;</li>
         * <li>a run of digits forms its own token;</li>
         * <li>ASCII letters are split on case changes: an upper-case letter
         * after a lower-case one starts a new token, and when a lower-case
         * letter follows a run of two or more upper-case letters, the last
         * upper-case letter is given back so it starts the next token;</li>
         * <li>every other character is a delimiter and is dropped.</li>
         * </ul>
         * Characters are lower-cased before being stored in the term buffer.
         *
         * @param reusableToken token to be cleared, filled and returned; must
         *        not be {@code null}
         * @return {@code reusableToken} holding the next term and its offsets,
         *         or {@code null} when the input is exhausted
         * @throws IOException if reading from {@code input} fails
         */
        public Token next(Token reusableToken) throws IOException {
            assert reusableToken != null;
            reusableToken.clear();
            int length = 0;
            int start = bufferIndex;
            char[] buffer = reusableToken.termBuffer();

            boolean lowercaseCharFound = false;
            boolean digitFound = false;

            while (true) {

                if (bufferIndex >= dataLen) {
                    // The I/O buffer is exhausted: account for it and refill.
                    offset += dataLen;
                    final int incr;

                    if (lowercaseCharFound || length == 0) {
                        incr = 0;

                    } else {
                        // The current token may still need the camel-case
                        // back-up below (bufferIndex -= 2), which re-reads the
                        // character consumed just before the refill. Keep the
                        // last consumed characters in slots 0 and 1 so that
                        // back-up lands on valid data. Locals are used because
                        // source and destination slots can overlap.
                        incr = 2;
                        final char last = ioBuffer[bufferIndex - 1];
                        final char beforeLast = bufferIndex >= 2 ? ioBuffer[bufferIndex - 2] : last;
                        ioBuffer[0] = beforeLast;
                        ioBuffer[1] = last;

                    }

                    dataLen = input.read(ioBuffer, incr, ioBuffer.length - incr);
                    if (dataLen == -1) {
                        // Keep dataLen non-negative so that further calls after
                        // end of input do not decrement offset.
                        dataLen = 0;
                        if (length > 0) {
                            break;
                        } else {
                            return null;
                        }
                    }
                    // The carried characters were already counted in offset;
                    // shift it back so that offset + index is still the
                    // absolute position of ioBuffer[index].
                    offset -= incr;
                    bufferIndex = incr;
                    dataLen += incr;

                }

                final char c = ioBuffer[bufferIndex++];
                // breakChar: c ends the current token.
                // includeChar: c is stored even though it is a break character.
                boolean breakChar = true;
                boolean includeChar = false;

                if (c == PATH_START || c == PATH_SEPARATOR) {

                    if (length == 0) {
                        // Emit the separator as a token of its own.
                        includeChar = true;

                    } else {
                        // Finish the pending token first; the separator is
                        // re-read on the next call.
                        bufferIndex--;
                    }

                    typeCharFound = false;
                    uriCharFound = false;

                } else if (c == TYPE_SEPARATOR && !typeCharFound || c == URI_SEPARATOR && !uriCharFound) {
                    // First type/URI separator of this path element: drop
                    // everything accumulated so far and keep scanning.
                    length = 0;
                    breakChar = false;
                    lowercaseCharFound = false;
                    digitFound = false;

                } else {

                    if (Character.isDigit(c)) {

                        if (digitFound || length == 0) {
                            breakChar = false;
                            digitFound = true;

                        } else {
                            // Digit after letters: end the token, re-read the
                            // digit on the next call.
                            bufferIndex--;
                        }

                        // TODO: normalize accent, it does not index accents for
                        // now
                    } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {

                        if (digitFound) {
                            // Letter after digits: end the token, re-read the
                            // letter on the next call.
                            bufferIndex--;

                        } else if (Character.isLowerCase(c)) {

                            if (!(lowercaseCharFound || length <= 1)) {
                                // Lower-case letter after two or more
                                // upper-case letters: give back the last
                                // upper-case letter together with this one so
                                // both start the next token.
                                length--;
                                bufferIndex -= 2;

                            } else {
                                lowercaseCharFound = true;
                                breakChar = false;

                            }

                        } else if (!lowercaseCharFound) { // && uppercase
                            breakChar = false;

                        } else {
                            // Upper-case letter after lower-case ones: end the
                            // token, re-read the letter on the next call.
                            bufferIndex--;
                        }

                    }

                }

                if (!breakChar || includeChar) {

                    if (length == 0) { // start of token
                        start = offset + bufferIndex - 1;
                    } else if (length == buffer.length) {
                        buffer = reusableToken.resizeTermBuffer(1 + length);
                    }

                    if (c == TYPE_SEPARATOR && !typeCharFound) {
                        typeCharFound = true;

                    } else if (c == URI_SEPARATOR && !uriCharFound) {
                        // Record that the URI separator of this path element
                        // has been consumed (this previously set
                        // typeCharFound, leaving uriCharFound never true).
                        uriCharFound = true;

                    } else {
                        buffer[length++] = Character.toLowerCase(c); // buffer
                        // it,
                        // normalized
                    }

                    if (length == MAX_WORD_LEN || (breakChar && length > 0)) { // buffer
                        // overflow!
                        break;
                    }

                } else if (length > 0) {// at non-Letter w/ chars

                    break; // return 'em

                }

            }

            reusableToken.setTermLength(length);
            reusableToken.setStartOffset(start);
            reusableToken.setEndOffset(start + length);

            return reusableToken;

        }