in modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java [77:216]
/**
 * Reads the next token from {@code input} into {@code reusableToken}.
 *
 * <p>Characters are consumed from {@code ioBuffer} (refilled from {@code input}
 * on demand) and appended, lower-cased, to the token's term buffer according to
 * these rules:
 * <ul>
 * <li>{@code PATH_START} / {@code PATH_SEPARATOR} are emitted as a one-character
 * token of their own; when one is met in the middle of a token it is pushed back
 * and the current token ends. Either way the type/URI separator flags are
 * reset.</li>
 * <li>The first {@code TYPE_SEPARATOR} and the first {@code URI_SEPARATOR} seen
 * since the last path separator discard everything accumulated so far; the
 * separator itself is never part of a term.</li>
 * <li>Runs of digits and runs of ASCII letters form separate tokens. Inside a
 * letter run, an upper-case letter following a lower-case one starts a new token,
 * and a lower-case letter following two or more upper-case letters gives the last
 * upper-case letter back so that it starts the next token.</li>
 * <li>Any other character ends the current token (or is skipped when no token is
 * in progress). Accented letters are not indexed for now.</li>
 * </ul>
 *
 * @param reusableToken token instance to clear and fill; must not be {@code null}
 * @return {@code reusableToken} with its term and start/end offsets set, or
 *         {@code null} when the input is exhausted and no characters are pending
 * @throws IOException if reading from {@code input} fails
 */
public Token next(Token reusableToken) throws IOException {
    assert reusableToken != null;
    reusableToken.clear();
    int length = 0;
    int start = bufferIndex;
    char[] buffer = reusableToken.termBuffer();
    boolean lowercaseCharFound = false;
    boolean digitFound = false;
    while (true) {
        if (bufferIndex >= dataLen) {
            // Refill the I/O buffer. While a token without any lower-case char
            // is in progress, the camel-case rule below may still step back onto
            // the previously consumed char (bufferIndex -= 2 after the
            // post-increment), so that single char is carried over to slot 0.
            // length > 0 guarantees at least one char was consumed from the
            // current buffer, hence bufferIndex - 1 is a valid index.
            final int carry = (lowercaseCharFound || length == 0) ? 0 : 1;
            if (carry > 0) {
                ioBuffer[0] = ioBuffer[bufferIndex - 1];
            }
            // offset is the absolute position of ioBuffer[0]; carried chars
            // must not be counted twice.
            offset += dataLen - carry;
            dataLen = input.read(ioBuffer, carry, ioBuffer.length - carry);
            if (dataLen == -1) {
                if (length > 0) {
                    break; // flush the pending token at end of input
                } else {
                    return null;
                }
            }
            bufferIndex = carry;
            dataLen += carry;
        }
        final char c = ioBuffer[bufferIndex++];
        // breakChar: c terminates the current token.
        // includeChar: c is stored although it is a break char.
        boolean breakChar = true;
        boolean includeChar = false;
        if (c == PATH_START || c == PATH_SEPARATOR) {
            if (length == 0) {
                includeChar = true;
            } else {
                // Push the separator back; it becomes the next token.
                bufferIndex--;
            }
            typeCharFound = false;
            uriCharFound = false;
        } else if (c == TYPE_SEPARATOR && !typeCharFound || c == URI_SEPARATOR && !uriCharFound) {
            // Drop whatever was accumulated before the separator.
            length = 0;
            breakChar = false;
            lowercaseCharFound = false;
            digitFound = false;
        } else {
            if (Character.isDigit(c)) {
                if (digitFound || length == 0) {
                    breakChar = false;
                    digitFound = true;
                } else {
                    // Digit right after letters: re-read it as a new token.
                    bufferIndex--;
                }
                // TODO: normalize accent, it does not index accents for
                // now
            } else if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z') {
                if (digitFound) {
                    // Letter right after digits: re-read it as a new token.
                    bufferIndex--;
                } else if (Character.isLowerCase(c)) {
                    if (!(lowercaseCharFound || length <= 1)) {
                        // Two or more upper-case chars followed by a lower-case
                        // one: hand the last upper-case char back so it starts
                        // the next token together with c.
                        length--;
                        bufferIndex -= 2;
                    } else {
                        lowercaseCharFound = true;
                        breakChar = false;
                    }
                } else if (!lowercaseCharFound) { // && uppercase
                    breakChar = false;
                } else {
                    // Upper-case after lower-case: re-read it as a new token.
                    bufferIndex--;
                }
            }
        }
        if (!breakChar || includeChar) {
            if (length == 0) { // start of token
                start = offset + bufferIndex - 1;
            } else if (length == buffer.length) {
                buffer = reusableToken.resizeTermBuffer(1 + length);
            }
            if (c == TYPE_SEPARATOR && !typeCharFound) {
                typeCharFound = true;
            } else if (c == URI_SEPARATOR && !uriCharFound) {
                // Record the URI separator in its own flag so that only the
                // first one per path element resets the token.
                uriCharFound = true;
            } else {
                buffer[length++] = Character.toLowerCase(c); // buffer it, normalized
            }
            if (length == MAX_WORD_LEN || (breakChar && length > 0)) { // buffer overflow!
                break;
            }
        } else if (length > 0) { // at non-Letter w/ chars
            break; // return 'em
        }
    }
    reusableToken.setTermLength(length);
    reusableToken.setStartOffset(start);
    reusableToken.setEndOffset(start + length);
    return reusableToken;
}