in oak-lucene/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java [1804:2054]
// Ceiling seek: positions this enum on target, or on the term that follows it.
//
// Outline of what the body does:
//   1. If a previous seek left state behind (currentFrame != staticFrame), the
//      cached arcs/frames covering the bytes shared by the current term and the
//      target are re-used; otherwise the root frame is pushed.
//   2. The terms index is then followed along the remaining target bytes, and a
//      frame is pushed for every final arc that is crossed.
//   3. The current frame is moved to its floor frame, its block is loaded and
//      scanned for the target.
//
// Returns FOUND right away when target equals the current term and that term
// exists. Otherwise returns whatever the block scan reports, except that an END
// from the scan is resolved by calling next(): NOT_FOUND if a following term
// exists, END if not.
//
// Throws IllegalStateException when the terms index was not loaded.
public SeekStatus seekCeil(final BytesRef target) throws IOException {
  if (index == null) {
    throw new IllegalStateException("terms index was not loaded");
  }

  // Grow the term buffer so that it is strictly longer than the target.
  if (target.length >= term.bytes.length) {
    term.bytes = ArrayUtil.grow(term.bytes, 1 + target.length);
  }

  assert clearEOF();

  targetBeforeCurrentLength = currentFrame.ord;

  FST.Arc<BytesRef> curArc;   // arc reached in the index so far
  BytesRef accumulated;       // index output summed along the arcs followed
  int pos;                    // number of target bytes already matched in the index

  if (currentFrame == staticFrame) {
    // No seek state yet: start from the root of the index.
    targetBeforeCurrentLength = -1;
    curArc = index.getFirstArc(arcs[0]);

    // Empty string prefix must have an output (block) in the index!
    assert curArc.isFinal();
    assert curArc.output != null;

    accumulated = curArc.output;
    pos = 0;
    currentFrame = pushFrame(curArc, fstOutputs.add(accumulated, curArc.nextFinalOutput), 0);
  } else {
    // We are already seek'd: find the prefix the new target shares with the
    // current term and re-use the seek state that belongs to it. E.g. seeking
    // to foobar and then to foobaz lets us keep the state for the first 5 bytes.
    curArc = arcs[0];
    assert curArc.isFinal();
    accumulated = curArc.output;
    pos = 0;

    Frame lastFrame = stack[0];
    assert validIndexPrefix <= term.length;

    int cmp = 0;

    // TODO: we should write our vLong backwards (MSB
    // first) to get better sharing from the FST

    // Pass 1: compare only as far as the seek frames are valid, replaying the
    // saved arcs, outputs and frames while the bytes agree.
    final int framePrefixLimit = Math.min(target.length, validIndexPrefix);
    for (; pos < framePrefixLimit; pos++) {
      final int termByte = term.bytes[pos] & 0xFF;
      final int targetByte = target.bytes[target.offset + pos] & 0xFF;
      cmp = termByte - targetByte;
      if (cmp != 0) {
        break;
      }
      curArc = arcs[1 + pos];
      assert curArc.label == targetByte : "arc.label=" + (char) curArc.label + " targetLabel=" + (char) targetByte;
      // TODO: we could save the outputs in local
      // byte[][] instead of making new objs every
      // seek; but, often the FST doesn't have any
      // shared bytes (but this could change if we
      // reverse vLong byte order)
      if (curArc.output != NO_OUTPUT) {
        accumulated = fstOutputs.add(accumulated, curArc.output);
      }
      if (curArc.isFinal()) {
        lastFrame = stack[1 + lastFrame.ord];
      }
    }

    if (cmp == 0) {
      // Pass 2: compare the rest of the term purely to order the two terms;
      // pos, arc, output and frame are deliberately left where pass 1 stopped.
      final int sharedLimit = Math.min(target.length, term.length);
      for (int i = pos; i < sharedLimit; i++) {
        cmp = (term.bytes[i] & 0xFF) - (target.bytes[target.offset + i] & 0xFF);
        if (cmp != 0) {
          break;
        }
      }
      if (cmp == 0) {
        // One is a prefix of the other: the shorter one sorts first.
        cmp = term.length - target.length;
      }
    }

    if (cmp == 0) {
      // Target is exactly the same as current term
      assert term.length == target.length;
      if (termExists) {
        return SeekStatus.FOUND;
      }
      // Same bytes but the term doesn't exist: fall through and seek again.
    } else {
      // cmp < 0 is the common case: target is after the current term, i.e. the
      // app is seeking multiple terms in sorted order.
      currentFrame = lastFrame;
      if (cmp > 0) {
        // Uncommon case: target is before the current term; the frame can be
        // kept but has to be rewound so that we scan from the start.
        targetBeforeCurrentLength = 0;
        currentFrame.rewind();
      }
    }
  }

  // Follow the index along the remaining target bytes.
  while (pos < target.length) {
    final int label = target.bytes[target.offset + pos] & 0xFF;
    final FST.Arc<BytesRef> followed = index.findTargetArc(label, curArc, getArc(1 + pos), fstReader);
    if (followed == null) {
      // Index is exhausted; the deepest frame pushed so far is the one to scan.
      break;
    }

    // Follow this arc
    term.bytes[pos] = (byte) label;
    curArc = followed;

    // Aggregate output as we go:
    assert curArc.output != null;
    if (curArc.output != NO_OUTPUT) {
      accumulated = fstOutputs.add(accumulated, curArc.output);
    }

    pos++;

    if (curArc.isFinal()) {
      currentFrame = pushFrame(curArc, fstOutputs.add(accumulated, curArc.nextFinalOutput), pos);
    }
  }

  // Reached both when the index ran out of arcs and when the whole target was
  // consumed: scan the current frame for the target.
  validIndexPrefix = currentFrame.prefix;
  currentFrame.scanToFloorFrame(target);
  currentFrame.loadBlock();

  final SeekStatus scanned = currentFrame.scanToTerm(target, false);
  if (scanned != SeekStatus.END) {
    return scanned;
  }

  // The scan hit END: record the target as the (non-existent) current term and
  // let next() report whether any term follows it.
  term.copyBytes(target);
  termExists = false;
  return next() != null ? SeekStatus.NOT_FOUND : SeekStatus.END;
}