in lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java [237:635]
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
term.grow(1 + target.length);
assert clearEOF();
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
// minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
FST.Arc<Pair<BytesRef, Long>> arc;
int targetUpto;
Pair<BytesRef, Long> output;
long startFrameFP = currentFrame.fp;
targetBeforeCurrentLength = currentFrame.ord;
boolean changed = false;
// TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can
// check?
if (currentFrame != staticFrame) {
// We are already seek'd; find the common
// prefix of new seek term vs current term and
// re-use the corresponding seek state. For
// example, if app first seeks to foobar, then
// seeks to foobaz, we can re-use the seek state
// for the first 5 bytes.
// if (DEBUG) {
// System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
// }
arc = arcs[0];
assert arc.isFinal();
output = arc.output();
targetUpto = 0;
IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
assert validIndexPrefix <= term.length()
: "validIndexPrefix="
+ validIndexPrefix
+ " term.length="
+ term.length()
+ " seg="
+ fr.parent;
final int targetLimit = Math.min(target.length, validIndexPrefix);
int cmp = 0;
// TODO: reverse vLong byte order for better FST
// prefix output sharing
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit
// + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
}
arc = arcs[1 + targetUpto];
// if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
// System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char)
// (target.bytes[target.offset + targetUpto] & 0xFF));
// }
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
: "arc.label="
+ (char) arc.label()
+ " targetLabel="
+ (char) (target.bytes[target.offset + targetUpto] & 0xFF);
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
}
if (arc.isFinal()) {
lastFrame = stack[1 + lastFrame.ord];
}
targetUpto++;
}
if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame; we only do this
// to find out if the target term is before,
// equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
cmp =
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" +
// targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
// targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}
if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
}
if (cmp < 0) {
// Common case: target term is after current
// term, ie, app is seeking multiple terms
// in sorted order
// if (DEBUG) {
// System.out.println(" target is after current (shares prefixLen=" + targetUpto + ");
// frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
// }
currentFrame = lastFrame;
} else if (cmp > 0) {
// Uncommon case: target term
// is before current term; this means we can
// keep the currentFrame but we must rewind it
// (so we scan from the start)
targetBeforeCurrentLength = 0;
changed = true;
// if (DEBUG) {
// System.out.println(" target is before current (shares prefixLen=" + targetUpto + ");
// rewind frame ord=" + lastFrame.ord);
// }
currentFrame = lastFrame;
currentFrame.rewind();
} else {
// Target is exactly the same as current term
assert term.length() == target.length;
if (termExists) {
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
// if (DEBUG) {
// System.out.println(" target is same as current maxIDVersion=" +
// currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
// }
return false;
}
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// This term's version is lower than the minVersion
// if (DEBUG) {
// System.out.println(" target is same as current but version=" +
// ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" +
// minIDVersion + "; return false");
// }
return false;
}
// System.out.println(" term version=" + ((IDVersionTermState)
// currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame
// ord=" + currentFrame.ord);
// if (DEBUG) {
// System.out.println(" target is same as current; return true");
// }
return true;
} else {
// if (DEBUG) {
// System.out.println(" target is same as current but term doesn't exist");
// }
}
// validIndexPrefix = currentFrame.depth;
// term.length = target.length;
// return termExists;
}
} else {
targetBeforeCurrentLength = -1;
arc = fr.index.getFirstArc(arcs[0]);
// System.out.println("first arc=" + arc);
// Empty string prefix must have an output (block) in the index!
assert arc.isFinal();
assert arc.output() != null;
// if (DEBUG) {
// System.out.println(" no seek state; push root frame");
// }
output = arc.output();
currentFrame = staticFrame;
// term.length = 0;
targetUpto = 0;
currentFrame =
pushFrame(
arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength + " termExists=" + termExists);
// }
// We are done sharing the common prefix with the incoming target and where we are currently
// seek'd; now continue walking the index:
while (targetUpto < target.length) {
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
final FST.Arc<Pair<BytesRef, Long>> nextArc =
fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " +
// Integer.toHexString(targetLabel) + " termExists=" + termExists);
// }
validIndexPrefix = currentFrame.prefix;
// validIndexPrefix = targetUpto;
currentFrame.scanToFloorFrame(target);
if (!currentFrame.hasTerms) {
termExists = false;
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
// System.out.println(" check maxVersion=" + currentFrame.maxIDVersion + " vs " +
// minIDVersion);
// if (DEBUG) {
// System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs
// minIDVersion=" + minIDVersion);
// }
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
if (currentFrame.fp != startFrameFP || changed) {
// if (targetUpto+1 > term.length) {
termExists = false;
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" reset current term");
// }
validIndexPrefix = Math.min(validIndexPrefix, term.length());
}
// if (currentFrame.ord != startFrameOrd) {
// termExists = false;
// }
// if (DEBUG) {
// System.out.println(" FAST version NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
// " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
// validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
// " termExists=" + termExists);
// }
return false;
}
currentFrame.loadBlock();
// if (DEBUG) {
// System.out.println(" scan currentFrame ord=" + currentFrame.ord);
// }
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// This term's version is lower than the minVersion
// if (DEBUG) {
// System.out.println(" return NOT_FOUND: idVersion=" + ((IDVersionTermState)
// currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
// }
return false;
}
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
return true;
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
} else {
// Follow this arc
arc = nextArc;
if (term.byteAt(targetUpto) != (byte) targetLabel) {
// if (DEBUG) {
// System.out.println(" now set termExists=false targetUpto=" + targetUpto + " term=" +
// term.bytes[targetUpto] + " targetLabel=" + targetLabel);
// }
changed = true;
term.setByteAt(targetUpto, (byte) targetLabel);
termExists = false;
}
// Aggregate output as we go:
assert arc.output() != null;
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
}
// if (DEBUG) {
// System.out.println(" index: follow label=" + (char) ((target.bytes[target.offset +
// targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
targetUpto++;
if (arc.isFinal()) {
// if (DEBUG) System.out.println(" arc is final!");
currentFrame =
pushFrame(
arc,
VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
targetUpto);
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
// currentFrame.hasTerms);
}
}
}
// validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
currentFrame.scanToFloorFrame(target);
// Target term is entirely contained in the index:
if (!currentFrame.hasTerms) {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
// if (DEBUG) {
// System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs
// minIDVersion=" + minIDVersion);
// }
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
termExists = false;
term.setLength(targetUpto);
return false;
}
currentFrame.loadBlock();
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
currentFrame.decodeMetaData();
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
// This term's version is lower than the minVersion
return false;
}
return true;
} else {
// if (DEBUG) {
// System.out.println(" got result " + result + "; return NOT_FOUND term=" +
// term.utf8ToString());
// }
return false;
}
}