in lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java [920:1103]
private void build(
int nodeID,
int leafNodeOffset,
MutablePointTree reader,
int from,
int to,
IndexOutput out,
byte[] minPackedValue,
byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds)
throws IOException {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= config.maxPointsInLeafNode();
// Compute common prefixes
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim = 0; dim < config.numDims(); dim++) {
final int offset = dim * config.bytesPerDim();
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j]
!= scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
commonPrefixLengths[dim] = j;
break;
}
}
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
for (int dim = 0; dim < config.numDims(); ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim = 0; dim < config.numDims(); dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < config.numDims(); ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointTreeReaderUtils.sortByDim(
config,
sortedDim,
commonPrefixLengths,
reader,
from,
to,
scratchBytesRef1,
scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength());
// Write the full values:
IntFunction<BytesRef> packedValues =
new IntFunction<BytesRef>() {
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(
count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
// inner node
// compute the split dimension and partition around it
final int splitDim = split(minPackedValue, maxPackedValue);
final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = config.bytesPerDim();
for (int i = 0; i < config.bytesPerDim(); ++i) {
if (minPackedValue[splitDim * config.bytesPerDim() + i]
!= maxPackedValue[splitDim * config.bytesPerDim() + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointTreeReaderUtils.partition(
config,
maxDoc,
splitDim,
commonPrefixLen,
reader,
from,
to,
mid,
scratchBytesRef1,
scratchBytesRef2);
// set the split value
final int address = nodeID * (1 + config.bytesPerDim());
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
splitPackedValues,
address + 1,
config.bytesPerDim());
byte[] minSplitPackedValue =
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
byte[] maxSplitPackedValue =
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
minSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
maxSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
// recurse
build(
nodeID * 2,
leafNodeOffset,
reader,
from,
mid,
out,
minPackedValue,
maxSplitPackedValue,
splitPackedValues,
leafBlockFPs,
spareDocIds);
build(
nodeID * 2 + 1,
leafNodeOffset,
reader,
mid,
to,
out,
minSplitPackedValue,
maxPackedValue,
splitPackedValues,
leafBlockFPs,
spareDocIds);
}
}