in lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java [1641:1876]
private void build(
int leavesOffset,
int numLeaves,
MutablePointTree reader,
int from,
int to,
IndexOutput out,
byte[] minPackedValue,
byte[] maxPackedValue,
int[] parentSplits,
byte[] splitPackedValues,
byte[] splitDimensionValues,
long[] leafBlockFPs,
int[] spareDocIds)
throws IOException {
if (numLeaves == 1) {
// leaf node
final int count = to - from;
assert count <= config.maxPointsInLeafNode();
// Compute common prefixes
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim = 0; dim < config.numDims(); dim++) {
final int offset = dim * config.bytesPerDim();
int dimensionPrefixLength = commonPrefixLengths[dim];
commonPrefixLengths[dim] =
Math.min(
dimensionPrefixLength,
commonPrefixComparator.compare(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + offset,
scratchBytesRef2.bytes,
scratchBytesRef2.offset + offset));
}
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
for (int dim = 0; dim < config.numDims(); ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim = 0; dim < config.numDims(); dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < config.numDims(); ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
sortedDim = dim;
sortedDimCardinality = cardinality;
}
}
}
// sort by sortedDim
MutablePointTreeReaderUtils.sortByDim(
config,
sortedDim,
commonPrefixLengths,
reader,
from,
to,
scratchBytesRef1,
scratchBytesRef2);
BytesRef comparator = scratchBytesRef1;
BytesRef collector = scratchBytesRef2;
reader.getValue(from, comparator);
int leafCardinality = 1;
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, collector);
for (int dim = 0; dim < config.numDims(); dim++) {
final int start = dim * config.bytesPerDim();
if (equalsPredicate.test(
collector.bytes,
collector.offset + start,
comparator.bytes,
comparator.offset + start)
== false) {
leafCardinality++;
BytesRef scratch = collector;
collector = comparator;
comparator = scratch;
break;
}
}
}
// Save the block file pointer:
leafBlockFPs[leavesOffset] = out.getFilePointer();
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = from; i < to; ++i) {
docIDs[i - from] = reader.getDocID(i);
}
// System.out.println("writeLeafBlock pos=" + out.getFilePointer());
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch, 0, config.packedBytesLength());
writeCommonPrefixes(out, commonPrefixLengths, scratch);
// Write the full values:
IntFunction<BytesRef> packedValues =
i -> {
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
};
assert valuesInOrderAndBounds(
config, count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
writeLeafBlockPackedValues(
out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
} else {
// inner node
final int splitDim;
// compute the split dimension and partition around it
if (config.numIndexDims() == 1) {
splitDim = 0;
} else {
// for dimensions > 2 we recompute the bounds for the current inner node to help the
// algorithm choose best
// split dimensions. Because it is an expensive operation, the frequency we recompute the
// bounds is given
// by SPLITS_BEFORE_EXACT_BOUNDS.
if (numLeaves != leafBlockFPs.length
&& config.numIndexDims() > 2
&& Arrays.stream(parentSplits).sum() % SPLITS_BEFORE_EXACT_BOUNDS == 0) {
computePackedValueBounds(
reader, from, to, minPackedValue, maxPackedValue, scratchBytesRef1);
}
splitDim = split(minPackedValue, maxPackedValue, parentSplits);
}
// How many leaves will be in the left tree:
int numLeftLeafNodes = getNumLeftLeafNodes(numLeaves);
// How many points will be in the left tree:
final int mid = from + numLeftLeafNodes * config.maxPointsInLeafNode();
final int commonPrefixLen =
commonPrefixComparator.compare(
minPackedValue,
splitDim * config.bytesPerDim(),
maxPackedValue,
splitDim * config.bytesPerDim());
MutablePointTreeReaderUtils.partition(
config,
maxDoc,
splitDim,
commonPrefixLen,
reader,
from,
to,
mid,
scratchBytesRef1,
scratchBytesRef2);
final int rightOffset = leavesOffset + numLeftLeafNodes;
final int splitOffset = rightOffset - 1;
// set the split value
final int address = splitOffset * config.bytesPerDim();
splitDimensionValues[splitOffset] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
splitPackedValues,
address,
config.bytesPerDim());
byte[] minSplitPackedValue =
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
byte[] maxSplitPackedValue =
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
minSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
System.arraycopy(
scratchBytesRef1.bytes,
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
maxSplitPackedValue,
splitDim * config.bytesPerDim(),
config.bytesPerDim());
// recurse
parentSplits[splitDim]++;
build(
leavesOffset,
numLeftLeafNodes,
reader,
from,
mid,
out,
minPackedValue,
maxSplitPackedValue,
parentSplits,
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
spareDocIds);
build(
rightOffset,
numLeaves - numLeftLeafNodes,
reader,
mid,
to,
out,
minSplitPackedValue,
maxPackedValue,
parentSplits,
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
spareDocIds);
parentSplits[splitDim]--;
}
}