private void build()

in lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java [1641:1876]


  private void build(
      int leavesOffset,
      int numLeaves,
      MutablePointTree reader,
      int from,
      int to,
      IndexOutput out,
      byte[] minPackedValue,
      byte[] maxPackedValue,
      int[] parentSplits,
      byte[] splitPackedValues,
      byte[] splitDimensionValues,
      long[] leafBlockFPs,
      int[] spareDocIds)
      throws IOException {

    if (numLeaves == 1) {
      // leaf node
      final int count = to - from;
      assert count <= config.maxPointsInLeafNode();

      // Compute common prefixes
      Arrays.fill(commonPrefixLengths, config.bytesPerDim());
      reader.getValue(from, scratchBytesRef1);
      for (int i = from + 1; i < to; ++i) {
        reader.getValue(i, scratchBytesRef2);
        for (int dim = 0; dim < config.numDims(); dim++) {
          final int offset = dim * config.bytesPerDim();
          int dimensionPrefixLength = commonPrefixLengths[dim];
          commonPrefixLengths[dim] =
              Math.min(
                  dimensionPrefixLength,
                  commonPrefixComparator.compare(
                      scratchBytesRef1.bytes,
                      scratchBytesRef1.offset + offset,
                      scratchBytesRef2.bytes,
                      scratchBytesRef2.offset + offset));
        }
      }

      // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
      FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
      for (int dim = 0; dim < config.numDims(); ++dim) {
        if (commonPrefixLengths[dim] < config.bytesPerDim()) {
          usedBytes[dim] = new FixedBitSet(256);
        }
      }
      for (int i = from + 1; i < to; ++i) {
        for (int dim = 0; dim < config.numDims(); dim++) {
          if (usedBytes[dim] != null) {
            byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
            usedBytes[dim].set(Byte.toUnsignedInt(b));
          }
        }
      }
      int sortedDim = 0;
      int sortedDimCardinality = Integer.MAX_VALUE;
      for (int dim = 0; dim < config.numDims(); ++dim) {
        if (usedBytes[dim] != null) {
          final int cardinality = usedBytes[dim].cardinality();
          if (cardinality < sortedDimCardinality) {
            sortedDim = dim;
            sortedDimCardinality = cardinality;
          }
        }
      }

      // sort by sortedDim
      MutablePointTreeReaderUtils.sortByDim(
          config,
          sortedDim,
          commonPrefixLengths,
          reader,
          from,
          to,
          scratchBytesRef1,
          scratchBytesRef2);

      BytesRef comparator = scratchBytesRef1;
      BytesRef collector = scratchBytesRef2;
      reader.getValue(from, comparator);
      int leafCardinality = 1;
      for (int i = from + 1; i < to; ++i) {
        reader.getValue(i, collector);
        for (int dim = 0; dim < config.numDims(); dim++) {
          final int start = dim * config.bytesPerDim();
          if (equalsPredicate.test(
                  collector.bytes,
                  collector.offset + start,
                  comparator.bytes,
                  comparator.offset + start)
              == false) {
            leafCardinality++;
            BytesRef scratch = collector;
            collector = comparator;
            comparator = scratch;
            break;
          }
        }
      }
      // Save the block file pointer:
      leafBlockFPs[leavesOffset] = out.getFilePointer();

      // Write doc IDs
      int[] docIDs = spareDocIds;
      for (int i = from; i < to; ++i) {
        docIDs[i - from] = reader.getDocID(i);
      }
      // System.out.println("writeLeafBlock pos=" + out.getFilePointer());
      writeLeafBlockDocs(out, docIDs, 0, count);

      // Write the common prefixes:
      reader.getValue(from, scratchBytesRef1);
      System.arraycopy(
          scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch, 0, config.packedBytesLength());
      writeCommonPrefixes(out, commonPrefixLengths, scratch);

      // Write the full values:
      IntFunction<BytesRef> packedValues =
          i -> {
            reader.getValue(from + i, scratchBytesRef1);
            return scratchBytesRef1;
          };
      assert valuesInOrderAndBounds(
          config, count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
      writeLeafBlockPackedValues(
          out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
    } else {
      // inner node

      final int splitDim;
      // compute the split dimension and partition around it
      if (config.numIndexDims() == 1) {
        splitDim = 0;
      } else {
        // for dimensions > 2 we recompute the bounds for the current inner node to help the
        // algorithm choose best
        // split dimensions. Because it is an expensive operation, the frequency we recompute the
        // bounds is given
        // by SPLITS_BEFORE_EXACT_BOUNDS.
        if (numLeaves != leafBlockFPs.length
            && config.numIndexDims() > 2
            && Arrays.stream(parentSplits).sum() % SPLITS_BEFORE_EXACT_BOUNDS == 0) {
          computePackedValueBounds(
              reader, from, to, minPackedValue, maxPackedValue, scratchBytesRef1);
        }
        splitDim = split(minPackedValue, maxPackedValue, parentSplits);
      }

      // How many leaves will be in the left tree:
      int numLeftLeafNodes = getNumLeftLeafNodes(numLeaves);
      // How many points will be in the left tree:
      final int mid = from + numLeftLeafNodes * config.maxPointsInLeafNode();

      final int commonPrefixLen =
          commonPrefixComparator.compare(
              minPackedValue,
              splitDim * config.bytesPerDim(),
              maxPackedValue,
              splitDim * config.bytesPerDim());

      MutablePointTreeReaderUtils.partition(
          config,
          maxDoc,
          splitDim,
          commonPrefixLen,
          reader,
          from,
          to,
          mid,
          scratchBytesRef1,
          scratchBytesRef2);

      final int rightOffset = leavesOffset + numLeftLeafNodes;
      final int splitOffset = rightOffset - 1;
      // set the split value
      final int address = splitOffset * config.bytesPerDim();
      splitDimensionValues[splitOffset] = (byte) splitDim;
      reader.getValue(mid, scratchBytesRef1);
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          splitPackedValues,
          address,
          config.bytesPerDim());

      byte[] minSplitPackedValue =
          ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
      byte[] maxSplitPackedValue =
          ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          minSplitPackedValue,
          splitDim * config.bytesPerDim(),
          config.bytesPerDim());
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          maxSplitPackedValue,
          splitDim * config.bytesPerDim(),
          config.bytesPerDim());

      // recurse
      parentSplits[splitDim]++;
      build(
          leavesOffset,
          numLeftLeafNodes,
          reader,
          from,
          mid,
          out,
          minPackedValue,
          maxSplitPackedValue,
          parentSplits,
          splitPackedValues,
          splitDimensionValues,
          leafBlockFPs,
          spareDocIds);
      build(
          rightOffset,
          numLeaves - numLeftLeafNodes,
          reader,
          mid,
          to,
          out,
          minSplitPackedValue,
          maxPackedValue,
          parentSplits,
          splitPackedValues,
          splitDimensionValues,
          leafBlockFPs,
          spareDocIds);
      parentSplits[splitDim]--;
    }
  }