private void build()

in lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java [920:1103]


  private void build(
      int nodeID,
      int leafNodeOffset,
      MutablePointTree reader,
      int from,
      int to,
      IndexOutput out,
      byte[] minPackedValue,
      byte[] maxPackedValue,
      byte[] splitPackedValues,
      long[] leafBlockFPs,
      int[] spareDocIds)
      throws IOException {

    if (nodeID >= leafNodeOffset) {
      // leaf node
      final int count = to - from;
      assert count <= config.maxPointsInLeafNode();

      // Compute common prefixes
      Arrays.fill(commonPrefixLengths, config.bytesPerDim());
      reader.getValue(from, scratchBytesRef1);
      for (int i = from + 1; i < to; ++i) {
        reader.getValue(i, scratchBytesRef2);
        for (int dim = 0; dim < config.numDims(); dim++) {
          final int offset = dim * config.bytesPerDim();
          for (int j = 0; j < commonPrefixLengths[dim]; j++) {
            if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j]
                != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
              commonPrefixLengths[dim] = j;
              break;
            }
          }
        }
      }

      // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
      FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
      for (int dim = 0; dim < config.numDims(); ++dim) {
        if (commonPrefixLengths[dim] < config.bytesPerDim()) {
          usedBytes[dim] = new FixedBitSet(256);
        }
      }
      for (int i = from + 1; i < to; ++i) {
        for (int dim = 0; dim < config.numDims(); dim++) {
          if (usedBytes[dim] != null) {
            byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
            usedBytes[dim].set(Byte.toUnsignedInt(b));
          }
        }
      }
      int sortedDim = 0;
      int sortedDimCardinality = Integer.MAX_VALUE;
      for (int dim = 0; dim < config.numDims(); ++dim) {
        if (usedBytes[dim] != null) {
          final int cardinality = usedBytes[dim].cardinality();
          if (cardinality < sortedDimCardinality) {
            sortedDim = dim;
            sortedDimCardinality = cardinality;
          }
        }
      }

      // sort by sortedDim
      MutablePointTreeReaderUtils.sortByDim(
          config,
          sortedDim,
          commonPrefixLengths,
          reader,
          from,
          to,
          scratchBytesRef1,
          scratchBytesRef2);

      // Save the block file pointer:
      leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();

      // Write doc IDs
      int[] docIDs = spareDocIds;
      for (int i = from; i < to; ++i) {
        docIDs[i - from] = reader.getDocID(i);
      }
      writeLeafBlockDocs(out, docIDs, 0, count);

      // Write the common prefixes:
      reader.getValue(from, scratchBytesRef1);
      System.arraycopy(
          scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength());

      // Write the full values:
      IntFunction<BytesRef> packedValues =
          new IntFunction<BytesRef>() {
            @Override
            public BytesRef apply(int i) {
              reader.getValue(from + i, scratchBytesRef1);
              return scratchBytesRef1;
            }
          };
      assert valuesInOrderAndBounds(
          count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);

    } else {
      // inner node

      // compute the split dimension and partition around it
      final int splitDim = split(minPackedValue, maxPackedValue);
      final int mid = (from + to + 1) >>> 1;

      int commonPrefixLen = config.bytesPerDim();
      for (int i = 0; i < config.bytesPerDim(); ++i) {
        if (minPackedValue[splitDim * config.bytesPerDim() + i]
            != maxPackedValue[splitDim * config.bytesPerDim() + i]) {
          commonPrefixLen = i;
          break;
        }
      }
      MutablePointTreeReaderUtils.partition(
          config,
          maxDoc,
          splitDim,
          commonPrefixLen,
          reader,
          from,
          to,
          mid,
          scratchBytesRef1,
          scratchBytesRef2);

      // set the split value
      final int address = nodeID * (1 + config.bytesPerDim());
      splitPackedValues[address] = (byte) splitDim;
      reader.getValue(mid, scratchBytesRef1);
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          splitPackedValues,
          address + 1,
          config.bytesPerDim());

      byte[] minSplitPackedValue =
          ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
      byte[] maxSplitPackedValue =
          ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          minSplitPackedValue,
          splitDim * config.bytesPerDim(),
          config.bytesPerDim());
      System.arraycopy(
          scratchBytesRef1.bytes,
          scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
          maxSplitPackedValue,
          splitDim * config.bytesPerDim(),
          config.bytesPerDim());

      // recurse
      build(
          nodeID * 2,
          leafNodeOffset,
          reader,
          from,
          mid,
          out,
          minPackedValue,
          maxSplitPackedValue,
          splitPackedValues,
          leafBlockFPs,
          spareDocIds);
      build(
          nodeID * 2 + 1,
          leafNodeOffset,
          reader,
          mid,
          to,
          out,
          minSplitPackedValue,
          maxPackedValue,
          splitPackedValues,
          leafBlockFPs,
          spareDocIds);
    }
  }