SimpleOrderedMap findTopSlots()

in solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java [324:537]


  SimpleOrderedMap<Object> findTopSlots(
      final int numSlots,
      final int slotCardinality,
      @SuppressWarnings("rawtypes") IntFunction<Comparable> bucketValFromSlotNumFunc,
      @SuppressWarnings("rawtypes") Function<Comparable, String> fieldQueryValFunc)
      throws IOException {
    assert this.sortAcc != null;
    long numBuckets = 0;

    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
      effectiveLimit = freq.limit;
      if (fcontext.isShard()) {
        if (freq.overrequest > 0) {
          // NOTE: although _default_ distrib overrequest is disabled for the "index sort" case (see
          // below), we _do_ want to respect an _explicit_ `overrequest` value, if present.
          // Overrequest is always relevant (regardless of prelim sort) for the `resort` case; but
          // even in the case of "index sort, no resort", overrequest can be relevant in some edge
          // cases of the "shard" case, where it can affect the behavior of `isBucketComplete()`
          // (see SOLR-14595).
          effectiveLimit += freq.overrequest;
        } else {
          switch (freq.overrequest) {
            case 0:
              // no-op (overrequest explicitly disabled)
              break;
            case -1:
              // default
              if (!"index".equals(this.sort.sortVariable)) {
                // NOTE: even for distrib requests, `overrequest` is not directly relevant for
                // "index" sort, hence there is no default/implicit overrequest for "index sort"
                // (even if `resort` is also specified -- overrequest that is exclusively for
                // `resort` must be explicit, even in a distrib context)
                effectiveLimit = applyDefaultOverrequest(freq.offset, effectiveLimit);
              }
              break;
            default:
              // other negative values are not supported
              throw new IllegalArgumentException(
                  "Illegal `overrequest` specified: " + freq.overrequest);
          }
        }
      } else if (null != resort && 0 < freq.overrequest) {
        // in non-shard situations, if we have a 'resort' we check for explicit overrequest > 0
        effectiveLimit += freq.overrequest;
      }
    }

    final int sortMul = sort.sortDirection.getMultiplier();

    int maxTopVals =
        (int)
            (effectiveLimit >= 0
                ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1)
                : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
      orderPredicate =
          (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
          };
    } else {
      orderPredicate =
          (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
          };
    }
    final PriorityQueue<Slot> queue =
        new PriorityQueue<>(maxTopVals) {
          @Override
          protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
          }
        };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    boolean shardHasMoreBuckets = false; // This shard has more buckets than were returned
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {

      // screen out buckets not matching mincount
      if (effectiveMincount > 0) {
        long count = countAcc.getCount(slotNum);
        if (count < effectiveMincount) {
          if (count > 0) {
            // Still increment numBuckets as long as we have some count. This is for
            // consistency between distrib and non-distrib mode.
            numBuckets++;
          }
          continue;
        }
      }

      numBuckets++;

      if (bottom != null) {
        shardHasMoreBuckets = true;
        // scratchSlot is only used to hold this slotNum for the following line
        scratchSlot.slot = slotNum;
        if (orderPredicate.test(bottom, scratchSlot)) {
          bottom.slot = slotNum;
          bottom = queue.updateTop();
        }
      } else if (effectiveLimit > 0) {
        // queue not full
        Slot s = new Slot();
        s.slot = slotNum;
        queue.add(s);
        if (queue.size() >= maxTopVals) {
          bottom = queue.top();
        }
      }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
      if (!fcontext.isShard()) {
        res.add("numBuckets", numBuckets);
      } else {
        calculateNumBuckets(res);
      }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) fdebug.putInfoItem("numBuckets", numBuckets);

    if (freq.allBuckets) {
      SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
      // countAcc.setValues(allBuckets, allBucketsSlot);
      allBuckets.add("count", allBucketsAcc.getSpecialCount());
      allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
      // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
      res.add("allBuckets", allBuckets);
    }

    SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
    if (freq.missing) {
      res.add("missing", missingBucket);
      // moved missing fillBucket after we fill facet since it will reset all the accumulators.
    }

    final boolean needFilter = (!deferredAggs.isEmpty()) || freq.getSubFacets().size() > 0;
    if (needFilter) {
      createOtherAccs(-1, 1);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts...
    // ...unless we need to resort.
    int collectCount = Math.max(0, queue.size() - (null == this.resort ? off : 0));
    //
    assert collectCount <= maxTopVals;
    Slot[] sortedSlots = new Slot[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
      Slot slot = sortedSlots[i] = queue.pop();
      // At this point we know we're either returning this Slot as a Bucket, or resorting it,
      // so definitely fill in the bucket value -- we'll need it either way
      slot.bucketVal = bucketValFromSlotNumFunc.apply(slot.slot);

      if (needFilter || null != this.resort) {
        slot.bucketFilter = makeBucketQuery(fieldQueryValFunc.apply(slot.bucketVal));
      }
    }

    final SlotAcc resortAccForFill = resortSlots(sortedSlots); // No-Op if not needed

    if (null != this.resort) {
      // now that we've completely resorted, throw away extra docs from possible
      // offset/overrequest...
      final int endOffset =
          (int)
              Math.min(
                  (long) sortedSlots.length,
                  // NOTE: freq.limit is long, so no risk of overflow here
                  off + (freq.limit < 0 ? Integer.MAX_VALUE : freq.limit));
      if (0 < off || endOffset < sortedSlots.length) {
        sortedSlots = Arrays.copyOfRange(sortedSlots, off, endOffset);
      }
    }
    List<SimpleOrderedMap<?>> bucketList = new ArrayList<>(sortedSlots.length);

    for (Slot slot : sortedSlots) {
      SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
      bucket.add("val", slot.bucketVal);

      fillBucketFromSlot(bucket, slot, resortAccForFill);

      bucketList.add(bucket);
    }

    res.add("buckets", bucketList);

    if (fcontext.isShard() && shardHasMoreBuckets) {
      // Currently, "more" is an internal implementation detail and only returned for distributed
      // sub-requests
      res.add("more", true);
    }

    if (freq.missing) {
      // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
      fillBucket(
          missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
    }

    return res;
  }