public Map getRefinement()

in solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java [166:323]
84 lines of code
39 McCabe index (conditional complexity)

  /**
   * Builds the refinement request for this facet for the shard identified by {@code
   * mcontext.shardNum}.
   *
   * <p>Only the top buckets (according to the pre-refinement sort, plus an over-refinement margin)
   * are considered. Each considered bucket ends up in at most one of three lists in the result:
   *
   * <ul>
   *   <li>{@code "_l"}: buckets not seen from this shard, identified by bucket value only
   *   <li>{@code "_p"}: buckets not seen from this shard that have partial sub-facets; each entry
   *       is {@code [bucketValue, subRefinement]}
   *   <li>{@code "_s"}: buckets that do not need to be requested themselves but whose sub-facets
   *       need refinement; each entry is {@code [bucketValue, subRefinement]}
   * </ul>
   *
   * @param mcontext the merge context describing the shard being processed
   * @return {@code null} when neither this facet nor its sub-facets can need refinement from this
   *     shard; otherwise whatever {@code getRefinementSpecial} returns for the assembled map (the
   *     map passed to it is {@code null} when no bucket needed refinement)
   */
  public Map<String, Object> getRefinement(Context mcontext) {
    // step 1) If this facet request has refining, then we need to fully request top buckets that
    // were not seen by this shard.
    // step 2) If this facet does not have refining, but some sub-facets do, we need to
    // check/recurse those sub-facets in *every* top bucket.
    // A combination of the two is possible and makes step 2 redundant for any buckets we fully
    // requested in step 1.

    final Collection<String> tags = mcontext.getSubsWithRefinement(freq);
    if (tags.isEmpty() && !freq.doRefine()) {
      // we don't have refining, and neither do our subs
      return null;
    }

    final FacetRequest.FacetSort initialSort =
        null == freq.prelim_sort ? freq.sort : freq.prelim_sort;

    // Tags for sub facets that have partial facets somewhere in their children.
    // If we are missing a bucket for this shard, we'll need to get the specific buckets that need
    // refining.
    final Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);

    // Was this whole facet missing (i.e. inside a bucket that was missing)?
    final boolean thisMissing = mcontext.bucketWasMissing();
    // shard indicated it has more buckets; if we didn't hear from the shard at all, assume it
    // has more buckets
    final boolean shardHasMore =
        thisMissing
            || (shardHasMoreBuckets != null && shardHasMoreBuckets.get(mcontext.shardNum));

    // If we know we've seen all the buckets from a shard, then we don't have to add to leafBuckets
    // or partialBuckets, only skipBuckets.
    // Did the shard return all of the possible buckets at this level? (pretend it didn't if
    // processEmpty is set)
    // TODO: should returnsPartial() check processEmpty internally?
    final boolean returnedAllBuckets = !shardHasMore && !freq.processEmpty;

    if (returnedAllBuckets && tags.isEmpty() && tagsWithPartial.isEmpty()) {
      // this shard returned all of its possible buckets, and there were no sub-facets with partial
      // results or sub-facets that require refining
      return null;
    }

    // never look at more buckets than we actually have
    long numBucketsToCheck = Math.min(buckets.size(), computeNumBucketsToCheck(initialSort));

    // numBucketsToCheck is capped at buckets.size() above, so there is never a case where the
    // sort can be skipped: always iterate the sorted buckets.
    // don't re-sort (the prerefinement values) if our subclass already did it
    if (sortedBuckets == null) {
      sortBuckets(initialSort); // todo: make sure this filters buckets as well
    }
    final Collection<FacetBucket> bucketList = sortedBuckets;

    // "_l" missing buckets specified by bucket value only (no need to specify anything further)
    ArrayList<Object> leafBuckets = null;
    // "_p" missing buckets that have a partial sub-facet that need to specify those bucket
    // values... each entry is [bucketval, subs]
    ArrayList<Object> partialBuckets = null;
    // "_s" present buckets that we need to recurse into because children facets have refinement
    // requirements. each entry is [bucketval, subs]
    ArrayList<Object> skipBuckets = null;

    for (FacetBucket bucket : bucketList) {
      if (numBucketsToCheck-- <= 0) break;
      // if this whole facet was missing, the shard can not have contributed to any bucket
      assert !thisMissing || !mcontext.getShardFlag(bucket.bucketNumber);
      boolean saw = !thisMissing && mcontext.getShardFlag(bucket.bucketNumber);
      if (!saw && !returnedAllBuckets) {
        // we didn't see the bucket for this shard, and it's possible that the shard has it
        Map<String, Object> bucketRefinement = null;

        // find facets that we need to fill in buckets for
        if (!tagsWithPartial.isEmpty()) {
          // temporarily flag the bucket as missing while recursing, then restore the old value
          boolean prev = mcontext.setBucketWasMissing(true);
          bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
          mcontext.setBucketWasMissing(prev);

          if (bucketRefinement != null) {
            if (partialBuckets == null) partialBuckets = new ArrayList<>();
            partialBuckets.add(Arrays.asList(bucket.bucketValue, bucketRefinement));
          }
        }

        // if we didn't add to "_p" (missing with partial sub-facets), then we should add to "_l"
        // (missing leaf)
        if (bucketRefinement == null) {
          if (leafBuckets == null) leafBuckets = new ArrayList<>();
          leafBuckets.add(bucket.bucketValue);
        }

      } else if (!tags.isEmpty()) {
        // we had this bucket, but we need to recurse to certain children that have refinements.
        // The recursion must be driven by the sub-facets that require refinement (tags), not by
        // the ones that merely return partial results (tagsWithPartial).
        Map<String, Object> bucketRefinement = bucket.getRefinement(mcontext, tags);
        if (bucketRefinement != null) {
          if (skipBuckets == null) skipBuckets = new ArrayList<>();
          skipBuckets.add(Arrays.asList(bucket.bucketValue, bucketRefinement));
        }
      }
    }

    // TODO: what if we don't need to refine any variable buckets, but we do need to contribute to
    // numBuckets, missing, allBuckets, etc... because we were "partial".  That will be handled at a
    // higher level (i.e. we'll be in someone's missing bucket?)
    // TODO: test with a sub-facet with a limit of 0 and something like a missing bucket
    Map<String, Object> refinement = null;
    if (leafBuckets != null || partialBuckets != null || skipBuckets != null) {
      refinement = CollectionUtil.newHashMap(3);
      if (leafBuckets != null) refinement.put("_l", leafBuckets);
      if (partialBuckets != null) refinement.put("_p", partialBuckets);
      if (skipBuckets != null) refinement.put("_s", skipBuckets);
    }

    return getRefinementSpecial(mcontext, refinement, tagsWithPartial);
  }

  /**
   * Computes how many of the top buckets should be examined for refinement, before capping by the
   * number of buckets actually available: the effective limit (offset + limit) plus an
   * over-refinement margin.
   *
   * @param initialSort the sort used for the pre-refinement ordering of buckets
   * @return the number of buckets to examine; {@code Integer.MAX_VALUE} when the request has no
   *     limit
   */
  private long computeNumBucketsToCheck(FacetRequest.FacetSort initialSort) {
    if (freq.limit < 0) {
      // no limit: use max-int instead of max-long to avoid overflow
      return Integer.MAX_VALUE;
    }

    final long effectiveLimit = freq.offset + freq.limit;

    if (-1 != freq.overrefine) {
      // user requested an explicit amount of overrefinement
      return effectiveLimit + freq.overrefine;
    }

    // DEFAULT: use heuristic for overrefinement

    // TODO: should we scale our 'overrefine' (heuristic) value based on 'mincount' ?
    //
    // If mincount=M > 1 should we be doing something like numBucketsToCheck *= M ?
    // Perhaps that would make more sense in the 'overrequest' heuristic calc?
    //
    // Maybe we should look at how many buckets were fully populated in phase#1 AND
    // already meet the 'mincount', and use the difference between that number
    // and 'limit' to decide a scaling factor for 'overrefine' ?

    // when we don't have to worry about mincount pruning, there is no need for any
    // over refinement for these sorts..
    if (freq.mincount <= 1
        && ("index".equals(initialSort.sortVariable)
            || ("count".equals(initialSort.sortVariable)
                && FacetRequest.SortDirection.desc == initialSort.sortDirection))) {
      return effectiveLimit;
    }

    if (0 <= freq.overrequest) {
      // if user asked for an explicit amount of overrequesting,
      // (but did not provide an explicit amount of overrefinement)
      // then use the same amount for overrefinement
      return effectiveLimit + freq.overrequest;
    }

    // default: add 10% plus 4
    return (long) (effectiveLimit * 1.1 + 4);
  }