in solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java [166:323]
public Map<String, Object> getRefinement(Context mcontext) {
  // Builds the refinement request for the shard identified by mcontext.shardNum.
  //
  // Parameter: mcontext - merge context; supplies the shard number, the per-bucket "shard saw
  //   this bucket" flags, the "enclosing bucket was missing" state and the sub-facet tag sets.
  // Returns: null when nothing at this level (or below) needs refining for this shard; otherwise
  //   whatever getRefinementSpecial(...) returns for the map assembled here. That map (when
  //   non-null) may hold up to three keys:
  //     "_l" : bucket values the shard did not return (no sub-facet detail needed)
  //     "_p" : [bucketValue, subRefinement] pairs for unseen buckets with partial sub-facets
  //     "_s" : [bucketValue, subRefinement] pairs for seen buckets whose sub-facets need refining
  //
  // step 1) If this facet request has refining, then we need to fully request top buckets that
  // were not seen by this shard.
  // step 2) If this facet does not have refining, but some sub-facets do, we need to
  // check/recurse those sub-facets in *every* top bucket.
  // A combination of the two is possible and makes step 2 redundant for any buckets we fully
  // requested in step 1.
  Map<String, Object> refinement = null;

  Collection<String> tags = mcontext.getSubsWithRefinement(freq);
  if (tags.isEmpty() && !freq.doRefine()) {
    // we don't have refining, and neither do our subs
    return null;
  }

  // Buckets are ranked by the preliminary sort when one is configured, else by the main sort.
  final FacetRequest.FacetSort initial_sort =
      null == freq.prelim_sort ? freq.sort : freq.prelim_sort;

  // Tags for sub facets that have partial facets somewhere in their children.
  // If we are missing a bucket for this shard, we'll need to get the specific buckets that need
  // refining.
  Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);

  // Was this whole facet missing (i.e. inside a bucket that was missing)?
  boolean thisMissing = mcontext.bucketWasMissing();

  // shard indicated it has more buckets
  boolean shardHasMore =
      shardHasMoreBuckets != null && shardHasMoreBuckets.get(mcontext.shardNum);
  // if we didn't hear from the shard at all, assume it has more buckets
  shardHasMore |= thisMissing;

  // If we know we've seen all the buckets from a shard, then we don't have to add to leafBuckets
  // or partialBuckets, only skipBuckets
  // TODO: should returnsPartial() check processEmpty internally?
  // NOTE(review): isCommandPartial is computed but never read in this method; it is kept only
  // so that the freq.returnsPartial() call is not dropped - confirm it can be removed.
  boolean isCommandPartial = freq.returnsPartial() || freq.processEmpty;

  // did the shard return all of the possible buckets at this level? (pretend it didn't if
  // processEmpty is set)
  boolean returnedAllBuckets = !shardHasMore && !freq.processEmpty;

  if (returnedAllBuckets && tags.isEmpty() && tagsWithPartial.isEmpty()) {
    // this shard returned all of its possible buckets, and there were no sub-facets with partial
    // results or sub-facets that require refining
    return null;
  }

  long numBucketsToCheck = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
  if (freq.limit >= 0) {
    numBucketsToCheck = freq.offset + freq.limit; // effective limit
    if (-1 == freq.overrefine) { // DEFAULT: use heuristic for overrefinement

      // when we don't have to worry about mincount pruning, there is no need for any
      // over refinement for these sorts..
      if (freq.mincount <= 1
          && ("index".equals(initial_sort.sortVariable)
              || ("count".equals(initial_sort.sortVariable)
                  && FacetRequest.SortDirection.desc == initial_sort.sortDirection))) {
        // No-Op
      } else if (0 <= freq.overrequest) {
        // if user asked for an explicit amount of overrequesting,
        // (but did not provide an explicit amount of overrefinement)
        // then use the same amount for overrefinement
        numBucketsToCheck += freq.overrequest;
      } else {
        // default: add 10% plus 4
        numBucketsToCheck = (long) (numBucketsToCheck * 1.1 + 4);
      }

      // TODO: should we scale our 'overrefine' (heuristic) value based on 'mincount' ?
      //
      // If mincount=M > 1 should we be doing something like numBucketsToCheck *= M ?
      // Perhaps that would make more sense in the 'overrequest' heuristic calc?
      //
      // Maybe we should look at how many buckets were fully populated in phase#1 AND
      // already meet the 'mincount', and use the difference between that number
      // and 'limit' to decide a scaling factor for 'overrefine' ?

    } else { // user requested an explicit amount of overrefinement
      numBucketsToCheck += freq.overrefine;
    }
  }
  // Never look at more buckets than we actually have.
  numBucketsToCheck = Math.min(buckets.size(), numBucketsToCheck);

  // Because numBucketsToCheck is capped at buckets.size() above, there is no case in which the
  // unsorted bucket collection can be used: the top-N candidates always come from the sorted
  // list.
  // don't re-sort (the prerefinement values) if our subclass already did it
  if (sortedBuckets == null) {
    sortBuckets(initial_sort); // todo: make sure this filters buckets as well
  }
  Collection<FacetBucket> bucketList = sortedBuckets;

  // "_l" missing buckets specified by bucket value only (no need to specify anything further)
  ArrayList<Object> leafBuckets = null;
  // "_p" missing buckets that have a partial sub-facet that need to specify those bucket
  // values... each entry is [bucketval, subs]
  ArrayList<Object> partialBuckets = null;
  // "_s" present buckets that we need to recurse into because children facets have refinement
  // requirements. each entry is [bucketval, subs]
  ArrayList<Object> skipBuckets = null;

  for (FacetBucket bucket : bucketList) {
    if (numBucketsToCheck-- <= 0) {
      break;
    }

    // if the whole facet was missing for this shard, no bucket can be flagged as seen
    assert !thisMissing || !mcontext.getShardFlag(bucket.bucketNumber);
    boolean saw = !thisMissing && mcontext.getShardFlag(bucket.bucketNumber);

    if (!saw && !returnedAllBuckets) {
      // we didn't see the bucket for this shard, and it's possible that the shard has it
      Map<String, Object> bucketRefinement = null;

      // find facets that we need to fill in buckets for
      if (!tagsWithPartial.isEmpty()) {
        // mark the context as "inside a missing bucket" while recursing, then restore it
        boolean prev = mcontext.setBucketWasMissing(true);
        bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
        mcontext.setBucketWasMissing(prev);

        if (bucketRefinement != null) {
          if (partialBuckets == null) {
            partialBuckets = new ArrayList<>();
          }
          partialBuckets.add(Arrays.asList(bucket.bucketValue, bucketRefinement));
        }
      }

      // if we didn't add to "_p" (missing with partial sub-facets), then we should add to "_l"
      // (missing leaf)
      if (bucketRefinement == null) {
        if (leafBuckets == null) {
          leafBuckets = new ArrayList<>();
        }
        leafBuckets.add(bucket.bucketValue);
      }

    } else if (!tags.isEmpty()) {
      // we had this bucket, but we need to recurse to certain children that have refinements.
      // The recursion targets 'tags' (subs with refinement), matching the guard above; the
      // partial-only set is only relevant for buckets the shard did not return.
      Map<String, Object> bucketRefinement = bucket.getRefinement(mcontext, tags);
      if (bucketRefinement != null) {
        if (skipBuckets == null) {
          skipBuckets = new ArrayList<>();
        }
        skipBuckets.add(Arrays.asList(bucket.bucketValue, bucketRefinement));
      }
    }
  }

  // TODO: what if we don't need to refine any variable buckets, but we do need to contribute to
  // numBuckets, missing, allBuckets, etc... because we were "partial". That will be handled at a
  // higher level (i.e. we'll be in someone's missing bucket?)
  // TODO: test with a sub-facet with a limit of 0 and something like a missing bucket
  if (leafBuckets != null || partialBuckets != null || skipBuckets != null) {
    refinement = CollectionUtil.newHashMap(3);
    if (leafBuckets != null) {
      refinement.put("_l", leafBuckets);
    }
    if (partialBuckets != null) {
      refinement.put("_p", partialBuckets);
    }
    if (skipBuckets != null) {
      refinement.put("_s", skipBuckets);
    }
  }

  // Give getRefinementSpecial a chance to add to (or create) the refinement map; what it adds is
  // not visible from this method.
  refinement = getRefinementSpecial(mcontext, refinement, tagsWithPartial);

  return refinement;
}