in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java [1591:1789]
/**
 * Fills {@code qr} with the {@code DocList} for {@code cmd} (and the {@code DocSet} too when
 * {@code GET_DOCSET} is set in the flags), using {@code queryResultCache} and {@code filterCache}
 * where the flags allow it.
 *
 * <p>Outline of the steps performed below:
 *
 * <ol>
 *   <li>Unless both {@code NO_CHECK_QCACHE} and {@code NO_SET_QCACHE} are set, build a {@code
 *       QueryResultKey}; if lookups are allowed and a usable cached superset covers the requested
 *       window, return straight away.
 *   <li>Otherwise compute {@code supersetMaxDoc} (rounded up to a multiple of {@code
 *       queryResultWindowSize} when the result will be cached) and store it on {@code cmd}.
 *   <li>Generate the result either from DocSets ({@code getDocSet} followed by {@code sortDocSet}
 *       or {@code constantScoreDocList}) or via {@code getDocListNC} / {@code
 *       getDocListAndSetNC}.
 *   <li>For non-cursor requests, slice the generated list down to the requested offset/len, then
 *       put the superset into {@code queryResultCache} if a key is still held, the size is within
 *       {@code queryResultMaxDocsCached}, and the results are not partial.
 * </ol>
 *
 * @param qr result holder; it receives a fresh {@code DocListAndSet} and is mutated in place
 * @param cmd the query command; note that {@code setSupersetMaxDoc} is called on it here
 * @return the same {@code qr} instance that was passed in
 * @throws IOException declared by this method; propagated from the search calls made below
 */
private QueryResult getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
// TODO don't take QueryResult as arg; create one here
// When early segment termination was requested, record an explicit FALSE up front.
// NOTE(review): presumably a downstream collector flips this to TRUE if termination actually
// happens -- confirm against getDocListNC / getDocListAndSetNC.
if (cmd.getSegmentTerminateEarly()) {
qr.setSegmentTerminatedEarly(Boolean.FALSE);
}
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
QueryResultKey key = null;
int maxDocRequested = cmd.getOffset() + cmd.getLen();
// check for overflow, and check for # docs in index
// (a negative sum means offset + len overflowed int; either way clamp to maxDoc())
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc = maxDocRequested;
DocList superset = null;
int flags = cmd.getFlags();
Query q = cmd.getQuery();
// A query that opts out of caching (getCache() == false) disables queryResultCache reads and
// writes as well as the filterCache-based path further down.
if (q instanceof ExtendedQuery eq) {
if (!eq.getCache()) {
flags |= (NO_CHECK_QCACHE | NO_SET_QCACHE | NO_CHECK_FILTERCACHE);
}
}
// we can try and look up the complete query in the cache.
// This block is skipped only when BOTH NO_CHECK_QCACHE and NO_SET_QCACHE are set (or there is
// no queryResultCache at all); with just one of them set we still need the key.
if (queryResultCache != null
&& (flags & (NO_CHECK_QCACHE | NO_SET_QCACHE)) != ((NO_CHECK_QCACHE | NO_SET_QCACHE))) {
// all the current flags can be reused during warming,
// so set all of them on the cache key.
key =
new QueryResultKey(
q,
cmd.getFilterList(),
cmd.getSort(),
flags,
cmd.getMinExactCount(),
cmd.isDistribStatsDisabled());
if ((flags & NO_CHECK_QCACHE) == 0) {
superset = queryResultCache.get(key);
if (superset != null) {
// check that the cache entry has scores recorded if we need them
if ((flags & GET_SCORES) == 0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than
// requested
out.docList = superset.subset(cmd.getOffset(), cmd.getLen());
}
}
if (out.docList != null) {
// found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query.
if (out.docSet == null && ((flags & GET_DOCSET) != 0)) {
if (cmd.getFilterList() == null) {
out.docSet = getDocSet(cmd.getQuery());
} else {
// combine the main query and all filters into one list for getDocSet
List<Query> newList = new ArrayList<>(cmd.getFilterList().size() + 1);
newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList);
}
}
// cache hit: nothing needs to be generated or re-cached
return qr;
}
}
// If we are going to generate the result, bump up to the
// next resultWindowSize for better caching.
if ((flags & NO_SET_QCACHE) == 0) {
// handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc = queryResultWindowSize;
} else {
// round up to the next multiple of queryResultWindowSize
supersetMaxDoc =
((maxDocRequested - 1) / queryResultWindowSize + 1) * queryResultWindowSize;
// a negative value means the rounding overflowed int; fall back to the unrounded request
if (supersetMaxDoc < 0) supersetMaxDoc = maxDocRequested;
}
} else {
key = null; // we won't be caching the result
}
}
// Publish the (possibly rounded-up) window on the command; it is read back below via
// cmd.getSupersetMaxDoc().
cmd.setSupersetMaxDoc(supersetMaxDoc);
// OK, so now we need to generate an answer.
// One way to do that would be to check if we have an unordered list
// of results for the base query. If so, we can apply the filters and then
// sort by the resulting set. This can only be used if:
// - the sort doesn't contain score
// - we don't want score returned.
// check if we should try and use the filter cache
// Both locals are final and assigned exactly once on each of the three branches below.
final boolean needSort;
final boolean useFilterCache;
if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) != 0 || filterCache == null) {
needSort = true; // this value should be irrelevant when `useFilterCache=false`
useFilterCache = false;
} else if (q instanceof MatchAllDocsQuery
|| (useFilterForSortedQuery && QueryUtils.isConstantScoreQuery(q))) {
// special-case MatchAllDocsQuery: implicit default useFilterForSortedQuery=true;
// otherwise, default behavior should not risk filterCache thrashing, so require
// `useFilterForSortedQuery==true`
// We only need to sort if we're returning results AND sorting by something other than SCORE
// (sort by "score" alone is pointless for these constant score queries)
final Sort sort = cmd.getSort();
needSort = cmd.getLen() > 0 && sortIncludesOtherThanScore(sort);
if (!needSort) {
useFilterCache = true;
} else {
/*
NOTE: if `sort:score` is specified, it will have no effect, so we really _could_ in
principle always use filterCache; but this would be a user request misconfiguration,
and supporting it would require us to mess with user sort, or ignore the fact that sort
expects `score` to be present ... so just make the optimization contingent on the absence
of `score` in the requested sort.
*/
useFilterCache =
Arrays.stream(sort.getSort()).noneMatch((sf) -> sf.getType() == SortField.Type.SCORE);
}
} else {
// for non-constant-score queries, must sort unless no docs requested
needSort = cmd.getLen() > 0;
useFilterCache = useFilterCacheForDynamicScoreQuery(needSort, cmd);
}
if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
// slower than simply re-executing the query.
if (out.docSet == null) {
out.docSet = getDocSet(cmd.getQuery());
List<Query> filterList = cmd.getFilterList();
if (filterList != null && !filterList.isEmpty()) {
// narrow the base-query DocSet by the intersection with the filters' DocSet
out.docSet = DocSetUtil.getDocSet(out.docSet.intersection(getDocSet(filterList)), this);
}
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
if (needSort) {
fullSortCount.increment();
sortDocSet(qr, cmd);
} else {
skipSortCount.increment();
// put unsorted list in place
out.docList = constantScoreDocList(cmd.getOffset(), cmd.getLen(), out.docSet);
if (0 == cmd.getSupersetMaxDoc()) {
// this is the only case where `cursorMark && !needSort`
qr.setNextCursorMark(cmd.getCursorMark());
} else {
// cursorMark should always add a `uniqueKey` sort field tie-breaker, which
// should prevent `needSort` from ever being false in conjunction with
// cursorMark, _except_ in the event of `rows=0` (accounted for in the clause
// above)
assert cmd.getCursorMark() == null;
}
}
} else {
fullSortCount.increment();
// do it the normal way...
if ((flags & GET_DOCSET) != 0) {
// this currently conflates returning the docset for the base query vs
// the base query and all filters.
DocSet qDocSet = getDocListAndSetNC(qr, cmd);
// cache the docSet matching the query w/o filtering
// (skipped for partial results so an incomplete set is never stored in filterCache)
if (qDocSet != null && filterCache != null && !qr.isPartialResults())
filterCache.put(cmd.getQuery(), qDocSet);
} else {
getDocListNC(qr, cmd);
}
assert null != out.docList : "docList is null";
}
if (null == cmd.getCursorMark()) {
// Kludge...
// we can't use DocSlice.subset, even though it should be an identity op
// because it gets confused by situations where there are lots of matches, but
// fewer docs in the slice than were requested, (due to the cursor)
// so we have to short circuit the call.
// None of which is really a problem since we can't use caching with
// cursors anyway, but it still looks weird to have to special case this
// behavior based on this condition - hence the long explanation.
// Non-cursor path: what was generated is the superset; hand the caller only the
// requested offset/len window of it.
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(), cmd.getLen());
} else {
// sanity check our cursor assumptions
assert null == superset : "cursor: superset isn't null";
assert 0 == cmd.getOffset() : "cursor: command offset mismatch";
assert 0 == out.docList.offset() : "cursor: docList offset mismatch";
assert cmd.getLen() >= supersetMaxDoc
: "cursor: superset len mismatch: " + cmd.getLen() + " vs " + supersetMaxDoc;
}
// lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached
// NOTE(review): on the cursor branch above `superset` is asserted to be null, so the
// dereference below is only safe if `key` is null for every cursor request (i.e. cursor
// requests always carry the no-cache flags) -- confirm against QueryCommand.
if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
queryResultCache.put(key, superset);
}
return qr;
}