in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java [269:647]
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (clauses.size() == 0) {
return new MatchNoDocsQuery("empty BooleanQuery");
}
// Queries with no positive clauses have no matches
if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) {
return new MatchNoDocsQuery("pure negative BooleanQuery");
}
// optimize 1-clause queries
if (clauses.size() == 1) {
BooleanClause c = clauses.get(0);
Query query = c.query();
if (minimumNumberShouldMatch == 1 && c.occur() == Occur.SHOULD) {
return query;
} else if (minimumNumberShouldMatch == 0) {
switch (c.occur()) {
case SHOULD:
case MUST:
return query;
case FILTER:
// no scoring clauses, so return a score of 0
return new BoostQuery(new ConstantScoreQuery(query), 0);
case MUST_NOT:
default:
throw new AssertionError();
}
}
}
// recursively rewrite
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
boolean actuallyRewritten = false;
for (BooleanClause clause : this) {
Query query = clause.query();
BooleanClause.Occur occur = clause.occur();
Query rewritten;
if (occur == Occur.FILTER || occur == Occur.MUST_NOT) {
// Clauses that are not involved in scoring can get some extra simplifications
rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
}
} else {
rewritten = query.rewrite(indexSearcher);
}
if (rewritten != query || query.getClass() == MatchNoDocsQuery.class) {
// rewrite clause
actuallyRewritten = true;
if (rewritten.getClass() == MatchNoDocsQuery.class) {
switch (occur) {
case SHOULD:
case MUST_NOT:
// the clause can be safely ignored
break;
case MUST:
case FILTER:
return rewritten;
}
} else {
builder.add(rewritten, occur);
}
} else {
// leave as-is
builder.add(clause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// remove duplicate FILTER and MUST_NOT clauses
{
int clauseCount = 0;
for (Collection<Query> queries : clauseSets.values()) {
clauseCount += queries.size();
}
if (clauseCount != clauses.size()) {
// since clauseSets implicitly deduplicates FILTER and MUST_NOT
// clauses, this means there were duplicates
BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) {
final Occur occur = entry.getKey();
for (Query query : entry.getValue()) {
rewritten.add(query, occur);
}
}
return rewritten.build();
}
}
// Check whether some clauses are both required and excluded
final Collection<Query> mustNotClauses = clauseSets.get(Occur.MUST_NOT);
if (!mustNotClauses.isEmpty()) {
final Predicate<Query> p = clauseSets.get(Occur.MUST)::contains;
if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) {
return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT");
}
if (mustNotClauses.contains(new MatchAllDocsQuery())) {
return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery");
}
}
// remove FILTER clauses that are also MUST clauses or that match all documents
if (clauseSets.get(Occur.FILTER).size() > 0) {
final Set<Query> filters = new HashSet<>(clauseSets.get(Occur.FILTER));
boolean modified = false;
if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) {
modified = filters.remove(new MatchAllDocsQuery());
}
modified |= filters.removeAll(clauseSets.get(Occur.MUST));
if (modified) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.FILTER) {
builder.add(clause);
}
}
for (Query filter : filters) {
builder.add(filter, Occur.FILTER);
}
return builder.build();
}
}
// convert FILTER clauses that are also SHOULD clauses to MUST clauses
if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
final Collection<Query> filters = clauseSets.get(Occur.FILTER);
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
Set<Query> intersection = new HashSet<>(filters);
intersection.retainAll(shoulds);
if (intersection.isEmpty() == false) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
int minShouldMatch = getMinimumNumberShouldMatch();
for (BooleanClause clause : clauses) {
if (intersection.contains(clause.query())) {
if (clause.occur() == Occur.SHOULD) {
builder.add(new BooleanClause(clause.query(), Occur.MUST));
minShouldMatch--;
}
} else {
builder.add(clause);
}
}
builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch));
return builder.build();
}
}
// Deduplicate SHOULD clauses by summing up their boosts
if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) {
Map<Query, Double> shouldClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.SHOULD)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost);
}
if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) {
BooleanQuery.Builder builder =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query, Double> entry : shouldClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.SHOULD);
}
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.SHOULD) {
builder.add(clause);
}
}
return builder.build();
}
}
// Deduplicate MUST clauses by summing up their boosts
if (clauseSets.get(Occur.MUST).size() > 0) {
Map<Query, Double> mustClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.MUST)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost);
}
if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) {
BooleanQuery.Builder builder =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query, Double> entry : mustClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.MUST);
}
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.MUST) {
builder.add(clause);
}
}
return builder.build();
}
}
// Rewrite queries whose single scoring clause is a MUST clause on a
// MatchAllDocsQuery to a ConstantScoreQuery
{
final Collection<Query> musts = clauseSets.get(Occur.MUST);
final Collection<Query> filters = clauseSets.get(Occur.FILTER);
if (musts.size() == 1 && filters.size() > 0) {
Query must = musts.iterator().next();
float boost = 1f;
if (must instanceof BoostQuery boostQuery) {
must = boostQuery.getQuery();
boost = boostQuery.getBoost();
}
if (must.getClass() == MatchAllDocsQuery.class) {
// our single scoring clause matches everything: rewrite to a CSQ on the filter
// ignore SHOULD clause for now
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
switch (clause.occur()) {
case FILTER:
case MUST_NOT:
builder.add(clause);
break;
case MUST:
case SHOULD:
default:
// ignore
break;
}
}
Query rewritten = builder.build();
rewritten = new ConstantScoreQuery(rewritten);
if (boost != 1f) {
rewritten = new BoostQuery(rewritten, boost);
}
// now add back the SHOULD clauses
builder =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch())
.add(rewritten, Occur.MUST);
for (Query query : clauseSets.get(Occur.SHOULD)) {
builder.add(query, Occur.SHOULD);
}
rewritten = builder.build();
return rewritten;
}
}
}
// Flatten nested disjunctions, this is important for block-max WAND to perform well
if (minimumNumberShouldMatch <= 1) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
if (innerQuery.isPureDisjunction()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery.clauses()) {
builder.add(innerClause);
}
} else {
builder.add(clause);
}
} else {
builder.add(clause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// Inline required / prohibited clauses. This helps run filtered conjunctive queries more
// efficiently by providing all clauses to the block-max AND scorer.
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause outerClause : clauses) {
if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) {
// Inlining prohibited clauses is not legal if the query is a pure negation, since pure
// negations have no matches. It works because the inner BooleanQuery would have first
// rewritten to a MatchNoDocsQuery if it only had prohibited clauses.
assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size();
if (innerQuery.getMinimumNumberShouldMatch() == 0
&& innerQuery.getClauses(Occur.SHOULD).isEmpty()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery) {
Occur innerOccur = innerClause.occur();
if (innerOccur == Occur.FILTER
|| innerOccur == Occur.MUST_NOT
|| outerClause.occur() == Occur.MUST) {
builder.add(innerClause);
} else {
assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST;
// In this case we need to change the occur of the inner query from MUST to FILTER.
builder.add(innerClause.query(), Occur.FILTER);
}
}
} else {
builder.add(outerClause);
}
} else {
builder.add(outerClause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// SHOULD clause count less than or equal to minimumNumberShouldMatch
// Important(this can only be processed after nested clauses have been flattened)
{
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
if (shoulds.size() < minimumNumberShouldMatch) {
return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
}
if (shoulds.size() > 0 && shoulds.size() == minimumNumberShouldMatch) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD) {
builder.add(clause.query(), Occur.MUST);
} else {
builder.add(clause);
}
}
return builder.build();
}
}
// Inline SHOULD clauses from the only MUST clause
{
if (clauseSets.get(Occur.SHOULD).isEmpty()
&& clauseSets.get(Occur.MUST).size() == 1
&& clauseSets.get(Occur.MUST).iterator().next() instanceof BooleanQuery inner
&& inner.clauses.size() == inner.clauseSets.get(Occur.SHOULD).size()) {
BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.MUST) {
rewritten.add(clause);
}
}
for (BooleanClause innerClause : inner.clauses()) {
rewritten.add(innerClause);
}
rewritten.setMinimumNumberShouldMatch(Math.max(1, inner.getMinimumNumberShouldMatch()));
return rewritten.build();
}
}
return super.rewrite(indexSearcher);
}