public Query rewrite()

in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java [269:647]


  public Query rewrite(IndexSearcher indexSearcher) throws IOException {
    if (clauses.size() == 0) {
      return new MatchNoDocsQuery("empty BooleanQuery");
    }

    // Queries with no positive clauses have no matches
    if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) {
      return new MatchNoDocsQuery("pure negative BooleanQuery");
    }

    // optimize 1-clause queries
    if (clauses.size() == 1) {
      BooleanClause c = clauses.get(0);
      Query query = c.query();
      if (minimumNumberShouldMatch == 1 && c.occur() == Occur.SHOULD) {
        return query;
      } else if (minimumNumberShouldMatch == 0) {
        switch (c.occur()) {
          case SHOULD:
          case MUST:
            return query;
          case FILTER:
            // no scoring clauses, so return a score of 0
            return new BoostQuery(new ConstantScoreQuery(query), 0);
          case MUST_NOT:
          default:
            throw new AssertionError();
        }
      }
    }

    // recursively rewrite
    {
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
      boolean actuallyRewritten = false;
      for (BooleanClause clause : this) {
        Query query = clause.query();
        BooleanClause.Occur occur = clause.occur();
        Query rewritten;
        if (occur == Occur.FILTER || occur == Occur.MUST_NOT) {
          // Clauses that are not involved in scoring can get some extra simplifications
          rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
          if (rewritten instanceof ConstantScoreQuery) {
            rewritten = ((ConstantScoreQuery) rewritten).getQuery();
          }
        } else {
          rewritten = query.rewrite(indexSearcher);
        }
        if (rewritten != query || query.getClass() == MatchNoDocsQuery.class) {
          // rewrite clause
          actuallyRewritten = true;
          if (rewritten.getClass() == MatchNoDocsQuery.class) {
            switch (occur) {
              case SHOULD:
              case MUST_NOT:
                // the clause can be safely ignored
                break;
              case MUST:
              case FILTER:
                return rewritten;
            }
          } else {
            builder.add(rewritten, occur);
          }
        } else {
          // leave as-is
          builder.add(clause);
        }
      }
      if (actuallyRewritten) {
        return builder.build();
      }
    }

    // remove duplicate FILTER and MUST_NOT clauses
    {
      int clauseCount = 0;
      for (Collection<Query> queries : clauseSets.values()) {
        clauseCount += queries.size();
      }
      if (clauseCount != clauses.size()) {
        // since clauseSets implicitly deduplicates FILTER and MUST_NOT
        // clauses, this means there were duplicates
        BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
        rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
        for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) {
          final Occur occur = entry.getKey();
          for (Query query : entry.getValue()) {
            rewritten.add(query, occur);
          }
        }
        return rewritten.build();
      }
    }

    // Check whether some clauses are both required and excluded
    final Collection<Query> mustNotClauses = clauseSets.get(Occur.MUST_NOT);
    if (!mustNotClauses.isEmpty()) {
      final Predicate<Query> p = clauseSets.get(Occur.MUST)::contains;
      if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) {
        return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT");
      }
      if (mustNotClauses.contains(new MatchAllDocsQuery())) {
        return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery");
      }
    }

    // remove FILTER clauses that are also MUST clauses or that match all documents
    if (clauseSets.get(Occur.FILTER).size() > 0) {
      final Set<Query> filters = new HashSet<>(clauseSets.get(Occur.FILTER));
      boolean modified = false;
      if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) {
        modified = filters.remove(new MatchAllDocsQuery());
      }
      modified |= filters.removeAll(clauseSets.get(Occur.MUST));
      if (modified) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
        for (BooleanClause clause : clauses) {
          if (clause.occur() != Occur.FILTER) {
            builder.add(clause);
          }
        }
        for (Query filter : filters) {
          builder.add(filter, Occur.FILTER);
        }
        return builder.build();
      }
    }

    // convert FILTER clauses that are also SHOULD clauses to MUST clauses
    if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
      final Collection<Query> filters = clauseSets.get(Occur.FILTER);
      final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);

      Set<Query> intersection = new HashSet<>(filters);
      intersection.retainAll(shoulds);

      if (intersection.isEmpty() == false) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        int minShouldMatch = getMinimumNumberShouldMatch();

        for (BooleanClause clause : clauses) {
          if (intersection.contains(clause.query())) {
            if (clause.occur() == Occur.SHOULD) {
              builder.add(new BooleanClause(clause.query(), Occur.MUST));
              minShouldMatch--;
            }
          } else {
            builder.add(clause);
          }
        }

        builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch));
        return builder.build();
      }
    }

    // Deduplicate SHOULD clauses by summing up their boosts
    if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) {
      Map<Query, Double> shouldClauses = new HashMap<>();
      for (Query query : clauseSets.get(Occur.SHOULD)) {
        double boost = 1;
        while (query instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) query;
          boost *= bq.getBoost();
          query = bq.getQuery();
        }
        shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost);
      }
      if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) {
        BooleanQuery.Builder builder =
            new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
        for (Map.Entry<Query, Double> entry : shouldClauses.entrySet()) {
          Query query = entry.getKey();
          float boost = entry.getValue().floatValue();
          if (boost != 1f) {
            query = new BoostQuery(query, boost);
          }
          builder.add(query, Occur.SHOULD);
        }
        for (BooleanClause clause : clauses) {
          if (clause.occur() != Occur.SHOULD) {
            builder.add(clause);
          }
        }
        return builder.build();
      }
    }

    // Deduplicate MUST clauses by summing up their boosts
    if (clauseSets.get(Occur.MUST).size() > 0) {
      Map<Query, Double> mustClauses = new HashMap<>();
      for (Query query : clauseSets.get(Occur.MUST)) {
        double boost = 1;
        while (query instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) query;
          boost *= bq.getBoost();
          query = bq.getQuery();
        }
        mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost);
      }
      if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) {
        BooleanQuery.Builder builder =
            new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
        for (Map.Entry<Query, Double> entry : mustClauses.entrySet()) {
          Query query = entry.getKey();
          float boost = entry.getValue().floatValue();
          if (boost != 1f) {
            query = new BoostQuery(query, boost);
          }
          builder.add(query, Occur.MUST);
        }
        for (BooleanClause clause : clauses) {
          if (clause.occur() != Occur.MUST) {
            builder.add(clause);
          }
        }
        return builder.build();
      }
    }

    // Rewrite queries whose single scoring clause is a MUST clause on a
    // MatchAllDocsQuery to a ConstantScoreQuery
    {
      final Collection<Query> musts = clauseSets.get(Occur.MUST);
      final Collection<Query> filters = clauseSets.get(Occur.FILTER);
      if (musts.size() == 1 && filters.size() > 0) {
        Query must = musts.iterator().next();
        float boost = 1f;
        if (must instanceof BoostQuery boostQuery) {
          must = boostQuery.getQuery();
          boost = boostQuery.getBoost();
        }
        if (must.getClass() == MatchAllDocsQuery.class) {
          // our single scoring clause matches everything: rewrite to a CSQ on the filter
          // ignore SHOULD clause for now
          BooleanQuery.Builder builder = new BooleanQuery.Builder();
          for (BooleanClause clause : clauses) {
            switch (clause.occur()) {
              case FILTER:
              case MUST_NOT:
                builder.add(clause);
                break;
              case MUST:
              case SHOULD:
              default:
                // ignore
                break;
            }
          }
          Query rewritten = builder.build();
          rewritten = new ConstantScoreQuery(rewritten);
          if (boost != 1f) {
            rewritten = new BoostQuery(rewritten, boost);
          }

          // now add back the SHOULD clauses
          builder =
              new BooleanQuery.Builder()
                  .setMinimumNumberShouldMatch(getMinimumNumberShouldMatch())
                  .add(rewritten, Occur.MUST);
          for (Query query : clauseSets.get(Occur.SHOULD)) {
            builder.add(query, Occur.SHOULD);
          }
          rewritten = builder.build();
          return rewritten;
        }
      }
    }

    // Flatten nested disjunctions, this is important for block-max WAND to perform well
    if (minimumNumberShouldMatch <= 1) {
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
      boolean actuallyRewritten = false;
      for (BooleanClause clause : clauses) {
        if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
          if (innerQuery.isPureDisjunction()) {
            actuallyRewritten = true;
            for (BooleanClause innerClause : innerQuery.clauses()) {
              builder.add(innerClause);
            }
          } else {
            builder.add(clause);
          }
        } else {
          builder.add(clause);
        }
      }
      if (actuallyRewritten) {
        return builder.build();
      }
    }

    // Inline required / prohibited clauses. This helps run filtered conjunctive queries more
    // efficiently by providing all clauses to the block-max AND scorer.
    {
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
      boolean actuallyRewritten = false;
      for (BooleanClause outerClause : clauses) {
        if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) {
          // Inlining prohibited clauses is not legal if the query is a pure negation, since pure
          // negations have no matches. It works because the inner BooleanQuery would have first
          // rewritten to a MatchNoDocsQuery if it only had prohibited clauses.
          assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size();
          if (innerQuery.getMinimumNumberShouldMatch() == 0
              && innerQuery.getClauses(Occur.SHOULD).isEmpty()) {

            actuallyRewritten = true;
            for (BooleanClause innerClause : innerQuery) {
              Occur innerOccur = innerClause.occur();
              if (innerOccur == Occur.FILTER
                  || innerOccur == Occur.MUST_NOT
                  || outerClause.occur() == Occur.MUST) {
                builder.add(innerClause);
              } else {
                assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST;
                // In this case we need to change the occur of the inner query from MUST to FILTER.
                builder.add(innerClause.query(), Occur.FILTER);
              }
            }
          } else {
            builder.add(outerClause);
          }
        } else {
          builder.add(outerClause);
        }
      }
      if (actuallyRewritten) {
        return builder.build();
      }
    }

    // SHOULD clause count less than or equal to minimumNumberShouldMatch
    // Important(this can only be processed after nested clauses have been flattened)
    {
      final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
      if (shoulds.size() < minimumNumberShouldMatch) {
        return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
      }
      if (shoulds.size() > 0 && shoulds.size() == minimumNumberShouldMatch) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (BooleanClause clause : clauses) {
          if (clause.occur() == Occur.SHOULD) {
            builder.add(clause.query(), Occur.MUST);
          } else {
            builder.add(clause);
          }
        }

        return builder.build();
      }
    }

    // Inline SHOULD clauses from the only MUST clause
    {
      if (clauseSets.get(Occur.SHOULD).isEmpty()
          && clauseSets.get(Occur.MUST).size() == 1
          && clauseSets.get(Occur.MUST).iterator().next() instanceof BooleanQuery inner
          && inner.clauses.size() == inner.clauseSets.get(Occur.SHOULD).size()) {
        BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
        for (BooleanClause clause : clauses) {
          if (clause.occur() != Occur.MUST) {
            rewritten.add(clause);
          }
        }
        for (BooleanClause innerClause : inner.clauses()) {
          rewritten.add(innerClause);
        }
        rewritten.setMinimumNumberShouldMatch(Math.max(1, inner.getMinimumNumberShouldMatch()));
        return rewritten.build();
      }
    }

    return super.rewrite(indexSearcher);
  }