public Cursor query()

in oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java [259:594]
271 lines of code
61 McCabe index (conditional complexity)

    public Cursor query(final IndexPlan plan, NodeState rootState) {
        if (plan.isDeprecated()) {
            LOG.warn("This index is deprecated: {}; it is used for query {}. " +
                    "Please change the query or the index definitions.", plan.getPlanName(), plan.getFilter());
        }
        final Filter filter = plan.getFilter();
        final Sort sort = getSort(plan);
        final PlanResult pr = getPlanResult(plan);
        QueryLimits settings = filter.getQueryLimits();
        LuceneResultRowIterator rItr = new LuceneResultRowIterator() {
            private final Deque<FulltextResultRow> queue = Queues.newArrayDeque();
            private final Set<String> seenPaths = Sets.newHashSet();
            private ScoreDoc lastDoc;
            private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
            private boolean noDocs = false;
            private IndexSearcher indexSearcher;
            private int indexNodeId = -1;
            private FacetProvider facetProvider;
            private int rewoundCount = 0;

            /**
             * Supplies the next result row.
             *
             * <p>Rows are served from the internal buffer; when the buffer is empty a
             * further batch is requested through {@code loadDocs()}. Once nothing more
             * can be loaded the searcher reference is dropped and the end of the
             * iteration is signalled.
             *
             * @return the next row, or the end-of-data marker when exhausted
             */
            @Override
            protected FulltextResultRow computeNext() {
                // Short-circuit evaluation: loadDocs() only runs when the buffer is empty,
                // and it returns true only if it put at least one row into the buffer.
                if (!queue.isEmpty() || loadDocs()) {
                    return queue.remove();
                }
                releaseSearcher();
                return endOfData();
            }

            /**
             * @return how many times this iterator, after it had already been
             *         initialised, noticed a different index node id and therefore
             *         restarted the query without an offset
             */
            @Override
            public int rewoundCount() {
                return this.rewoundCount;
            }

            /**
             * Converts a single Lucene hit into a result row.
             *
             * <p>The path is read from the stored document; an empty path denotes the
             * root node. If the plan result asks for path transformation, the
             * transformed path is used and every transformed path is handed out at
             * most once.
             *
             * @param doc           the hit to convert
             * @param searcher      searcher whose reader is used to load the stored path
             * @param excerpts      excerpts for the row, may be {@code null}
             * @param facetProvider facet provider attached to the row
             * @param explanation   score explanation for the row, may be {@code null}
             * @return the row, or {@code null} when the hit has no path, the
             *         transformation yields nothing, the transformed path was already
             *         seen, or the path is excluded by {@code shouldInclude}
             * @throws IOException if the stored document cannot be read
             */
            private FulltextResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, Map<String, String> excerpts,
                                                   FacetProvider facetProvider,
                                                   String explanation) throws IOException {
                //TODO Look into usage of field cache for retrieving the path
                //instead of reading via reader if no of docs in index are limited
                PathStoredFieldVisitor pathVisitor = new PathStoredFieldVisitor();
                searcher.getIndexReader().document(doc.doc, pathVisitor);

                String storedPath = pathVisitor.getPath();
                if (storedPath == null) {
                    return null;
                }

                // an empty stored path stands for the root node
                String path = "".equals(storedPath) ? "/" : storedPath;

                if (pr.isPathTransformed()) {
                    String transformed = pr.transformPath(path);
                    if (transformed == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", path);
                        return null;
                    }

                    // avoid duplicate entries: add() reports false for an already known path
                    if (!seenPaths.add(transformed)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", path);
                        return null;
                    }
                    path = transformed;
                }

                boolean shouldIncludeForHierarchy = shouldInclude(path, plan);
                LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy);
                if (!shouldIncludeForHierarchy) {
                    return null;
                }
                return new FulltextResultRow(path, doc.score, excerpts, facetProvider, explanation);
            }

            /**
             * Loads the lucene documents in batches.
             *
             * <p>The behaviour depends on the kind of request built for the plan:
             * <ul>
             *   <li>a regular {@code Query} is executed batch by batch, continuing
             *       after {@code lastDoc}; the batch size doubles after every search
             *       (capped at 100000). If a batch produced hits but none of them
             *       ended up in the buffer, the next batch is fetched right away.</li>
             *   <li>a spellcheck or suggest request is evaluated once; every
             *       suggestion backed by at least one accessible document is
             *       buffered and {@code noDocs} is set so no further load happens.</li>
             * </ul>
             *
             * <p>Any exception is logged and swallowed; the index node acquired for the
             * call is always released.
             *
             * @return true if any document is loaded
             */
            private boolean loadDocs() {

                if (noDocs) {
                    return false;
                }

                ScoreDoc lastDocToRecord = null;

                final LuceneIndexNode indexNode = acquireIndexNode(plan);
                checkState(indexNode != null);
                try {
                    IndexSearcher searcher = getCurrentSearcher(indexNode);
                    LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                    Object request = luceneRequestFacade.getLuceneRequest();
                    if (request instanceof Query) {
                        Query query = (Query) request;

                        // These only depend on the filter and the index definition,
                        // not on the batch being loaded, so they are evaluated once.
                        Set<String> excerptFields = collectExcerptFields();
                        boolean addExcerpt = !excerptFields.isEmpty();

                        PropertyRestriction explainRestriction = filter.getPropertyRestriction(QueryConstants.OAK_SCORE_EXPLANATION);
                        boolean addExplain = explainRestriction != null && explainRestriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        long start = PERF_LOGGER.start();
                        long startLoop = System.currentTimeMillis();
                        for (int repeated = 0;; repeated++) {
                            if (repeated > 0) {
                                // guard against spending too long on batches that yield no rows
                                long now = System.currentTimeMillis();
                                if (now > startLoop + LOAD_DOCS_WARN) {
                                    LOG.warn("loadDocs lastDoc {} repeated {} times for query {}", lastDoc, repeated, query);
                                    if (repeated > 1 && now > startLoop + LOAD_DOCS_STOP) {
                                        LOG.error("loadDocs stops", new Exception());
                                        break;
                                    }
                                }
                            }
                            TopDocs docs = searchNextBatch(searcher, query);
                            PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                            // long arithmetic avoids int overflow before the cap is applied
                            nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                            if (facetProvider == null) {
                                long f = PERF_LOGGER.start();
                                if (OLD_FACET_PROVIDER) {
                                    // here the current searcher gets referenced for later
                                    // but the searcher might get closed in the meantime
                                    facetProvider = new LuceneFacetProvider(
                                            FacetHelper.getFacets(searcher, query, plan, indexNode.getDefinition().getSecureFacetConfiguration())
                                    );
                                } else {
                                    // a new searcher is opened and closed when needed
                                    facetProvider = new DelayedLuceneFacetProvider(LucenePropertyIndex.this, query, plan, indexNode.getDefinition().getSecureFacetConfiguration());
                                }
                                PERF_LOGGER.end(f, -1, "facets retrieved");
                            }

                            FieldInfos mergedFieldInfos = null;
                            if (addExcerpt) {
                                // setup highlighter
                                QueryScorer scorer = new QueryScorer(query);
                                scorer.setExpandMultiTermQuery(true);
                                highlighter.setFragmentScorer(scorer);
                                mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                            }

                            boolean earlyStop = false;
                            if (docs.scoreDocs.length > 1) {
                                // reranking step for fv sim search
                                List<PropertyDefinition> rerankProperties =
                                        collectSimilarityRerankProperties(indexNode.getDefinition());
                                if (!rerankProperties.isEmpty()) {
                                    long fvs = PERF_LOGGER.start();
                                    // 'searcher' is the very instance held in the indexSearcher field
                                    SimSearchUtils.bruteForceFVRerank(rerankProperties, docs, searcher);
                                    PERF_LOGGER.end(fvs, -1, "fv reranking done");
                                    earlyStop = true;
                                }
                            }

                            for (ScoreDoc doc : docs.scoreDocs) {
                                Map<String, String> excerpts = null;
                                if (addExcerpt) {
                                    excerpts = getExcerpt(query, excerptFields, analyzer, searcher, doc, mergedFieldInfos);
                                }

                                String explanation = null;
                                if (addExplain) {
                                    explanation = searcher.explain(query, doc.doc).toString();
                                }

                                FulltextResultRow row = convertToRow(doc, searcher, excerpts, facetProvider, explanation);
                                if (row != null) {
                                    queue.add(row);
                                }
                                lastDocToRecord = doc;
                            }

                            if (earlyStop) {
                                // after a rerank no further batch is loaded
                                noDocs = true;
                                break;
                            }
                            if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                                //queue is still empty but more results can be fetched
                                //from Lucene so still continue
                                lastDoc = lastDocToRecord;
                            } else {
                                break;
                            }
                        }
                    } else if (request instanceof SpellcheckHelper.SpellcheckQuery) {
                        String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                        noDocs = true;
                        SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) request;
                        SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                        // ACL filter spellchecks
                        QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
                        for (SuggestWord suggestion : suggestWords) {
                            Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));

                            query = addDescendantClauseIfRequired(query, plan);

                            if (hasAccessibleHit(searcher, query)) {
                                queue.add(new FulltextResultRow(suggestion.string));
                            }
                        }

                    } else if (request instanceof SuggestHelper.SuggestQuery) {
                        SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) request;
                        noDocs = true;

                        List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);

                        QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                                indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() :
                                SuggestHelper.getAnalyzer());

                        // ACL filter suggestions
                        for (Lookup.LookupResult suggestion : lookupResults) {
                            Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");

                            query = addDescendantClauseIfRequired(query, plan);

                            if (hasAccessibleHit(searcher, query)) {
                                queue.add(new FulltextResultRow(suggestion.key.toString(), suggestion.value));
                            }
                        }
                    }
                } catch (Exception e) {
                    // best effort: a failing query is logged and yields no (further) rows
                    LOG.warn("query [{}] via {} failed.", plan.getFilter(), LucenePropertyIndex.this.getClass().getCanonicalName(), e);
                } finally {
                    indexNode.release();
                }

                // remember where to continue, also when the load was cut short by an exception
                if (lastDocToRecord != null) {
                    this.lastDoc = lastDocToRecord;
                }

                return !queue.isEmpty();
            }

            /**
             * Runs the search for the next batch: the first {@code nextBatchSize}
             * hits when no document was recorded yet, otherwise the hits following
             * {@code lastDoc}. The plan's sort order is applied when there is one.
             */
            private TopDocs searchNextBatch(IndexSearcher searcher, Query query) throws IOException {
                if (lastDoc != null) {
                    LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                    if (sort == null) {
                        return searcher.searchAfter(lastDoc, query, nextBatchSize);
                    }
                    return searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                }
                LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                if (sort == null) {
                    return searcher.search(query, nextBatchSize);
                }
                return searcher.search(query, nextBatchSize, sort);
            }

            /**
             * Collects the values of all {@code rep:excerpt} restrictions of the filter.
             */
            private Set<String> collectExcerptFields() {
                Set<String> excerptFields = Sets.newHashSet();
                for (PropertyRestriction excerptRestriction : filter.getPropertyRestrictions()) {
                    if (QueryConstants.REP_EXCERPT.equals(excerptRestriction.propertyName)) {
                        String value = excerptRestriction.first.getValue(Type.STRING);
                        excerptFields.add(value);
                    }
                }
                return excerptFields;
            }

            /**
             * Returns the similarity properties flagged for reranking when the filter
             * carries a restriction on the index function whose value starts with
             * {@code mlt?}; an empty list in every other case.
             */
            private List<PropertyDefinition> collectSimilarityRerankProperties(LuceneIndexDefinition defn) {
                List<PropertyDefinition> rerankProperties = new LinkedList<>();
                if (!defn.hasFunctionDefined()) {
                    return rerankProperties;
                }
                PropertyRestriction functionRestriction = filter.getPropertyRestriction(defn.getFunctionName());
                if (functionRestriction == null) {
                    return rerankProperties;
                }
                String queryString = String.valueOf(functionRestriction.first.getValue(functionRestriction.first.getType()));
                if (!queryString.startsWith("mlt?")) {
                    return rerankProperties;
                }
                for (IndexingRule rule : defn.getDefinedRules()) {
                    for (PropertyDefinition pd : rule.getSimilarityProperties()) {
                        if (pd.similarityRerank) {
                            rerankProperties.add(pd);
                        }
                    }
                }
                return rerankProperties;
            }

            /**
             * Tells whether at least one of the first 100 hits of {@code query}
             * refers to a path that is accessible according to the filter.
             */
            private boolean hasAccessibleHit(IndexSearcher searcher, Query query) throws IOException {
                TopDocs topDocs = searcher.search(query, 100);
                if (topDocs.totalHits > 0) {
                    for (ScoreDoc doc : topDocs.scoreDocs) {
                        Document retrievedDoc = searcher.doc(doc.doc);
                        String prefix = filter.getPath();
                        // a filter path of length one contributes no prefix
                        if (prefix.length() == 1) {
                            prefix = "";
                        }
                        if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                            return true;
                        }
                    }
                }
                return false;
            }

            /**
             * Returns the searcher this iterator works with, switching to the
             * searcher of the given index node when its id differs from the one
             * seen last.
             *
             * <p>A switch discards {@code lastDoc}, so the next search starts without
             * an offset; if a searcher had already been in use the switch is logged
             * and counted in {@code rewoundCount}.
             *
             * @param indexNode the index node acquired for the current load
             * @return the searcher to use for the current load
             */
            private IndexSearcher getCurrentSearcher(LuceneIndexNode indexNode) {
                //The searcher once obtained is held till either cursor is finished
                //or if the index gets updated. It needs to be ensured that
                //searcher is obtained via this method only in this iterator

                //For NRT case its fine to keep a reference to searcher i.e. not
                //acquire it for every loadDocs call otherwise with frequent change
                //the reset of lastDoc would happen very frequently.
                //Upon LuceneIndexNode change i.e. when new async index update is detected
                //the searcher would be refreshed as done earlier
                int currentNodeId = indexNode.getIndexNodeId();
                if (currentNodeId == indexNodeId) {
                    //same index node as before: keep using the held searcher
                    return indexSearcher;
                }

                //if already initialized then log about change
                boolean alreadyInitialized = indexNodeId > 0;
                if (alreadyInitialized) {
                    LOG.info("Change in index version detected. Query would be performed without offset");
                    rewoundCount++;
                }

                //Refresh the searcher since a change in indexNode is detected
                indexSearcher = indexNode.getSearcher();
                indexNodeId = currentNodeId;
                lastDoc = null;
                return indexSearcher;
            }

            /**
             * Drops this iterator's reference to the searcher; called once the
             * iteration has no more rows to deliver.
             */
            private void releaseSearcher() {
                //For now the reference is merely cleared.
                this.indexSearcher = null;
            }
        };
        Iterator<FulltextResultRow> itr = rItr;
        SizeEstimator sizeEstimator = getSizeEstimator(plan);

        if (pr.hasPropertyIndexResult() || pr.evaluateSyncNodeTypeRestriction()) {
            itr = mergePropertyIndexResult(plan, rootState, itr);
        }

        return new FulltextPathCursor(itr, rItr, plan, settings, sizeEstimator);
    }