in oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java [259:594]
/**
 * Runs the given plan against the Lucene index and returns a cursor over the result rows.
 *
 * <p>Results are produced lazily by an anonymous {@code LuceneResultRowIterator} that pulls
 * documents from Lucene in batches (see {@code loadDocs()}). If the plan result reports a
 * property index result or a sync node type restriction, the iterator is additionally passed
 * through {@code mergePropertyIndexResult} before being wrapped in the returned cursor.
 *
 * @param plan      the index plan to execute; a warning is logged if it is marked deprecated
 * @param rootState the root node state, forwarded to {@code mergePropertyIndexResult}
 * @return a {@code FulltextPathCursor} over the (possibly merged) result rows
 */
public Cursor query(final IndexPlan plan, NodeState rootState) {
    if (plan.isDeprecated()) {
        LOG.warn("This index is deprecated: {}; it is used for query {}. " +
                "Please change the query or the index definitions.", plan.getPlanName(), plan.getFilter());
    }
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryLimits settings = filter.getQueryLimits();
    LuceneResultRowIterator rItr = new LuceneResultRowIterator() {
        // Rows already converted and waiting to be handed out by computeNext()
        private final Deque<FulltextResultRow> queue = Queues.newArrayDeque();
        // Transformed paths already emitted; only consulted when the plan transforms paths
        private final Set<String> seenPaths = Sets.newHashSet();
        // Last document of the previous batch, used as the searchAfter() offset
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        // Once set, loadDocs() returns false without querying again
        private boolean noDocs = false;
        private IndexSearcher indexSearcher;
        private int indexNodeId = -1;
        private FacetProvider facetProvider;
        // Number of times the query was restarted without offset because the index node changed
        private int rewoundCount = 0;

        /**
         * Returns the next queued row, loading a further batch when the queue is empty.
         * When nothing more can be loaded the searcher reference is dropped and the
         * iteration is ended.
         */
        @Override
        protected FulltextResultRow computeNext() {
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            releaseSearcher();
            return endOfData();
        }

        @Override
        public int rewoundCount() {
            return rewoundCount;
        }

        /**
         * Converts a Lucene hit into a result row.
         *
         * @return the row, or {@code null} if the document has no path, the path
         *         transformation yields {@code null} or an already seen path, or
         *         {@code shouldInclude} rejects the path
         */
        private FulltextResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, Map<String, String> excerpts,
                                               FacetProvider facetProvider,
                                               String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into usage of field cache for retrieving the path
            //instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path == null) {
                return null;
            }
            if ("".equals(path)) {
                path = "/";
            }
            if (pr.isPathTransformed()) {
                String originalPath = path;
                path = pr.transformPath(path);
                if (path == null) {
                    LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                    return null;
                }
                // avoid duplicate entries: add() reports false when the path was already present
                if (!seenPaths.add(path)) {
                    LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                    return null;
                }
            }
            boolean shouldIncludeForHierarchy = shouldInclude(path, plan);
            LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy);
            return shouldIncludeForHierarchy
                    ? new FulltextResultRow(path, doc.score, excerpts, facetProvider, explanation)
                    : null;
        }

        /**
         * Loads the lucene documents in batches.
         *
         * <p>Depending on the request type this either runs the Lucene query (repeating with
         * the next batch while every hit of a batch was filtered out), or collects
         * ACL-filtered spellcheck words / suggestions. Any exception is logged at WARN level
         * and not propagated; the index node is always released.
         *
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            // Kept as a local so that the offset is still recorded after the try block
            // even when an exception interrupts the processing of a batch
            ScoreDoc lastDocToRecord = null;
            final LuceneIndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = getCurrentSearcher(indexNode);
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();

                    // These depend only on the filter and the index definition, so they are
                    // computed once instead of on every repetition of the batch loop below
                    Set<String> excerptFields = Sets.newHashSet();
                    for (PropertyRestriction restriction : filter.getPropertyRestrictions()) {
                        if (QueryConstants.REP_EXCERPT.equals(restriction.propertyName)) {
                            excerptFields.add(restriction.first.getValue(Type.STRING));
                        }
                    }
                    boolean addExcerpt = !excerptFields.isEmpty();
                    PropertyRestriction explainRestriction = filter.getPropertyRestriction(QueryConstants.OAK_SCORE_EXPLANATION);
                    boolean addExplain = explainRestriction != null && explainRestriction.isNotNullRestriction();
                    Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                    long start = PERF_LOGGER.start();
                    long startLoop = System.currentTimeMillis();
                    for (int repeated = 0;; repeated++) {
                        if (repeated > 0) {
                            // Guard against spinning for too long over batches that get filtered out entirely
                            long now = System.currentTimeMillis();
                            if (now > startLoop + LOAD_DOCS_WARN) {
                                LOG.warn("loadDocs lastDoc {} repeated {} times for query {}", lastDoc, repeated, query);
                                if (repeated > 1 && now > startLoop + LOAD_DOCS_STOP) {
                                    LOG.error("loadDocs stops", new Exception());
                                    break;
                                }
                            }
                        }
                        TopDocs docs;
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        // Double the batch size for the next round, capped at 100000
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        if (facetProvider == null) {
                            long f = PERF_LOGGER.start();
                            if (OLD_FACET_PROVIDER) {
                                // here the current searcher gets referenced for later
                                // but the searcher might get closed in the meantime
                                facetProvider = new LuceneFacetProvider(
                                        FacetHelper.getFacets(searcher, query, plan, indexNode.getDefinition().getSecureFacetConfiguration())
                                );
                            } else {
                                // a new searcher is opened and closed when needed
                                facetProvider = new DelayedLuceneFacetProvider(LucenePropertyIndex.this, query, plan, indexNode.getDefinition().getSecureFacetConfiguration());
                            }
                            PERF_LOGGER.end(f, -1, "facets retrieved");
                        }

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        boolean earlyStop = false;
                        if (docs.scoreDocs.length > 1) {
                            // reranking step for fv sim search
                            PropertyRestriction functionRestriction = null;
                            LuceneIndexDefinition defn = indexNode.getDefinition();
                            if (defn.hasFunctionDefined()) {
                                functionRestriction = filter.getPropertyRestriction(defn.getFunctionName());
                            }
                            if (functionRestriction != null) {
                                String queryString = String.valueOf(
                                        functionRestriction.first.getValue(functionRestriction.first.getType()));
                                if (queryString.startsWith("mlt?")) {
                                    List<PropertyDefinition> sp = new LinkedList<>();
                                    for (IndexingRule r : defn.getDefinedRules()) {
                                        List<PropertyDefinition> similarityProperties = r.getSimilarityProperties();
                                        for (PropertyDefinition pd : similarityProperties) {
                                            if (pd.similarityRerank) {
                                                sp.add(pd);
                                            }
                                        }
                                    }
                                    if (!sp.isEmpty()) {
                                        long fvs = PERF_LOGGER.start();
                                        // 'searcher' is the very instance getCurrentSearcher() stored in indexSearcher
                                        SimSearchUtils.bruteForceFVRerank(sp, docs, searcher);
                                        PERF_LOGGER.end(fvs, -1, "fv reranking done");
                                        earlyStop = true;
                                    }
                                }
                            }
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            Map<String, String> excerpts = null;
                            if (addExcerpt) {
                                excerpts = getExcerpt(query, excerptFields, analyzer, searcher, doc, mergedFieldInfos);
                            }
                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }
                            FulltextResultRow row = convertToRow(doc, searcher, excerpts, facetProvider, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (earlyStop) {
                            // after reranking no further batches are requested
                            noDocs = true;
                            break;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        if (hasAccessibleHit(searcher, query)) {
                            queue.add(new FulltextResultRow(suggestion.string));
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() :
                                    SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        if (hasAccessibleHit(searcher, query)) {
                            queue.add(new FulltextResultRow(suggestion.key.toString(), suggestion.value));
                        }
                    }
                }
            } catch (Exception e) {
                // best effort: the failure is logged and whatever was queued so far is still served
                LOG.warn("query [{}] via {} failed.", plan.getFilter(), LucenePropertyIndex.this.getClass().getCanonicalName(), e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Checks whether at least one of the first 100 hits of {@code query} has a path
         * that the filter reports as accessible. Shared by the spellcheck and the suggest
         * branches of {@code loadDocs()} for ACL filtering.
         *
         * @return true as soon as one accessible hit is found, false otherwise
         */
        private boolean hasAccessibleHit(IndexSearcher searcher, Query query) throws IOException {
            TopDocs topDocs = searcher.search(query, 100);
            if (topDocs.totalHits > 0) {
                // a filter path of length 1 is replaced by an empty prefix
                String prefix = filter.getPath();
                if (prefix.length() == 1) {
                    prefix = "";
                }
                for (ScoreDoc doc : topDocs.scoreDocs) {
                    Document retrievedDoc = searcher.doc(doc.doc);
                    if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Returns the searcher to use for the given index node.
         *
         * <p>The searcher once obtained is held till either the cursor is finished or the
         * index gets updated. It needs to be ensured that the searcher is obtained via this
         * method only in this iterator. For the NRT case it is fine to keep a reference to
         * the searcher, i.e. not acquire it for every loadDocs call; otherwise with frequent
         * changes the reset of lastDoc would happen very frequently. Upon a LuceneIndexNode
         * change, i.e. when a new async index update is detected, the searcher is refreshed
         * and lastDoc is reset so that the query is performed without offset.
         */
        private IndexSearcher getCurrentSearcher(LuceneIndexNode indexNode) {
            if (indexNodeId != indexNode.getIndexNodeId()) {
                //if already initialized then log about change
                if (indexNodeId > 0) {
                    LOG.info("Change in index version detected. Query would be performed without offset");
                    rewoundCount++;
                }

                indexSearcher = indexNode.getSearcher();
                indexNodeId = indexNode.getIndexNodeId();
                lastDoc = null;
            }
            return indexSearcher;
        }

        private void releaseSearcher() {
            //For now nullifying it.
            indexSearcher = null;
        }
    };

    Iterator<FulltextResultRow> itr = rItr;
    SizeEstimator sizeEstimator = getSizeEstimator(plan);

    if (pr.hasPropertyIndexResult() || pr.evaluateSyncNodeTypeRestriction()) {
        itr = mergePropertyIndexResult(plan, rootState, itr);
    }

    return new FulltextPathCursor(itr, rItr, plan, settings, sizeEstimator);
}