in static/api/python/3.1.x/lunr.js [1960:2242]
lunr.Index.prototype.query = function (fn) {
  /*
   * Builds a lunr.Query scoped to this index's fields, hands it to `fn`
   * (as both `this` and the first argument) so the caller can add clauses,
   * then executes those clauses against the index.
   *
   * Returns an array of `{ ref, score, matchData }` objects, one per
   * matching document, sorted by score with the highest first.
   *
   * for each query clause
   * * process terms
   * * expand terms from token set
   * * find matching documents and metadata
   * * get document vectors
   * * score documents
   */
  var query = new lunr.Query(this.fields),
      matchingFields = Object.create(null),
      queryVectors = Object.create(null),
      termFieldCache = Object.create(null),
      requiredMatches = Object.create(null),
      prohibitedMatches = Object.create(null)

  /*
   * To support field level boosts a query vector is created per
   * field. An empty vector is eagerly created to support negated
   * queries.
   */
  for (var i = 0; i < this.fields.length; i++) {
    queryVectors[this.fields[i]] = new lunr.Vector
  }

  fn.call(query, query)

  for (var i = 0; i < query.clauses.length; i++) {
    /*
     * Unless the pipeline has been disabled for this term, which is
     * the case for terms with wildcards, we need to pass the clause
     * term through the search pipeline. A pipeline returns an array
     * of processed terms. Pipeline functions may expand the passed
     * term, which means we may end up performing multiple index lookups
     * for a single query term.
     */
    var clause = query.clauses[i],
        terms = null,
        clauseMatches = lunr.Set.empty

    if (clause.usePipeline) {
      terms = this.pipeline.runString(clause.term, {
        fields: clause.fields
      })
    } else {
      terms = [clause.term]
    }

    for (var m = 0; m < terms.length; m++) {
      var term = terms[m]

      /*
       * Each term returned from the pipeline needs to use the same query
       * clause object, e.g. the same boost and or edit distance. The
       * simplest way to do this is to re-use the clause object but mutate
       * its term property.
       */
      clause.term = term

      /*
       * From the term in the clause we create a token set which will then
       * be used to intersect the indexes token set to get a list of terms
       * to lookup in the inverted index
       */
      var termTokenSet = lunr.TokenSet.fromClause(clause),
          expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()

      /*
       * If a term marked as required does not exist in the tokenSet it is
       * impossible for the search to return any matches. We set all the field
       * scoped required matches set to empty and stop examining any further
       * terms of this clause (the break only leaves the loop over terms; the
       * remaining clauses are still processed).
       */
      if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {
        for (var k = 0; k < clause.fields.length; k++) {
          var field = clause.fields[k]
          requiredMatches[field] = lunr.Set.empty
        }

        break
      }

      for (var j = 0; j < expandedTerms.length; j++) {
        /*
         * For each term get the posting and termIndex, this is required for
         * building the query vector.
         */
        var expandedTerm = expandedTerms[j],
            posting = this.invertedIndex[expandedTerm],
            termIndex = posting._index

        for (var k = 0; k < clause.fields.length; k++) {
          /*
           * For each field that this query term is scoped by (by default
           * all fields are in scope) we need to get all the document refs
           * that have this term in that field.
           *
           * The posting is the entry in the invertedIndex for the matching
           * term from above.
           */
          var field = clause.fields[k],
              fieldPosting = posting[field],
              matchingDocumentRefs = Object.keys(fieldPosting),
              termField = expandedTerm + "/" + field,
              matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)

          /*
           * if the presence of this term is required ensure that the matching
           * documents are added to the set of required matches for this clause.
           *
           */
          if (clause.presence == lunr.Query.presence.REQUIRED) {
            clauseMatches = clauseMatches.union(matchingDocumentsSet)

            if (requiredMatches[field] === undefined) {
              requiredMatches[field] = lunr.Set.complete
            }
          }

          /*
           * if the presence of this term is prohibited ensure that the matching
           * documents are added to the set of prohibited matches for this field,
           * creating that set if it does not yet exist.
           */
          if (clause.presence == lunr.Query.presence.PROHIBITED) {
            if (prohibitedMatches[field] === undefined) {
              prohibitedMatches[field] = lunr.Set.empty
            }

            prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)

            /*
             * Prohibited matches should not be part of the query vector used for
             * similarity scoring and no metadata should be extracted so we continue
             * to the next field
             */
            continue
          }

          /*
           * The query field vector is populated using the termIndex found for
           * the term and a unit value with the appropriate boost applied.
           * Using upsert because there could already be an entry in the vector
           * for the term we are working with. In that case we just add the scores
           * together.
           */
          queryVectors[field].upsert(termIndex, clause.boost, function (a, b) { return a + b })

          /**
           * If we've already seen this term, field combo then we've already collected
           * the matching documents and metadata, no need to go through all that again
           */
          if (termFieldCache[termField]) {
            continue
          }

          for (var l = 0; l < matchingDocumentRefs.length; l++) {
            /*
             * All metadata for this term/field/document triple
             * are then extracted and collected into an instance
             * of lunr.MatchData ready to be returned in the query
             * results
             */
            var matchingDocumentRef = matchingDocumentRefs[l],
                matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field),
                metadata = fieldPosting[matchingDocumentRef],
                fieldMatch

            if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {
              matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)
            } else {
              fieldMatch.add(expandedTerm, field, metadata)
            }
          }

          termFieldCache[termField] = true
        }
      }
    }

    /**
     * If the presence was required we need to update the requiredMatches field sets.
     * We do this after all fields for the term have collected their matches because
     * the clause terms presence is required in _any_ of the fields not _all_ of the
     * fields.
     */
    if (clause.presence === lunr.Query.presence.REQUIRED) {
      for (var k = 0; k < clause.fields.length; k++) {
        var field = clause.fields[k],
            fieldRequiredMatches = requiredMatches[field]

        /*
         * When the pipeline returned no terms for this clause the loop over
         * terms never ran, so no entry may exist yet for this field. Seed it
         * with the complete set, exactly as the matching path above does,
         * instead of calling intersect on undefined and throwing. The
         * intersection is then taken with the still empty clauseMatches, as
         * already happens when an earlier required clause created the entry.
         */
        if (fieldRequiredMatches === undefined) {
          fieldRequiredMatches = lunr.Set.complete
        }

        requiredMatches[field] = fieldRequiredMatches.intersect(clauseMatches)
      }
    }
  }

  /**
   * Need to combine the field scoped required and prohibited
   * matching documents into a global set of required and prohibited
   * matches
   */
  var allRequiredMatches = lunr.Set.complete,
      allProhibitedMatches = lunr.Set.empty

  for (var i = 0; i < this.fields.length; i++) {
    var field = this.fields[i]

    if (requiredMatches[field]) {
      allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])
    }

    if (prohibitedMatches[field]) {
      allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])
    }
  }

  var matchingFieldRefs = Object.keys(matchingFields),
      results = [],
      matches = Object.create(null)

  /*
   * If the query is negated (contains only prohibited terms)
   * we need to get _all_ fieldRefs currently existing in the
   * index. This is only done when we know that the query is
   * entirely prohibited terms to avoid any cost of getting all
   * fieldRefs unnecessarily.
   *
   * Additionally, blank MatchData must be created to correctly
   * populate the results.
   */
  if (query.isNegated()) {
    matchingFieldRefs = Object.keys(this.fieldVectors)

    for (var i = 0; i < matchingFieldRefs.length; i++) {
      var matchingFieldRef = matchingFieldRefs[i]
      matchingFields[matchingFieldRef] = new lunr.MatchData
    }
  }

  for (var i = 0; i < matchingFieldRefs.length; i++) {
    /*
     * Currently we have document fields that match the query, but we
     * need to return documents. The matchData and scores are combined
     * from multiple fields belonging to the same document.
     *
     * Scores are calculated by field, using the query vectors created
     * above, and combined into a final document score using addition.
     */
    var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
        docRef = fieldRef.docRef

    if (!allRequiredMatches.contains(docRef)) {
      continue
    }

    if (allProhibitedMatches.contains(docRef)) {
      continue
    }

    /*
     * fieldRef is used as an object key here and below, so it is coerced
     * to its string form for the lookup.
     */
    var fieldVector = this.fieldVectors[fieldRef],
        score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
        docMatch

    if ((docMatch = matches[docRef]) !== undefined) {
      docMatch.score += score
      docMatch.matchData.combine(matchingFields[fieldRef])
    } else {
      var match = {
        ref: docRef,
        score: score,
        matchData: matchingFields[fieldRef]
      }
      matches[docRef] = match
      results.push(match)
    }
  }

  /*
   * Sort the results objects by score, highest first.
   */
  return results.sort(function (a, b) {
    return b.score - a.score
  })
}