lunr.Index.prototype.query = function()

in static/api/python/3.3.x/lunr.js [1960:2242]
134 lines of code
28 McCabe index (conditional complexity)

    /**
     * Runs a query, built by the passed callback, against this index.
     *
     * A new lunr.Query scoped to this index's fields is created and handed to
     * `fn`, which is expected to add clauses to it. Each clause is then
     * expanded against the index token set, matching documents and their
     * metadata are collected per field, and every matching document is scored
     * by summing the similarity between its field vectors and the per-field
     * query vectors.
     *
     * @param {Function} fn - Invoked with the new lunr.Query as both `this`
     *   and the first argument.
     * @returns {Object[]} Result objects of the form `{ref, score, matchData}`,
     *   one per matching document, sorted by score with the highest first.
     */
    lunr.Index.prototype.query = function (fn) {
      // for each query clause
      // * process terms
      // * expand terms from token set
      // * find matching documents and metadata
      // * get document vectors
      // * score documents

      var query = new lunr.Query(this.fields),
          matchingFields = Object.create(null),
          queryVectors = Object.create(null),
          termFieldCache = Object.create(null),
          requiredMatches = Object.create(null),
          prohibitedMatches = Object.create(null),
          // Combiner handed to Vector#upsert; hoisted so that it is not
          // re-created for every term/field pair.
          addScores = function (a, b) { return a + b }

      /*
       * To support field level boosts a query vector is created per
       * field. An empty vector is eagerly created to support negated
       * queries.
       */
      for (var i = 0; i < this.fields.length; i++) {
        queryVectors[this.fields[i]] = new lunr.Vector()
      }

      fn.call(query, query)

      for (var i = 0; i < query.clauses.length; i++) {
        /*
         * Unless the pipeline has been disabled for this term, which is
         * the case for terms with wildcards, we need to pass the clause
         * term through the search pipeline. A pipeline returns an array
         * of processed terms. Pipeline functions may expand the passed
         * term, which means we may end up performing multiple index lookups
         * for a single query term.
         */
        var clause = query.clauses[i],
            terms = null,
            clauseMatches = lunr.Set.empty

        if (clause.usePipeline) {
          terms = this.pipeline.runString(clause.term, {
            fields: clause.fields
          })
        } else {
          terms = [clause.term]
        }

        for (var m = 0; m < terms.length; m++) {
          var term = terms[m]

          /*
           * Each term returned from the pipeline needs to use the same query
           * clause object, e.g. the same boost and or edit distance. The
           * simplest way to do this is to re-use the clause object but mutate
           * its term property.
           */
          clause.term = term

          /*
           * From the term in the clause we create a token set which will then
           * be used to intersect the indexes token set to get a list of terms
           * to lookup in the inverted index
           */
          var termTokenSet = lunr.TokenSet.fromClause(clause),
              expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()

          /*
           * If a term marked as required does not exist in the tokenSet it is
           * impossible for the search to return any matches. We set all the
           * field scoped required matches sets to empty and stop examining
           * the remaining terms of this clause. Note that this only leaves
           * the per-term loop; later clauses are still visited.
           */
          if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {
            for (var k = 0; k < clause.fields.length; k++) {
              var field = clause.fields[k]
              requiredMatches[field] = lunr.Set.empty
            }

            break
          }

          for (var j = 0; j < expandedTerms.length; j++) {
            /*
             * For each term get the posting and termIndex, this is required for
             * building the query vector.
             */
            var expandedTerm = expandedTerms[j],
                posting = this.invertedIndex[expandedTerm],
                termIndex = posting._index

            for (var k = 0; k < clause.fields.length; k++) {
              /*
               * For each field that this query term is scoped by (by default
               * all fields are in scope) we need to get all the document refs
               * that have this term in that field.
               *
               * The posting is the entry in the invertedIndex for the matching
               * term from above.
               */
              var field = clause.fields[k],
                  fieldPosting = posting[field],
                  matchingDocumentRefs = Object.keys(fieldPosting),
                  termField = expandedTerm + "/" + field,
                  matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)

              /*
               * if the presence of this term is required ensure that the matching
               * documents are added to the set of required matches for this clause.
               */
              if (clause.presence === lunr.Query.presence.REQUIRED) {
                clauseMatches = clauseMatches.union(matchingDocumentsSet)

                if (requiredMatches[field] === undefined) {
                  requiredMatches[field] = lunr.Set.complete
                }
              }

              /*
               * if the presence of this term is prohibited ensure that the matching
               * documents are added to the set of prohibited matches for this field,
               * creating that set if it does not yet exist.
               */
              if (clause.presence === lunr.Query.presence.PROHIBITED) {
                if (prohibitedMatches[field] === undefined) {
                  prohibitedMatches[field] = lunr.Set.empty
                }

                prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)

                /*
                 * Prohibited matches should not be part of the query vector used for
                 * similarity scoring and no metadata should be extracted so we continue
                 * to the next field
                 */
                continue
              }

              /*
               * The query field vector is populated using the termIndex found for
               * the term and a unit value with the appropriate boost applied.
               * Using upsert because there could already be an entry in the vector
               * for the term we are working with. In that case we just add the scores
               * together.
               */
              queryVectors[field].upsert(termIndex, clause.boost, addScores)

              /*
               * If we've already seen this term, field combo then we've already collected
               * the matching documents and metadata, no need to go through all that again
               */
              if (termFieldCache[termField]) {
                continue
              }

              for (var l = 0; l < matchingDocumentRefs.length; l++) {
                /*
                 * All metadata for this term/field/document triple
                 * are then extracted and collected into an instance
                 * of lunr.MatchData ready to be returned in the query
                 * results
                 */
                var matchingDocumentRef = matchingDocumentRefs[l],
                    matchingFieldRef = new lunr.FieldRef(matchingDocumentRef, field),
                    metadata = fieldPosting[matchingDocumentRef],
                    fieldMatch

                if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {
                  matchingFields[matchingFieldRef] = new lunr.MatchData(expandedTerm, field, metadata)
                } else {
                  fieldMatch.add(expandedTerm, field, metadata)
                }
              }

              termFieldCache[termField] = true
            }
          }
        }

        /*
         * If the presence was required we need to update the requiredMatches field sets.
         * We do this after all fields for the term have collected their matches because
         * the clause terms presence is required in _any_ of the fields not _all_ of the
         * fields.
         */
        if (clause.presence === lunr.Query.presence.REQUIRED) {
          for (var k = 0; k < clause.fields.length; k++) {
            var field = clause.fields[k],
                requiredSoFar = requiredMatches[field]

            /*
             * The entry for this field is only created while expanding terms
             * above. When the pipeline returned no terms at all for a required
             * clause that code never ran, so the entry may still be missing.
             * Start from the complete set in that case instead of calling
             * intersect on undefined; intersecting with the (empty) clause
             * matches then gives the same outcome as when an earlier required
             * clause had already created the entry.
             */
            if (requiredSoFar === undefined) {
              requiredSoFar = lunr.Set.complete
            }

            requiredMatches[field] = requiredSoFar.intersect(clauseMatches)
          }
        }
      }

      /*
       * Need to combine the field scoped required and prohibited
       * matching documents into a global set of required and prohibited
       * matches
       */
      var allRequiredMatches = lunr.Set.complete,
          allProhibitedMatches = lunr.Set.empty

      for (var i = 0; i < this.fields.length; i++) {
        var field = this.fields[i]

        if (requiredMatches[field]) {
          allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])
        }

        if (prohibitedMatches[field]) {
          allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])
        }
      }

      var matchingFieldRefs = Object.keys(matchingFields),
          results = [],
          matches = Object.create(null)

      /*
       * If the query is negated (contains only prohibited terms)
       * we need to get _all_ fieldRefs currently existing in the
       * index. This is only done when we know that the query is
       * entirely prohibited terms to avoid any cost of getting all
       * fieldRefs unnecessarily.
       *
       * Additionally, blank MatchData must be created to correctly
       * populate the results.
       */
      if (query.isNegated()) {
        matchingFieldRefs = Object.keys(this.fieldVectors)

        for (var i = 0; i < matchingFieldRefs.length; i++) {
          matchingFields[matchingFieldRefs[i]] = new lunr.MatchData()
        }
      }

      for (var i = 0; i < matchingFieldRefs.length; i++) {
        /*
         * Currently we have document fields that match the query, but we
         * need to return documents. The matchData and scores are combined
         * from multiple fields belonging to the same document.
         *
         * Scores are calculated by field, using the query vectors created
         * above, and combined into a final document score using addition.
         */
        var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
            docRef = fieldRef.docRef

        if (!allRequiredMatches.contains(docRef)) {
          continue
        }

        if (allProhibitedMatches.contains(docRef)) {
          continue
        }

        var fieldVector = this.fieldVectors[fieldRef],
            score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
            docMatch

        if ((docMatch = matches[docRef]) !== undefined) {
          docMatch.score += score
          docMatch.matchData.combine(matchingFields[fieldRef])
        } else {
          var match = {
            ref: docRef,
            score: score,
            matchData: matchingFields[fieldRef]
          }
          matches[docRef] = match
          results.push(match)
        }
      }

      /*
       * Sort the results objects by score, highest first.
       */
      return results.sort(function (a, b) {
        return b.score - a.score
      })
    }