in src/olympia/search/filters.py [0:0]
def primary_should_rules(self, search_query, lang):
"""Return "primary" should rules for the query.
These are the ones using the strongest boosts and are only applied to
the add-on name.
Applied rules:
* Exact match on the name, using the right translation if possible
(boost=100.0)
* Then text matches, using a language specific analyzer if possible
(boost=5.0)
* Phrase matches that allows swapped terms (boost=8.0)
* Then text matches, using the standard text analyzer (boost=6.0)
* Then look for the query as a prefix of a name (boost=3.0)
"""
should = [self.generate_exact_name_match_query(search_query, lang)]
# If the requested language has an analyzer we support, we also match
# against the corresponding translated field. Matches against the name
# in the default locale are added below in any case.
analyzer = self.get_locale_analyzer(lang)
if analyzer:
# Like in generate_exact_name_match_query() above, we want to
# search in all languages supported by this analyzer.
fields = [
'name_l10n_%s' % lang_code
for lang_code in amo.SEARCH_ANALYZER_MAP[analyzer]
]
should.append(
query.MultiMatch(
**{
'_name': 'MultiMatch(%s)' % ','.join(fields),
'fields': fields,
'query': search_query,
'boost': 5.0,
'analyzer': analyzer,
'operator': 'and',
}
)
)
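# Illustrative only (the language codes here are hypothetical, standing
# in for whatever amo.SEARCH_ANALYZER_MAP lists for the analyzer): if
# the analyzer covered 'en-us' and 'en-gb', the clause above would
# serialize to roughly:
#   {'multi_match': {
#       '_name': 'MultiMatch(name_l10n_en-us,name_l10n_en-gb)',
#       'fields': ['name_l10n_en-us', 'name_l10n_en-gb'],
#       'query': search_query, 'analyzer': analyzer,
#       'operator': 'and', 'boost': 5.0}}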
# The rest of the rules are applied to 'name', the field containing the
# default locale translation only. That field is indexed with word
# delimiter rules, a lowercase filter, etc., which help find matches at
# the expense of any language-specific features.
if ' ' in search_query:
should.append(
query.MatchPhrase(
**{
'name': {
'_name': 'MatchPhrase(name)',
'query': search_query,
'boost': 8.0,
'slop': 1,
},
}
)
)
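# For example (the names here are made up): with slop=1 the query
# "tab manager" can still phrase-match an add-on named
# "Tab Session Manager", since "manager" only needs to move by one
# position for the phrase to line up.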
should.extend(
[
query.Match(
**{
'name': {
'_name': 'Match(name)',
'analyzer': 'standard',
'query': search_query,
'boost': 6.0,
'operator': 'and',
},
}
),
query.Prefix(
**{
'name': {
'_name': 'Prefix(name)',
'value': search_query,
'boost': 3.0,
},
}
),
]
)
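# These two clauses serialize to {'match': {'name': {...}}} and
# {'prefix': {'name': {...}}}. Note that prefix queries are not
# analyzed: the prefix clause only matches when a term indexed for the
# name starts with the raw query string.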
# Add two queries inside a single DisMax rule (avoiding over-boosting
# when an add-on name matches both queries) to support partial & fuzzy
# matches, both of which allow some words in the query to be absent.
# This is only done for short query strings: long ones, depending on
# which characters they contain and how many words are present, can be
# too costly.
# Again, this is applied to 'name' in the default locale, without the
# language-specific analysis.
if len(search_query) < self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
should.append(
query.DisMax(
# We only care if one of these matches, so we leave tie_breaker
# to the default value of 0.0.
_name='DisMax(FuzzyMatch(name), Match(name.trigrams))',
boost=4.0,
queries=[
# For the fuzzy query, only slight misspellings should be
# corrected, but we allow some of the words to be absent
# as well:
# 1 or 2 terms: should all be present
# 3 terms: 2 should be present
# 4 terms or more: 25% can be absent
{
'match': {
'name': {
'query': search_query,
'prefix_length': 2,
'fuzziness': 'AUTO',
'minimum_should_match': '2<2 3<-25%',
}
}
},
# For the trigrams query, we require at least 67% of
# the trigrams to be present. minimum_should_match
# percentages are rounded down to the nearest integer,
# so picking 66% would mean a query producing 3 trigrams
# only requires 1 matching trigram in the name:
# int(3 * 66 / 100) = 1. 67% ensures 2 need to match:
# int(3 * 67 / 100) = 2.
{
'match': {
'name.trigrams': {
'query': search_query,
'minimum_should_match': '67%',
}
}
},
],
)
)
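# Worked example for the '2<2 3<-25%' rule above (the query text is
# hypothetical): a 4-word query such as "free pdf download tool" needs
# 75% of its terms to be present, i.e. int(4 * 0.75) = 3 of the 4
# words must match in the name.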
return should
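# A minimal sketch of how this list is meant to be consumed (the exact
# caller is an assumption, not shown in this excerpt): the rules are
# typically combined into a Bool query where at least one clause has
# to match, e.g.
#   query.Bool(should=primary_rules + secondary_rules,
#              minimum_should_match=1)
# so that each matching clause contributes its boosted score.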