in src/olympia/search/filters.py [0:0]
def primary_should_rules(self, search_query, lang):
"""Return "primary" should rules for the query.
These are the ones using the strongest boosts and are only applied to
the add-on name.
Applied rules:
* Exact match on the name, using the right translation if possible
(boost=100.0)
* Then text matches, using a language specific analyzer if possible
(boost=5.0)
* Phrase matches that allows swapped terms (boost=8.0)
* Then text matches, using the standard text analyzer (boost=6.0)
* Then look for the query as a prefix of a name (boost=3.0)
"""
should = [self.generate_exact_name_match_query(search_query, lang)]
# If the requested language has an analyzer we support, we also match
# against the corresponding translated field. Matches against the name
# in the default locale are added below in any case.
analyzer = self.get_locale_analyzer(lang)
if analyzer:
# Like in generate_exact_name_match_query() above, we want to
# search in all languages supported by this analyzer.
fields = [
'name_l10n_%s' % lang_code
for lang_code in amo.SEARCH_ANALYZER_MAP[analyzer]
]
should.append(
query.MultiMatch(
**{
'_name': 'MultiMatch(%s)' % ','.join(fields),
'fields': fields,
'query': search_query,
'boost': 5.0,
'analyzer': analyzer,
'operator': 'and',
}
)
)
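# Illustrative only (the language codes here are hypothetical, standing
# in for whatever amo.SEARCH_ANALYZER_MAP lists for the analyzer): if
# the analyzer covered 'en-us' and 'en-gb', the clause above would
# serialize to roughly:
#   {'multi_match': {
#       '_name': 'MultiMatch(name_l10n_en-us,name_l10n_en-gb)',
#       'fields': ['name_l10n_en-us', 'name_l10n_en-gb'],
#       'query': search_query, 'analyzer': analyzer,
#       'operator': 'and', 'boost': 5.0}}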
# The rest of the rules are applied to 'name', the field containing the
# default locale translation only. That field is indexed with word
# delimiter rules, a lowercase filter, etc., which help find matches at
# the expense of any language-specific features.
if ' ' in search_query:
should.append(
query.MatchPhrase(
**{
'name': {
'_name': 'MatchPhrase(name)',
'query': search_query,
'boost': 8.0,
'slop': 1,
},
}
)
)
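# For example (the names here are made up): with slop=1 the query
# "tab manager" can still phrase-match an add-on named
# "Tab Session Manager", since "manager" only needs to move by one
# position for the phrase to line up.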
should.extend(
[
query.Match(
**{
'name': {
'_name': 'Match(name)',
'analyzer': 'standard',
'query': search_query,
'boost': 6.0,
'operator': 'and',
},
}
),
query.Prefix(
**{
'name': {
'_name': 'Prefix(name)',
'value': search_query,
'boost': 3.0,
},
}
),
]
)
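# These two clauses serialize to {'match': {'name': {...}}} and
# {'prefix': {'name': {...}}}. Note that prefix queries are not
# analyzed: the prefix clause only matches when a term indexed for the
# name starts with the raw query string.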
# Add two queries inside a single DisMax rule (avoiding over-boosting
# when an add-on name matches both queries) to support partial & fuzzy
# matches, both of which allow some words in the query to be absent.
# This is only done for short query strings: long ones, depending on
# which characters they contain and how many words are present, can be
# too costly.
# Again, this is applied to 'name' in the default locale, without the
# language-specific analysis.
if len(search_query) < self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
should.append(
query.DisMax(
# We only care if one of these matches, so we leave tie_breaker
# to the default value of 0.0.
_name='DisMax(FuzzyMatch(name), Match(name.trigrams))',
boost=4.0,
queries=[
# For the fuzzy query, only slight misspellings should be
# corrected, but we allow some of the words to be absent
# as well:
# 1 or 2 terms: should all be present
# 3 terms: 2 should be present
# 4 terms or more: 25% can be absent
{
'match': {
'name': {
'query': search_query,
'prefix_length': 2,
'fuzziness': 'AUTO',
'minimum_should_match': '2<2 3<-25%',
}
}
},
# For the trigrams query, we require at least 67% of
# the trigrams to be present. minimum_should_match
# percentages are rounded down to the nearest integer,
# so picking 66% would mean a query producing 3 trigrams
# only requires 1 matching trigram in the name:
# int(3 * 66 / 100) = 1. 67% ensures 2 need to match:
# int(3 * 67 / 100) = 2.
{
'match': {
'name.trigrams': {
'query': search_query,
'minimum_should_match': '67%',
}
}
},
],
)
)
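# Worked example for the '2<2 3<-25%' rule above (the query text is
# hypothetical): a 4-word query such as "free pdf download tool" needs
# 75% of its terms to be present, i.e. int(4 * 0.75) = 3 of the 4
# words must match in the name.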
return should
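# A minimal sketch of how this list is meant to be consumed (the exact
# caller is an assumption, not shown in this excerpt): the rules are
# typically combined into a Bool query where at least one clause has
# to match, e.g.
#   query.Bool(should=primary_rules + secondary_rules,
#              minimum_should_match=1)
# so that each matching clause contributes its boosted score.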