in src/utils/smartQueries.ts [15:223]
/**
 * Build the list of queries to try for the tokens typed by the user.
 *
 * The tokens are first grouped into candidate terms by `smartTerms`. When no
 * term matches, a single fallback query is returned in which every token is
 * required and wildcarded on both sides. Otherwise the result is the queries
 * for the matched terms (plus their stop-word-free variants) followed by the
 * queries for looser variants that each drop one token.
 *
 * Side effects: on the non-fallback path this function reassigns
 * `lunr.generateStopWordFilter` and `lunr.stopWordFilter`, calls
 * `lunrLanguageZh(lunr)`, sets `maybeTyping` on the last item of every term,
 * and appends to the array returned by `smartTerms`.
 *
 * @param tokens - The search tokens, in the order they were typed.
 * @param zhDictionary - Dictionary forwarded to `smartTerms`.
 * @returns The queries produced by `getQueriesMaybeTyping` for the primary
 *   terms, followed by those for the extra (one-token-missing) terms.
 */
export function smartQueries(tokens: string[], zhDictionary: string[]): SmartQuery[] {
  const terms = smartTerms(tokens, zhDictionary);

  if (terms.length === 0) {
    // There are no matched terms.
    // All tokens are considered required and with wildcard.
    return [
      {
        tokens,
        term: tokens.map(value => ({
          value,
          presence: lunr.Query.presence.REQUIRED,
          wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING,
        })),
      },
    ];
  }

  // The last token of a term may be incomplete while the user is typing.
  for (const term of terms) {
    const lastItem = term[term.length - 1];
    // Guard against an empty term so that it cannot abort the whole query.
    if (lastItem) {
      lastItem.maybeTyping = true;
    }
  }

  // Install the English stop word filter on `lunr`.
  // NOTE(review): this runs on every call that reaches this point; confirm
  // whether it is meant to live at module scope instead.
  lunr.generateStopWordFilter = function (stopWords) {
    // Map each stop word to itself so that membership is a single lookup.
    const words = stopWords.reduce(function (memo, stopWord) {
      memo[stopWord] = stopWord;
      return memo;
    }, {});
    // Returns the token when it is not a stop word, `undefined` otherwise
    // (also `undefined` for a falsy token).
    return function (token) {
      if (token && words[token.toString()] !== token.toString()) return token;
    };
  };
  lunr.stopWordFilter = lunr.generateStopWordFilter([
    'a',
    'able',
    'about',
    'across',
    'after',
    'all',
    'almost',
    'also',
    'am',
    'among',
    'an',
    'and',
    'any',
    'are',
    'as',
    'at',
    'be',
    'because',
    'been',
    'but',
    'by',
    'can',
    'cannot',
    'could',
    'dear',
    'did',
    'do',
    'does',
    'either',
    'else',
    'ever',
    'every',
    'for',
    'from',
    'get',
    'got',
    'had',
    'has',
    'have',
    'he',
    'her',
    'hers',
    'him',
    'his',
    'how',
    'however',
    'i',
    'if',
    'in',
    'into',
    'is',
    'it',
    'its',
    'just',
    'least',
    'let',
    'like',
    'likely',
    'may',
    'me',
    'might',
    'most',
    'must',
    'my',
    'neither',
    'no',
    'nor',
    'not',
    'of',
    'off',
    'often',
    'on',
    'only',
    'or',
    'other',
    'our',
    'own',
    'rather',
    'said',
    'say',
    'says',
    'she',
    'should',
    'since',
    'so',
    'some',
    'than',
    'that',
    'the',
    'their',
    'them',
    'then',
    'there',
    'these',
    'they',
    'this',
    'tis',
    'to',
    'too',
    'twas',
    'us',
    'wants',
    'was',
    'we',
    'were',
    'what',
    'when',
    'where',
    'which',
    'while',
    'who',
    'whom',
    'why',
    'will',
    'with',
    'would',
    'yet',
    'you',
    'your',
  ]);
  lunrLanguageZh(lunr);

  // Try to append terms without stop words,
  // since they are removed in the index.
  const stopWordPipelines: lunr.PipelineFunction[] = [];
  for (const lang of language) {
    if (lang === 'en') {
      if (!removeDefaultStopWordFilter) {
        stopWordPipelines.unshift(lunr.stopWordFilter);
      }
    } else {
      // `lunr[lang]` is undefined when that language has not been registered
      // on `lunr`; skip it instead of throwing on the property access.
      const lunrLang = (lunr as any)[lang] as typeof lunr | undefined;
      if (lunrLang && lunrLang.stopWordFilter) {
        stopWordPipelines.unshift(lunrLang.stopWordFilter);
      }
    }
  }

  let refinedTerms: SmartTerm[];
  if (stopWordPipelines.length > 0) {
    // Run a term through every collected stop word filter, keeping only the
    // items for which each filter returns a truthy value.
    const pipe = (term: SmartTerm) =>
      stopWordPipelines.reduce(
        (remaining, filter) =>
          remaining.filter(item => (filter as unknown as (str: string) => string | undefined)(item.value)),
        term,
      );
    refinedTerms = [];
    const newTerms: SmartTerm[] = [];
    for (const term of terms) {
      const filteredTerm = pipe(term);
      refinedTerms.push(filteredTerm);
      // Add extra terms only if some stop words are removed,
      // and some non-stop-words exist too.
      if (filteredTerm.length < term.length && filteredTerm.length > 0) {
        newTerms.push(filteredTerm);
      }
    }
    terms.push(...newTerms);
  } else {
    refinedTerms = terms.slice();
  }

  // Also add looser variants that each omit one of the searched tokens,
  // when the term contains 3 or more tokens. Tokens are dropped from the
  // last one to the first.
  const extraTerms: SmartTerm[] = [];
  for (const term of refinedTerms) {
    if (term.length > 2) {
      for (let i = term.length - 1; i >= 0; i -= 1) {
        extraTerms.push(term.slice(0, i).concat(term.slice(i + 1)));
      }
    }
  }

  return getQueriesMaybeTyping(terms).concat(getQueriesMaybeTyping(extraTerms));
}