in augmentation/metrics.py [0:0]
def jaccard_ngrams(text_1: str, text_2: str, n: int = 1, stem: bool = False):
    """Score two texts by passing their n-gram sets to ``jaccard_score``.

    Each text is tokenized with ``word_tokenize``, optionally stemmed
    token-by-token with ``STEMMER.stem``, split into n-grams with
    ``ngrams`` and de-duplicated into a set.

    Args:
        text_1: First text to compare.
        text_2: Second text to compare.
        n: Size of the n-grams handed to ``ngrams`` (default 1).
        stem: When truthy, every token is stemmed before n-grams are built.

    Returns:
        Whatever ``jaccard_score`` returns for the two n-gram sets.
    """

    def _ngram_set(text):
        # Tokenize first; stemming (if requested) works on individual tokens.
        tokens = word_tokenize(text)
        if stem:
            tokens = [STEMMER.stem(token) for token in tokens]
        # A set drops repeated n-grams, so only distinct n-grams are compared.
        return set(ngrams(tokens, n))

    first_ngrams = _ngram_set(text_1)
    second_ngrams = _ngram_set(text_2)
    return jaccard_score(first_ngrams, second_ngrams)