def jaccard_ngrams()

in augmentation/metrics.py [0:0]


def jaccard_ngrams(text_1: str, text_2: str, n: int = 1, stem: bool = False):
    """Score the overlap between the n-gram sets of two texts.

    Each text is tokenized with ``word_tokenize``, optionally stemmed
    token-by-token with ``STEMMER.stem``, converted to n-grams of size ``n``
    via ``ngrams`` and collapsed into a set (so repeated n-grams count once).
    The two resulting sets are handed to ``jaccard_score``.

    Args:
        text_1: First text to compare.
        text_2: Second text to compare.
        n: N-gram size forwarded to ``ngrams``; defaults to 1.
        stem: When truthy, every token is stemmed before n-grams are built.

    Returns:
        The value produced by ``jaccard_score`` for the two n-gram sets.
    """

    def ngram_set(text):
        # Tokenize first; stemming (if requested) operates on the tokens.
        tokens = word_tokenize(text)
        if stem:
            tokens = [STEMMER.stem(token) for token in tokens]
        # A set discards duplicate n-grams and their ordering.
        return set(ngrams(tokens, n))

    first_set = ngram_set(text_1)
    second_set = ngram_set(text_2)
    return jaccard_score(first_set, second_set)