def remove_negation()

in expanded_checklist/checklist/perturb.py [0:0]


    def remove_negation(doc):
        """Removes negation from doc.
        This is experimental, may or may not work.

        A token counts as a negation when its lemma is ``'not'`` or its
        dependency label is ``'neg'``. A negation is left untouched when it
        is directly followed by punctuation or directly preceded by ``'or'``.
        All remaining negations are removed (there is currently no option to
        remove only some of them):

        * if the preceding token is a form of ``will``/``can``/``do``, that
          auxiliary is re-emitted, conjugated like its original surface form;
        * if the preceding token is a form of ``do`` and the following token
          is a verb, the auxiliary is dropped too and the verb is conjugated
          to carry the auxiliary's tense instead.

        Parameters
        ----------
        doc : spacy.token.Doc
            input

        Returns
        -------
        string or None
            The text with all eligible negations removed, pieces joined by
            single spaces. None if there was no negation to remove.

        """
        def neighbours(idx):
            """Return the tokens right before and after ``idx`` (None at the edges)."""
            before = doc[idx - 1] if idx != 0 else None
            after = doc[idx + 1] if len(doc) > idx + 1 else None
            return before, after

        def conjugate_like(model, verb, fallback):
            """Conjugate ``verb`` with the tense parameters detected for ``model``.

            Returns ``fallback`` when no tense can be detected for ``model``
            or when the conjugation yields nothing, instead of raising.
            """
            forms = pattern.en.tenses(model)
            if not forms:
                # Nothing to vote on: most_common(1) would be empty here.
                return fallback
            # Most frequent tense among the candidate analyses; the other
            # parameters are taken from the first analysis.
            tense = collections.Counter(
                form[0] for form in forms).most_common(1)[0][0]
            params = list(forms[0])
            params[0] = tense
            conjugated = pattern.en.conjugate(verb, *params)
            return conjugated if conjugated else fallback

        # Collect the indices of the negations that will actually be removed.
        negations = []
        for idx, token in enumerate(doc):
            if token.lemma_ != 'not' and token.dep_ != 'neg':
                continue
            before, after = neighbours(idx)
            if after is not None and after.pos_ == 'PUNCT':
                continue
            if before is not None and before.text == 'or':
                continue
            negations.append(idx)
        if not negations:
            return None

        # Rebuild the text: untouched stretch, optional replacement word,
        # untouched stretch, ... and finally the tail after the last negation.
        pieces = []
        start = 0
        for idx in negations:
            before, after = neighbours(idx)
            id_start = idx
            id_end = idx + 1
            replacement = ''
            if before is not None and before.lemma_ in ('will', 'can', 'do'):
                # The auxiliary is consumed together with the negation.
                id_start = idx - 1
                if (before.lemma_ == 'do' and after is not None
                        and after.pos_ == 'VERB'):
                    # "does not like" -> "likes": the verb takes over the
                    # auxiliary's tense and is consumed as well.
                    replacement = conjugate_like(
                        before.text, after.text, after.text)
                    id_end = idx + 2
                else:
                    replacement = conjugate_like(
                        before.text, before.lemma_, before.lemma_)
            pieces.append(doc[start:id_start].text)
            pieces.append(replacement)
            start = id_end
        pieces.append(doc[start:].text)
        # Skipping empty pieces avoids stray leading/trailing/double spaces
        # when a negation sits at the very start or end of the doc.
        return ' '.join(piece for piece in pieces if piece)