def change_names()

in expanded_checklist/checklist/perturb.py [0:0]


    def change_names(doc, meta=False, n=10, first_only=False, last_only=False, seed=None):
        """Replace person names in ``doc`` with other names.

        Every entity in ``doc.ents`` whose tokens are all tagged ``PERSON`` and
        whose first word appears in ``Perturb.data['name_set']['women']`` or
        ``['men']`` produces ``n`` copies of ``doc.text`` with that name
        substituted. Replacement first names are drawn from the same list
        ('women' or 'men') as the original; candidates are sampled with
        ``np.random.choice`` from the first ``90 + n`` entries of the
        corresponding ``Perturb.data['name']`` list.

        Parameters
        ----------
        doc : spacy.token.Doc
            input
        meta : bool
            if True, will return list of (orig_name, new_name) as meta
        n : int
            number of names to replace each original name with
        first_only : bool
            if True, will only replace first names
        last_only : bool
            if True, will only replace last names; entities made of a single
            word are skipped because they have no last name to replace
        seed : int
            random seed; when given it seeds NumPy's *global* generator

        Returns
        -------
        list(str)
            Strings with names replaced, as post-processed by ``process_ret``.
            If meta=True, returns (list(str), list(tuple)).
            None when ``last_only`` is set, a single-word name was met, and no
            replacement could be produced for any other entity.

        """
        if seed is not None:
            np.random.seed(seed)
        name_set = Perturb.data['name_set']
        name_lists = Perturb.data['name']
        ents = [e.text for e in doc.ents if all(t.ent_type_ == 'PERSON' for t in e)]
        ret = []
        ret_m = []
        skipped_single_word = False
        for ent in ents:
            parts = ent.split()
            if not parts:
                # Whitespace-only entity text: nothing to look up or replace.
                continue
            first = parts[0]
            sex = None
            if first.capitalize() in name_set['women']:
                sex = 'women'
            # Evaluated after 'women', so a name present in both sets ends up 'men'.
            if first.capitalize() in name_set['men']:
                sex = 'men'
            if not sex:
                continue
            has_second_word = len(parts) > 1
            if has_second_word:
                # Only the second word is inspected, even for longer names.
                second = parts[1]
                # Words of up to two characters are let through unchecked
                # (presumably initials -- TODO confirm); longer ones must be
                # known last names.
                if len(second) > 2 and second.capitalize() not in name_set['last']:
                    continue
            elif last_only:
                # Skip just this entity. The previous `return None` here threw
                # away replacements already built for other entities and made
                # the result depend on entity order.
                skipped_single_word = True
                continue
            target = first
            to_use = np.random.choice(name_lists[sex][:90 + n], n)
            if not first_only:
                target = ent
                if has_second_word:
                    last = np.random.choice(name_lists['last'][:90 + n], n)
                    if last_only:
                        to_use = last
                        target = parts[1]
                    else:
                        # Pair the *sampled* first names with the sampled last
                        # names; zipping the unsampled candidate list ignored
                        # the random draw above.
                        to_use = ['%s %s' % (fn, ln) for fn, ln in zip(to_use, last)]
            # The pattern does not depend on the replacement, so build it once.
            pattern = re.compile(r'\b%s\b' % re.escape(target))
            for new_name in to_use:
                ret.append(pattern.sub(new_name, doc.text))
                ret_m.append((target, new_name))
        if skipped_single_word and not ret:
            # Keep the historical result for "last_only, but nothing replaceable".
            return None
        return process_ret(ret, ret_m=ret_m, n=n, meta=meta)