in expanded_checklist/checklist/perturb.py [0:0]
def change_names(doc, meta=False, n=10, first_only=False, last_only=False, seed=None):
    """Replace person names in a document with other names.

    Only entities whose tokens are all tagged ``PERSON`` are considered, and
    only when their first token is found in the known women/men name sets.
    Multi-token entities whose second token is longer than two characters
    and is not a known last name are skipped.

    Parameters
    ----------
    doc : spacy.tokens.Doc
        input
    meta : bool
        if True, will return list of (orig_name, new_name) as meta
    n : int
        number of names to replace original names with; replacements are
        sampled (with NumPy's global RNG) from the first ``90 + n`` entries
        of the relevant name list
    first_only : bool
        if True, will only replace first names
    last_only : bool
        if True, will only replace last names (has no effect on multi-token
        names when ``first_only`` is also True)
    seed : int
        random seed; when not None, reseeds NumPy's global random state

    Returns
    -------
    list(str)
        if meta=True, returns (list(str), list(tuple))
        Strings with names replaced. The final value is whatever
        ``process_ret`` returns for the collected candidates.
        Returns None immediately if ``last_only`` is set and a recognized
        single-token name is encountered.
    """
    if seed is not None:
        np.random.seed(seed)
    ents = [x.text for x in doc.ents if np.all([a.ent_type_ == 'PERSON' for a in x])]
    ret = []
    ret_m = []
    for x in ents:
        parts = x.split()
        if not parts:
            # Whitespace-only entity text: nothing to look up or replace.
            continue
        f = parts[0]
        first_cap = f.capitalize()
        sex = None
        if first_cap in Perturb.data['name_set']['women']:
            sex = 'women'
        # Checked second on purpose: a name present in both sets counts as 'men'.
        if first_cap in Perturb.data['name_set']['men']:
            sex = 'men'
        if not sex:
            continue
        if len(parts) > 1:
            second = parts[1]
            # Short second tokens (<= 2 chars, e.g. initials) are let through
            # without a last-name lookup.
            if len(second) > 2 and second.capitalize() not in Perturb.data['name_set']['last']:
                continue
        elif last_only:
            # NOTE(review): this aborts the whole call (dropping anything
            # already collected) rather than skipping this entity; kept as-is
            # because callers may rely on it -- confirm whether `continue`
            # was intended.
            return None
        names = Perturb.data['name'][sex][:90 + n]
        to_use = np.random.choice(names, n)
        if not first_only:
            f = x
            if len(parts) > 1:
                last = np.random.choice(Perturb.data['name']['last'][:90 + n], n)
                # Pair the *sampled* first names with the sampled last names.
                # (Previously the unsampled `names` list was zipped here, so
                # the first names were always the first n entries of the list.)
                to_use = ['%s %s' % (first, surname) for first, surname in zip(to_use, last)]
                if last_only:
                    to_use = last
                    f = parts[1]
        # The pattern depends only on `f`, so build it once per entity.
        pattern = r'\b%s\b' % re.escape(f)
        for y in to_use:
            ret.append(re.sub(pattern, y, doc.text))
            ret_m.append((f, y))
    return process_ret(ret, ret_m=ret_m, n=n, meta=meta)