in expanded_checklist/checklist/perturb.py [0:0]
def perturb(data, perturb_fn, keep_original=True, nsamples=None, *args, **kwargs):
    """Perturb each example in ``data`` with ``perturb_fn`` and collect the results.

    Parameters
    ----------
    data : list
        List of examples, could be strings, tuples, dicts, spacy docs, whatever
    perturb_fn : function
        Arguments: (example, *args, **kwargs)
        Returns: list (or numpy array) of examples, or (examples, meta) if
        meta=True in **kwargs. Any other return type is stored as one example.
        Can also return None (or only empty values) if the perturbation does
        not apply, in which case the example is skipped entirely.
    keep_original : bool
        if True, include the original example (from data) as the first entry
        of each output group; its meta entry is None
    nsamples : int
        if set, data is visited in random order and processing stops once
        this many examples have been successfully perturbed
    meta : bool
        (keyword, forwarded to perturb_fn) if True, perturb_fn returns
        (examples, meta), and meta is added to ret.meta
    labels : sequence
        (keyword, NOT forwarded to perturb_fn) labels indexed like data; the
        labels of the examples that were kept are returned next to the result

    Returns
    -------
    MunchWithAdd
        will have .data and .meta (if meta=True in **kwargs)
    (MunchWithAdd, list)
        returned instead when a non-empty ``labels`` was given; the list holds
        the label of every kept example, in output order
    """
    def _is_blank(value):
        # ndarray truthiness raises for more than one element, so test its
        # size instead of relying on ``not value``.
        if isinstance(value, np.ndarray):
            return value.size == 0
        return not value

    ret = MunchWithAdd()
    use_meta = kwargs.get('meta', False)
    # Always strip ``labels`` so it is never forwarded to perturb_fn, even
    # when it was passed as None or as an empty sequence.
    labels = kwargs.pop('labels', None)
    # len() rather than truthiness so that numpy-array labels are accepted.
    has_labels = labels is not None and len(labels) > 0
    labels_for_kept_data = []
    ret_data = []
    meta = []
    order = list(range(len(data)))
    samples = 0
    if nsamples:
        np.random.shuffle(order)
    for i in order:
        example = data[i]
        variants = []
        variant_meta = []
        if keep_original:
            # Taken before perturb_fn runs, so the stored original cannot be
            # affected by whatever perturb_fn does to the example.
            variants.append(recursive_apply(example, str))
            variant_meta.append(None)
        result = perturb_fn(example, *args, **kwargs)
        # Skip when nothing came back, or when every returned item is empty
        # (with meta=True this means both examples and meta are empty).
        if _is_blank(result) or all(_is_blank(item) for item in result):
            continue
        extra = []
        if use_meta:
            result, extra = result
        if isinstance(result, (np.ndarray, list)):
            variants.extend(result)
            variant_meta.extend(extra)
        else:
            # A single example (e.g. a string or a tuple) is stored as-is.
            variants.append(result)
            variant_meta.append(extra)
        ret_data.append(variants)
        meta.append(variant_meta)
        if has_labels:
            labels_for_kept_data.append(labels[i])
        samples += 1
        if nsamples and samples == nsamples:
            break
    ret.data = ret_data
    if use_meta:
        ret.meta = meta
    if has_labels:
        return ret, labels_for_kept_data
    return ret