def perturb()

in expanded_checklist/checklist/perturb.py [0:0]


    def perturb(data, perturb_fn, keep_original=True, nsamples=None, *args, **kwargs):
        """Perturb each example in ``data`` with ``perturb_fn`` and collect the results.

        Parameters
        ----------
        data : list
            List of examples, could be strings, tuples, dicts, spacy docs, whatever
        perturb_fn : function
            Arguments: (example, *args, **kwargs)
            Returns: list (or numpy array) of examples, a single example, or
            (examples, meta) if meta=True in **kwargs.
            Can also return None (or an empty / all-empty result) if the
            perturbation does not apply, and the example will be skipped.
        keep_original : bool
            if True, the original example (passed through
            ``recursive_apply(example, str)``) is placed first in each output
            group, with ``None`` as its meta entry
        nsamples : int
            maximum number of examples that contribute an output group. When
            set, examples are visited in random order (``np.random.shuffle``)
            and iteration stops once this many examples were successfully
            perturbed.
        meta : bool
            (keyword) if True, perturb_fn returns (examples, meta), and meta is
            added to ret.meta. The keyword is also forwarded to perturb_fn.
        labels : sequence, optional
            (keyword) labels indexable by position in ``data``. Never forwarded
            to perturb_fn. When non-empty, the labels of the examples that
            were kept are returned alongside the result.

        Returns
        -------
        MunchWithAdd
            will have .data (one list per kept example) and .meta (if meta=True
            in **kwargs)
        (MunchWithAdd, list)
            returned instead when non-empty ``labels`` were given; the list
            holds ``labels[i]`` for every kept example, in output order

        """
        def _is_blank(value):
            # The truth value of a numpy array is ambiguous (and raises for
            # more than one element), so emptiness is decided by its size.
            if isinstance(value, np.ndarray):
                return value.size == 0
            return not value

        ret = MunchWithAdd()
        use_meta = kwargs.get('meta', False)

        # Always strip 'labels' so it is never forwarded to perturb_fn, even
        # when the caller passed an explicit None or an empty sequence.
        labels = kwargs.pop('labels', None)
        # Length-based check: same as truthiness for lists/tuples, but also
        # safe for numpy arrays of labels.
        has_labels = labels is not None and len(labels) > 0
        labels_for_kept_data = []

        ret_data = []
        meta = []
        order = list(range(len(data)))
        samples = 0
        if nsamples:
            # Random visiting order makes the nsamples cut-off a random sample.
            np.random.shuffle(order)
        for i in order:
            d = data[i]
            t = []
            add = []
            if keep_original:
                # Computed before calling perturb_fn, in case perturb_fn
                # modifies the example in place.
                org = recursive_apply(d, str)
                t.append(org)
                add.append(None)
            p = perturb_fn(d, *args, **kwargs)
            a = []
            # Skip when nothing came back, or when every returned item is
            # empty (with meta=True the items are the (examples, meta) pair).
            if _is_blank(p) or all(_is_blank(x) for x in p):
                continue
            if use_meta:
                p, a = p
            # np.ndarray is the array *type*; np.array is only a factory
            # function and would never match a type check.
            if isinstance(p, (np.ndarray, list)):
                t.extend(p)
                add.extend(a)
            else:
                # A single perturbed example (e.g. a string or tuple).
                t.append(p)
                add.append(a)
            ret_data.append(t)
            meta.append(add)
            if has_labels:
                labels_for_kept_data.append(labels[i])
            samples += 1
            if nsamples and samples == nsamples:
                break
        ret.data = ret_data
        if use_meta:
            ret.meta = meta

        if has_labels:
            return ret, labels_for_kept_data
        return ret