def generate_data()

in de/synthetic.py [0:0]


    def generate_data(self, dtype, num_samples):
        """Generate ``num_samples`` random values described by ``dtype``.

        Args:
            dtype: Description of the values to produce. One of:
                * ``"int"`` / ``int`` -- integers in ``[0, 1_000_000)``.
                * ``"float"`` / ``float`` -- floats in ``[0, 1_000_000)``
                  rounded to 3 decimals.
                * ``"str"`` / ``str`` -- text from ``self.fake.text`` with a
                  per-sample ``max_nb_chars`` drawn from ``[10, 200)``.
                * ``"largestr"`` -- same, with ``max_nb_chars`` drawn from
                  ``[100, 1000)``.
                * ``"bool"`` / ``bool`` -- booleans.
                * a ``dict`` mapping field name to a nested ``dtype`` --
                  one dict per sample.
                * a non-empty ``list`` whose first element is the nested
                  element ``dtype`` -- one list of 0 to 4 elements per sample.
            num_samples: Number of values to generate.

        Returns:
            A Python list with one generated value per sample.

        Raises:
            ValueError: If ``dtype`` is not one of the supported descriptions
                (this includes an empty list).
        """
        if dtype in ("int", int):
            return np.random.randint(0, 1_000_000, size=num_samples).tolist()
        if dtype in ("float", float):
            return np.random.uniform(0, 1_000_000, size=num_samples).round(3).tolist()
        if dtype in ("str", str):
            num_chars = np.random.randint(10, 200, size=num_samples)
            return [self.fake.text(max_nb_chars=n_chars) for n_chars in num_chars]
        if dtype in ("largestr",):
            num_chars = np.random.randint(100, 1000, size=num_samples)
            return [self.fake.text(max_nb_chars=n_chars) for n_chars in num_chars]
        # Membership test, not equality: ``dtype == ("bool", bool)`` is never
        # true for "bool"/bool and made this branch unreachable.
        if dtype in ("bool", bool):
            return np.random.choice([True, False], size=num_samples).tolist()
        if isinstance(dtype, dict):
            # Generate each field as a full column, then transpose to rows.
            columns = [
                self.generate_data(field_type, num_samples)
                for field_type in dtype.values()
            ]
            return [dict(zip(dtype.keys(), row)) for row in zip(*columns)]
        if isinstance(dtype, list) and dtype:
            # Draw every sample's length up front, generate all elements in a
            # single flat batch, then cut the batch into per-sample slices.
            lengths = np.random.randint(0, 5, size=num_samples)
            values = self.generate_data(dtype[0], int(lengths.sum()))
            # cumsum gives each slice's END offset; the start is end - length.
            # (``[0] + lengths`` on an ndarray is a broadcast add, not a list
            # prepend, so it cannot be used to build the start offsets.)
            ends = np.cumsum(lengths).tolist()
            return [
                values[end - length : end]
                for end, length in zip(ends, lengths.tolist())
            ]
        raise ValueError(f"Unsupported data type: {dtype}")