def generate()

in causalml/dataset/semiSynthetic.py [0:0]


    def generate(self, K: int = 10, n=None) -> List[List[pd.DataFrame]]:
        """Resample K semi-synthetic datasets from every entry in ``self.dgps``.

        For each DGP, the pair of outcome models stored under its
        ``"final_model"`` key is evaluated on ``self.X`` to obtain the
        control (index 0) and treated (index 1) potential outcomes. Each
        dataset is then built by sampling control and treated rows with
        replacement, keeping the original control/treated proportions, and
        attaching a simulated observed outcome.

        Args:
            K: Number of datasets to draw per DGP. Draw ``k`` always uses
                ``np.random.default_rng(seed=k)``, so results are
                reproducible across calls.
            n: Target number of rows per dataset. Defaults to
                ``len(self.X)``. Each arm receives
                ``floor(n * arm_size / len(self.X))`` rows, so the total can
                fall slightly below ``n`` when the shares do not divide
                evenly.

        Returns:
            A list with one entry per DGP; each entry is a list of ``K``
            DataFrames with columns ``y`` (simulated outcome), ``w``
            (treatment indicator), ``tau_i`` (difference between the treated
            and control model outputs) followed by the columns of ``self.X``.
        """
        n_obs = len(self.X)
        if n is None:
            n = n_obs

        # Outcome is treated as binary when every observed value is 0 or 1.
        binary_y = bool(((self.y == 0) | (self.y == 1)).all())

        ctrl_idx = np.where(self.w == 0)[0]
        trt_idx = np.where(self.w == 1)[0]

        # Use exact integer arithmetic for the arm sizes. The float form
        # int(n * (count / total)) can lose a row to round-off, e.g.
        # int(100 * (29 / 100)) == 28.
        ctrl_n = int(n * len(ctrl_idx) // n_obs)
        trt_n = int(n * len(trt_idx) // n_obs)

        feature_cols = list(self.X)
        out_cols = ["y", "w", "tau_i"] + feature_cols

        ans = []
        for dgp in self.dgps:
            models = dgp["final_model"]
            data_tau = self.X.copy()
            y0 = models[0](data_tau)
            y1 = models[1](data_tau)
            if binary_y:
                # Map model outputs onto probabilities.
                y0 = logistic(y0)
                y1 = logistic(y1)
            data_tau["w"] = self.w
            data_tau["tau_i"] = y1 - y0
            # Model prediction under the treatment actually received.
            data_tau["y_w"] = np.where(self.w == 1, y1, y0)
            # Empirical residuals, resampled below as noise for continuous y.
            resid = np.asarray(self.y - data_tau["y_w"])

            datasets = []
            for k in range(K):
                rng = np.random.default_rng(seed=k)
                ctrl_idx_qk = rng.choice(ctrl_idx, size=ctrl_n, replace=True)
                trt_idx_qk = rng.choice(trt_idx, size=trt_n, replace=True)
                idx = np.concatenate([ctrl_idx_qk, trt_idx_qk])
                data_qk = data_tau.iloc[idx].copy()
                if binary_y:
                    # One Bernoulli draw per row, in row order.
                    data_qk["y"] = rng.binomial(1, data_qk["y_w"].to_numpy())
                else:
                    # Observed y = prediction + a bootstrapped residual.
                    data_qk["y"] = data_qk["y_w"] + rng.choice(
                        resid, size=len(data_qk), replace=True
                    )
                datasets.append(data_qk[out_cols])
            ans.append(datasets)
        return ans