def synthetic_data()

in causalml/dataset/regression.py [0:0]


def synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.):
    ''' Synthetic data in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'

    Dispatches to one of the module's simulation functions based on ``mode`` and
    forwards ``n``, ``p``, ``sigma`` and ``adj`` to it positionally.

    Args:
        mode (int, optional): mode of the simulation: \
            1 for difficult nuisance components and an easy treatment effect. \
            2 for a randomized trial. \
            3 for an easy propensity and a difficult baseline. \
            4 for unrelated treatment and control groups. \
            5 for a hidden confounder biasing treatment.
        n (int, optional): number of observations
        p (int, optional): number of covariates (>=5)
        sigma (float): standard deviation of the error term
        adj (float): adjustment term for the distribution of propensity, e. Higher values shift the distribution to 0.
                     It does not apply to mode == 2 or 3.

    Returns:
        (tuple): Synthetically generated samples with the following outputs:

            - y ((n,)-array): outcome variable.
            - X ((n,p)-ndarray): independent variables.
            - w ((n,)-array): treatment flag with value 0 or 1.
            - tau ((n,)-array): individual treatment effect.
            - b ((n,)-array): expected outcome.
            - e ((n,)-array): propensity of receiving treatment.

    Raises:
        AssertionError: if ``mode`` is not one of the supported simulation modes.
    '''

    # Dispatch table: simulation mode -> generator function.
    catalog = {1: simulate_nuisance_and_easy_treatment,
               2: simulate_randomized_trial,
               3: simulate_easy_propensity_difficult_baseline,
               4: simulate_unrelated_treatment_control,
               5: simulate_hidden_confounder}

    # Validate explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would turn an invalid mode into a
    # bare KeyError from the lookup below. AssertionError is kept as the
    # exception type so existing callers that catch it keep working.
    if mode not in catalog:
        raise AssertionError('Invalid mode {}. Should be one of {}'.format(mode, set(catalog)))

    return catalog[mode](n, p, sigma, adj)