in causalml/dataset/regression.py [0:0]
def synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.):
    """Generate synthetic data as in Nie X. and Wager S. (2018).

    Reference: 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'.

    This function is a dispatcher: it selects one simulation routine based on
    ``mode`` and forwards ``n``, ``p``, ``sigma`` and ``adj`` to it unchanged.

    Args:
        mode (int, optional): mode of the simulation:
            1 for difficult nuisance components and an easy treatment effect.
            2 for a randomized trial.
            3 for an easy propensity and a difficult baseline.
            4 for unrelated treatment and control groups.
            5 for a hidden confounder biasing treatment.
        n (int, optional): number of observations
        p (int, optional): number of covariates (>=5)
        sigma (float): standard deviation of the error term
        adj (float): adjustment term for the distribution of propensity, e.
            Higher values shift the distribution to 0. Documented as not
            applying to mode == 2 or 3 (the value is still forwarded to the
            selected simulator).

    Returns:
        (tuple): Synthetically generated samples, as returned by the selected
        simulator, with the following outputs:
            - y ((n,)-array): outcome variable.
            - X ((n,p)-ndarray): independent variables.
            - w ((n,)-array): treatment flag with value 0 or 1.
            - tau ((n,)-array): individual treatment effect.
            - b ((n,)-array): expected outcome.
            - e ((n,)-array): propensity of receiving treatment.

    Raises:
        AssertionError: if ``mode`` is not one of the supported modes.
    """
    catalog = {
        1: simulate_nuisance_and_easy_treatment,
        2: simulate_randomized_trial,
        3: simulate_easy_propensity_difficult_baseline,
        4: simulate_unrelated_treatment_control,
        5: simulate_hidden_confounder,
    }

    if mode not in catalog:
        # Raised explicitly instead of via an `assert` statement: asserts are
        # stripped under `python -O`, which would let an invalid mode fall
        # through to a bare KeyError below. AssertionError is kept as the
        # exception type so existing callers that catch it keep working.
        raise AssertionError('Invalid mode {}. Should be one of {}'.format(mode, set(catalog)))

    return catalog[mode](n, p, sigma, adj)