in causalml/inference/meta/xlearner.py [0:0]
def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps=1000, bootstrap_size=10000):
"""Estimate the Average Treatment Effect (ATE).
Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
treatment (np.array or pd.Series): a treatment vector
y (np.array or pd.Series): an outcome vector
p (np.ndarray or pd.Series or dict, optional): an array of propensity scores of float (0,1) in the
single-treatment case; or, a dictionary of treatment groups that map to propensity vectors of
float (0,1); if None will run ElasticNetPropensityModel() to generate the propensity scores.
bootstrap_ci (bool): whether run bootstrap for confidence intervals
n_bootstraps (int): number of bootstrap iterations
bootstrap_size (int): number of samples per bootstrap
Returns:
The mean and confidence interval (LB, UB) of the ATE estimate.
"""
te, dhat_cs, dhat_ts = self.fit_predict(X, treatment, y, p, return_components=True)
X, treatment, y = convert_pd_to_np(X, treatment, y)
if p is None:
p = self.propensity
else:
check_p_conditions(p, self.t_groups)
if isinstance(p, np.ndarray):
treatment_name = self.t_groups[0]
p = {treatment_name: convert_pd_to_np(p)}
elif isinstance(p, dict):
p = {treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()}
ate = np.zeros(self.t_groups.shape[0])
ate_lb = np.zeros(self.t_groups.shape[0])
ate_ub = np.zeros(self.t_groups.shape[0])
for i, group in enumerate(self.t_groups):
_ate = te[:, i].mean()
mask = (treatment == group) | (treatment == self.control_name)
treatment_filt = treatment[mask]
w = (treatment_filt == group).astype(int)
prob_treatment = float(sum(w)) / w.shape[0]
dhat_c = dhat_cs[group][mask]
dhat_t = dhat_ts[group][mask]
p_filt = p[group][mask]
# SE formula is based on the lower bound formula (7) from Imbens, Guido W., and Jeffrey M. Wooldridge. 2009.
# "Recent Developments in the Econometrics of Program Evaluation." Journal of Economic Literature
se = np.sqrt((
self.vars_t[group] / prob_treatment + self.vars_c[group] / (1 - prob_treatment) +
(p_filt * dhat_c + (1 - p_filt) * dhat_t).var()
) / w.shape[0])
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
ate[i] = _ate
ate_lb[i] = _ate_lb
ate_ub[i] = _ate_ub
if not bootstrap_ci:
return ate, ate_lb, ate_ub
else:
t_groups_global = self.t_groups
_classes_global = self._classes
models_mu_c_global = deepcopy(self.models_mu_c)
models_mu_t_global = deepcopy(self.models_mu_t)
models_tau_c_global = deepcopy(self.models_tau_c)
models_tau_t_global = deepcopy(self.models_tau_t)
logger.info('Bootstrap Confidence Intervals for ATE')
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
cate_b = self.bootstrap(X, treatment, y, p, size=bootstrap_size)
ate_bootstraps[:, n] = cate_b.mean()
ate_lower = np.percentile(ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1)
ate_upper = np.percentile(ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1)
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
self._classes = _classes_global
self.models_mu_c = deepcopy(models_mu_c_global)
self.models_mu_t = deepcopy(models_mu_t_global)
self.models_tau_c = deepcopy(models_tau_c_global)
self.models_tau_t = deepcopy(models_tau_t_global)
return ate, ate_lower, ate_upper