in causalml/inference/meta/rlearner.py [0:0]
def fit(self, X, treatment, y, p=None, verbose=True):
    """Fit the treatment effect and outcome models of the R learner.

    For every non-control treatment group, a copy of ``self.model_tau`` is
    trained on the rows belonging to that group or to the control group. The
    regression target is the outcome residual (``y`` minus out-of-fold
    predictions of ``self.model_mu``) divided by the treatment residual
    (treatment indicator minus propensity score), weighted by the squared
    treatment residual.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        treatment (np.array or pd.Series): a treatment vector
        y (np.array or pd.Series): an outcome vector
        p (np.ndarray or pd.Series or dict, optional): an array of propensity scores of float (0,1) in the
            single-treatment case; or, a dictionary of treatment groups that map to propensity vectors of
            float (0,1); if None, propensity scores are estimated per treatment group with
            compute_propensity_score().
        verbose (bool, optional): whether to output progress logs

    Attributes set on ``self``:
        t_groups: sorted unique non-control treatment labels.
        propensity_model, propensity: per-group propensity models and scores
            (only assigned when ``p`` is None).
        _classes: mapping of treatment label to its index in ``t_groups``.
        models_tau: mapping of treatment label to its fitted effect model.
        vars_c, vars_t: per-group variance of the outcome residuals among
            control rows and treated rows respectively.
    """
    X, treatment, y = convert_pd_to_np(X, treatment, y)
    check_treatment_vector(treatment, self.control_name)

    # np.unique returns its result already sorted, so no explicit sort is needed.
    self.t_groups = np.unique(treatment[treatment != self.control_name])

    # The control mask does not depend on the treatment group; compute it once.
    is_control = treatment == self.control_name

    if p is None:
        if verbose:
            logger.info('Generating propensity score')
        p = {}
        p_model = {}
        for group in self.t_groups:
            is_group = treatment == group
            mask = is_group | is_control
            # Fit on the (group vs. control) subset, but predict for every row
            # so the scores can later be indexed with a full-length mask.
            p[group], p_model[group] = compute_propensity_score(
                X=X[mask],
                treatment=is_group[mask].astype(int),
                X_pred=X,
                treatment_pred=is_group.astype(int),
            )
        self.propensity_model = p_model
        self.propensity = p
    else:
        check_p_conditions(p, self.t_groups)

    # Normalise p into a {treatment group: score vector} dict.
    if isinstance(p, (np.ndarray, pd.Series)):
        # A bare vector is attributed to the first (sorted) treatment group.
        p = {self.t_groups[0]: convert_pd_to_np(p)}
    elif isinstance(p, dict):
        p = {name: convert_pd_to_np(scores) for name, scores in p.items()}

    self._classes = {group: i for i, group in enumerate(self.t_groups)}
    self.models_tau = {group: deepcopy(self.model_tau) for group in self.t_groups}
    self.vars_c = {}
    self.vars_t = {}

    if verbose:
        logger.info('generating out-of-fold CV outcome estimates')
    yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, n_jobs=-1)

    for group in self.t_groups:
        mask = (treatment == group) | is_control
        X_filt = X[mask]
        p_filt = p[group][mask]
        w = (treatment[mask] == group).astype(int)

        # Outcome residual and treatment residual for the rows in this subset.
        y_residual = y[mask] - yhat[mask]
        w_residual = w - p_filt

        if verbose:
            logger.info('training the treatment effect model for {} with R-loss'.format(group))
        self.models_tau[group].fit(X_filt, y_residual / w_residual,
                                   sample_weight=w_residual ** 2)

        self.vars_c[group] = y_residual[w == 0].var()
        self.vars_t[group] = y_residual[w == 1].var()