in causalml/inference/meta/rlearner.py [0:0]
def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
    """Fit the treatment effect and outcome models of the R learner.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        treatment (np.array or pd.Series): a treatment vector
        y (np.array or pd.Series): an outcome vector
        p (np.ndarray or pd.Series or dict, optional): an array of propensity scores of float (0,1) in the
            single-treatment case; or, a dictionary of treatment groups that map to propensity vectors of
            float (0,1); if None will run ElasticNetPropensityModel() to generate the propensity scores.
        sample_weight (np.array or pd.Series, optional): an array of sample weights indicating the
            weight of each observation for `effect_learner`. If None, it assumes equal weight.
        verbose (bool, optional): whether to output progress logs
    """
    X, treatment, y = convert_pd_to_np(X, treatment, y)
    check_treatment_vector(treatment, self.control_name)

    # initialize equal sample weight if it's not provided, for simplicity purpose
    sample_weight = (
        convert_pd_to_np(sample_weight)
        if sample_weight is not None
        else convert_pd_to_np(np.ones(len(y)))
    )
    assert len(sample_weight) == len(
        y
    ), "Data length must be equal for sample_weight and the input data"

    self.t_groups = np.unique(treatment[treatment != self.control_name])
    self.t_groups.sort()

    # Use the supplied propensity scores if given; otherwise fit propensity
    # models internally and read the scores back from self.propensity.
    if p is None:
        self._set_propensity_models(X=X, treatment=treatment, y=y)
        p = self.propensity
    else:
        p = self._format_p(p, self.t_groups)

    self._classes = {group: i for i, group in enumerate(self.t_groups)}
    # One independent effect learner per treatment group.
    self.models_tau = {group: deepcopy(self.model_tau) for group in self.t_groups}
    self.vars_c = {}
    self.vars_t = {}

    if verbose:
        logger.info("generating out-of-fold CV outcome estimates")
    # Out-of-fold predictions of the outcome model, used to residualize y.
    yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, n_jobs=-1)

    for group in self.t_groups:
        # Restrict to the control group plus the current treatment group.
        treatment_mask = (treatment == group) | (treatment == self.control_name)
        treatment_filt = treatment[treatment_mask]
        # Binary treatment indicator within the filtered sample.
        w = (treatment_filt == group).astype(int)
        X_filt = X[treatment_mask]
        y_filt = y[treatment_mask]
        yhat_filt = yhat[treatment_mask]
        p_filt = p[group][treatment_mask]
        sample_weight_filt = sample_weight[treatment_mask]

        if verbose:
            logger.info(
                "training the treatment effect model for {} with R-loss".format(
                    group
                )
            )

        if self.early_stopping:
            # Hold out a validation split so the effect learner can early-stop
            # on the R-loss computed from the held-out residuals.
            (
                X_train_filt,
                X_test_filt,
                y_train_filt,
                y_test_filt,
                yhat_train_filt,
                yhat_test_filt,
                w_train,
                w_test,
                p_train_filt,
                p_test_filt,
                sample_weight_train_filt,
                sample_weight_test_filt,
            ) = train_test_split(
                X_filt,
                y_filt,
                yhat_filt,
                w,
                p_filt,
                sample_weight_filt,
                test_size=self.test_size,
                random_state=self.random_state,
            )
            # R-loss regression: target is the residual ratio
            # (y - yhat) / (w - p), weighted by (w - p)^2.
            self.models_tau[group].fit(
                X=X_train_filt,
                y=(y_train_filt - yhat_train_filt) / (w_train - p_train_filt),
                sample_weight=sample_weight_train_filt
                * ((w_train - p_train_filt) ** 2),
                eval_set=[
                    (
                        X_test_filt,
                        (y_test_filt - yhat_test_filt) / (w_test - p_test_filt),
                    )
                ],
                sample_weight_eval_set=[
                    sample_weight_test_filt * ((w_test - p_test_filt) ** 2)
                ],
                eval_metric=self.effect_learner_eval_metric,
                early_stopping_rounds=self.early_stopping_rounds,
                verbose=verbose,
            )
        else:
            # Same R-loss regression, trained on the full filtered sample.
            self.models_tau[group].fit(
                X_filt,
                (y_filt - yhat_filt) / (w - p_filt),
                sample_weight=sample_weight_filt * ((w - p_filt) ** 2),
                eval_metric=self.effect_learner_eval_metric,
            )

        # Weighted variances of the outcome residuals per arm, used later
        # for confidence-interval estimation.
        diff_c = y_filt[w == 0] - yhat_filt[w == 0]
        diff_t = y_filt[w == 1] - yhat_filt[w == 1]
        sample_weight_filt_c = sample_weight_filt[w == 0]
        sample_weight_filt_t = sample_weight_filt[w == 1]
        self.vars_c[group] = get_weighted_variance(diff_c, sample_weight_filt_c)
        self.vars_t[group] = get_weighted_variance(diff_t, sample_weight_filt_t)