in causalml/inference/meta/rlearner.py [0:0]
def fit(self, X, treatment, y, p=None, verbose=True):
    """Fit the treatment effect and outcome models of the R learner.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        treatment (np.array or pd.Series): a treatment vector; rows whose value
            equals ``self.control_name`` are treated as the control group
        y (np.array or pd.Series): an outcome vector
        p (np.ndarray or pd.Series or dict, optional): an array of propensity scores of float (0,1) in the
            single-treatment case; or, a dictionary of treatment groups that map to propensity vectors of
            float (0,1); if None will run ElasticNetPropensityModel() to generate the propensity scores.
        verbose (bool, optional): whether to output progress logs
    """
    X, treatment, y = convert_pd_to_np(X, treatment, y)
    check_treatment_vector(treatment, self.control_name)
    # Distinct non-control treatment labels, in sorted order.
    self.t_groups = np.unique(treatment[treatment != self.control_name])
    self.t_groups.sort()

    if p is None:
        # Estimate propensity scores per treatment group: fit on the
        # control + group subset, but predict for ALL rows (X_pred=X) so
        # p[group] can later be indexed by any row mask.
        logger.info('Generating propensity score')
        p = dict()
        p_model = dict()
        for group in self.t_groups:
            mask = (treatment == group) | (treatment == self.control_name)
            treatment_filt = treatment[mask]
            X_filt = X[mask]
            # Binary indicator within the filtered subset (fit target) ...
            w_filt = (treatment_filt == group).astype(int)
            # ... and over all rows (prediction target).
            w = (treatment == group).astype(int)
            p[group], p_model[group] = compute_propensity_score(X=X_filt, treatment=w_filt,
                                                               X_pred=X, treatment_pred=w)
        self.propensity_model = p_model
        self.propensity = p
    else:
        # User-supplied propensities: validate, then normalize to a
        # {treatment_group: np.ndarray} dict regardless of input form.
        check_p_conditions(p, self.t_groups)
        if isinstance(p, (np.ndarray, pd.Series)):
            # Single-treatment case: a bare vector maps to the only group.
            treatment_name = self.t_groups[0]
            p = {treatment_name: convert_pd_to_np(p)}
        elif isinstance(p, dict):
            p = {treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()}

    self._classes = {group: i for i, group in enumerate(self.t_groups)}
    # One independent copy of the effect model per treatment group.
    self.models_tau = {group: deepcopy(self.model_tau) for group in self.t_groups}
    self.vars_c = {}
    self.vars_t = {}

    if verbose:
        logger.info('generating out-of-fold CV outcome estimates')
    # Out-of-fold predictions of the outcome model mu(X); using CV here
    # avoids leaking y into its own residual.
    yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, n_jobs=-1)

    for group in self.t_groups:
        # Restrict to this group's rows plus the control rows.
        treatment_mask = (treatment == group) | (treatment == self.control_name)
        treatment_filt = treatment[treatment_mask]
        w = (treatment_filt == group).astype(int)
        X_filt = X[treatment_mask]
        y_filt = y[treatment_mask]
        yhat_filt = yhat[treatment_mask]
        p_filt = p[group][treatment_mask]

        if verbose:
            logger.info('training the treatment effect model for {} with R-loss'.format(group))
        if self.early_stopping:
            # Hold out a validation split for early stopping; the five
            # arrays go in together so rows stay aligned across splits.
            X_train_filt, X_test_filt, y_train_filt, y_test_filt, yhat_train_filt, yhat_test_filt, \
                w_train, w_test, p_train_filt, p_test_filt = train_test_split(
                    X_filt, y_filt, yhat_filt, w, p_filt,
                    test_size=self.test_size, random_state=self.random_state
                )
            # R-loss: regress the pseudo-outcome (y - mu) / (w - e) with
            # sample weights (w - e)^2; the eval set uses the same
            # transformation. NOTE(review): assumes model_tau exposes an
            # xgboost-style fit signature (eval_set, early_stopping_rounds,
            # sample_weight_eval_set) — confirm against the model class.
            self.models_tau[group].fit(X=X_train_filt,
                                       y=(y_train_filt - yhat_train_filt) / (w_train - p_train_filt),
                                       sample_weight=(w_train - p_train_filt) ** 2,
                                       eval_set=[(X_test_filt,
                                                  (y_test_filt - yhat_test_filt) / (w_test - p_test_filt))],
                                       sample_weight_eval_set=[(w_test - p_test_filt) ** 2],
                                       eval_metric=self.effect_learner_eval_metric,
                                       early_stopping_rounds=self.early_stopping_rounds,
                                       verbose=verbose)
        else:
            # Same R-loss fit on the full filtered data, no holdout.
            self.models_tau[group].fit(X_filt, (y_filt - yhat_filt) / (w - p_filt),
                                       sample_weight=(w - p_filt) ** 2,
                                       eval_metric=self.effect_learner_eval_metric)

        # Outcome-residual variances among control (w == 0) and treated
        # (w == 1) rows; presumably consumed downstream for variance /
        # interval estimates — TODO confirm against the class's predict/CI code.
        self.vars_c[group] = (y_filt[w == 0] - yhat_filt[w == 0]).var()
        self.vars_t[group] = (y_filt[w == 1] - yhat_filt[w == 1]).var()