in causalml/inference/tree/uplift.pyx [0:0]
def predict(self, X, full_output=False):
    '''
    Predict the treatment effect of every treatment group for each sample.

    Each tree in ``self.uplift_forest`` is asked to predict on ``X`` and the
    per-tree predictions are averaged. The averaged prediction of the control
    group (``self.control_name``) is then subtracted from the averaged
    prediction of every other group in ``self.classes_`` to obtain the deltas.

    Args
    ----
    X : ndarray, shape = [num_samples, num_features]
        An ndarray of the covariates used to train the uplift model.
    full_output : bool, optional (default=False)
        If `True`, return the full results DataFrame instead of only the
        array of treatment effects.

    Returns
    -------
    y_pred_list : ndarray, shape = (num_samples, num_treatments)
        Returned when `full_output` is `False`. An ndarray containing the
        predicted treatment effect of each treatment group for each sample,
        with columns ordered as ``self.classes_[1:]``.
    df_res : DataFrame, shape = [num_samples, (num_treatments * 2 + 3)]
        Returned when `full_output` is `True`. A DataFrame containing, in
        order: the averaged prediction for every group in ``self.classes_``,
        ``recommended_treatment`` (the positional index into
        ``self.classes_`` of the group with the highest averaged
        prediction), one ``delta_<group>`` column per treatment group, and
        ``max_delta`` (the largest delta for each sample).
    '''
    # Collect one prediction per tree; only the collection step differs
    # between the parallel and the serial path.
    if self.n_jobs != 1:
        tree_preds = Parallel(n_jobs=self.n_jobs, prefer=self.joblib_prefer)(
            delayed(tree.predict)(X=X) for tree in self.uplift_forest
        )
    else:
        tree_preds = [tree.predict(X=X) for tree in self.uplift_forest]

    # Average the predictions over all trees.
    y_pred_ensemble = sum(tree_preds) / len(self.uplift_forest)

    # Summarize results into dataframe
    df_res = pd.DataFrame(y_pred_ensemble, columns=self.classes_)

    # Vectorized row-wise argmax over the class columns. This must run before
    # any extra column is added so that only the class predictions compete.
    df_res['recommended_treatment'] = np.argmax(df_res.values, axis=1)

    # Calculate delta: every group after the first entry of classes_ is
    # compared against the control column.
    # NOTE(review): this presumes classes_[0] is the control group - confirm.
    treatment_groups = list(self.classes_[1:])
    delta_cols = [f'delta_{treatment_group}' for treatment_group in treatment_groups]
    control_outcome = df_res[self.control_name]
    for treatment_group, delta_col in zip(treatment_groups, delta_cols):
        df_res[delta_col] = df_res[treatment_group] - control_outcome

    df_res['max_delta'] = df_res[delta_cols].max(axis=1)

    if full_output:
        return df_res
    return df_res[delta_cols].values