in causalml/dataset/synthetic.py [0:0]
def get_synthetic_auuc(synthetic_preds, drop_learners=None, outcome_col='y', treatment_col='w',
                       treatment_effect_col='tau', plot=True):
    """Get auuc values for cumulative gains of model estimates in quantiles.

    For details, reference get_cumgain() and plot_gain()

    Args:
        synthetic_preds (dict): dictionary of predictions generated by get_synthetic_preds()
            or get_synthetic_preds_holdout()
        drop_learners (list, optional): names of learners (prediction columns) to exclude
            from the evaluation. Defaults to no exclusions.
        outcome_col (str, optional): the key of the actual outcome in the generated data
        treatment_col (str, optional): the key of the treatment indicator (0 or 1) in the
            generated data
        treatment_effect_col (str, optional): the key of the true treatment effect in the
            generated data; it is only used when present there
        plot (boolean,optional): plot the cumulative gain chart or not

    Returns:
        (pandas.DataFrame): auuc values by learner for cumulative gains of model estimates,
            sorted by 'cum_gain_auuc' in descending order

    Raises:
        KeyError: if KEY_GENERATED_DATA is missing from synthetic_preds, or if outcome_col
            or treatment_col is missing from the generated data
    """
    if drop_learners is None:
        drop_learners = []

    # Shallow copy so that popping the generated data does not mutate the caller's dict.
    learner_preds = synthetic_preds.copy()
    generated_data = learner_preds.pop(KEY_GENERATED_DATA)
    synthetic_preds_df = pd.DataFrame(learner_preds).drop(drop_learners, axis=1)

    # The generated data is copied into the frame under fixed internal column names
    # ('y', 'w', 'tau'). Every downstream call must use these names, not the
    # caller-supplied keys, which only identify entries in generated_data.
    synthetic_preds_df['y'] = generated_data[outcome_col]
    synthetic_preds_df['w'] = generated_data[treatment_col]
    if treatment_effect_col in generated_data.keys():
        synthetic_preds_df['tau'] = generated_data[treatment_effect_col]

    cumlift = get_cumgain(synthetic_preds_df, outcome_col='y', treatment_col='w',
                          treatment_effect_col='tau')

    # The cumulative gain index is divided by 100 before being used as the x-axis
    # of the area-under-curve computation.
    x_values = cumlift.index.values / 100
    auuc_df = pd.DataFrame(cumlift.columns)
    auuc_df.columns = ['Learner']
    auuc_df['cum_gain_auuc'] = [auc(x_values, cumlift[learner].values)
                                for learner in cumlift.columns]
    auuc_df = auuc_df.sort_values('cum_gain_auuc', ascending=False)

    if plot:
        # Use the frame's internal column names here as well; passing the
        # caller-supplied names would reference columns that do not exist in
        # synthetic_preds_df whenever they differ from the defaults.
        plot_gain(synthetic_preds_df, outcome_col='y', treatment_col='w',
                  treatment_effect_col='tau')

    return auuc_df