def get_synthetic_auuc()

in causalml/dataset/synthetic.py [0:0]


def get_synthetic_auuc(synthetic_preds, drop_learners=None, outcome_col='y', treatment_col='w',
                       treatment_effect_col='tau', plot=True):
    """Get auuc values for cumulative gains of model estimates in quantiles.

    For details, reference get_cumgain() and plot_gain()

    The outcome, treatment and (when available) true treatment effect are read
    from the generated data under the caller-supplied names and stored in the
    working frame under the fixed column names 'y', 'w' and 'tau'. Both the
    AUUC computation and the optional plot use those fixed names.

    Args:
        synthetic_preds (dict): dictionary of predictions generated by get_synthetic_preds()
            or get_synthetic_preds_holdout(); must contain the KEY_GENERATED_DATA entry
        drop_learners (list, optional): learner names (columns of the prediction frame)
            to exclude from the evaluation; None (the default) drops nothing
        outcome_col (str, optional): the key of the actual outcome in the generated data
        treatment_col (str, optional): the key of the treatment indicator (0 or 1) in the
            generated data
        treatment_effect_col (str, optional): the key of the true treatment effect in the
            generated data; used only when present there
        plot (boolean, optional): plot the cumulative gain chart or not

    Returns:
        (pandas.DataFrame): auuc values by learner for cumulative gains of model estimates,
            with columns 'Learner' and 'cum_gain_auuc', sorted by 'cum_gain_auuc' descending

    Raises:
        KeyError: if KEY_GENERATED_DATA is missing from synthetic_preds (a dict), or if
            outcome_col / treatment_col cannot be found in the generated data
    """
    # None sentinel instead of a shared mutable default list.
    if drop_learners is None:
        drop_learners = []

    # Work on a shallow copy so popping the generated data leaves the caller's dict intact.
    learner_preds = synthetic_preds.copy()
    generated_data = learner_preds.pop(KEY_GENERATED_DATA)
    synthetic_preds_df = pd.DataFrame(learner_preds).drop(drop_learners, axis=1)

    # Normalise to fixed column names so downstream calls do not depend on the
    # caller's naming. A missing outcome/treatment key fails here at the lookup.
    synthetic_preds_df['y'] = generated_data[outcome_col]
    synthetic_preds_df['w'] = generated_data[treatment_col]
    if treatment_effect_col in generated_data.keys():
        synthetic_preds_df['tau'] = generated_data[treatment_effect_col]

    cumlift = get_cumgain(synthetic_preds_df, outcome_col='y', treatment_col='w',
                          treatment_effect_col='tau')

    # NOTE(review): the 1/100 scaling of the x-axis is kept from the original
    # implementation; confirm it matches the intended units of the cumgain index.
    x_values = cumlift.index.values / 100
    auuc_df = pd.DataFrame(cumlift.columns)
    auuc_df.columns = ['Learner']
    auuc_df['cum_gain_auuc'] = [auc(x_values, cumlift[learner].values) for learner in cumlift.columns]
    auuc_df = auuc_df.sort_values('cum_gain_auuc', ascending=False)

    if plot:
        # The frame carries the fixed names, not the caller-supplied ones, so
        # plot with the same names used for the get_cumgain() call above.
        plot_gain(synthetic_preds_df, outcome_col='y',
                  treatment_col='w', treatment_effect_col='tau')

    return auuc_df