in causalml/metrics/visualize.py [0:0]
def get_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
             normalize=False, random_seed=42):
    """Get Qini of model estimates in population.

    If the true treatment effect is provided (e.g. in synthetic data), it's calculated
    as the cumulative gain of the true treatment effect in each population.
    Otherwise, it's calculated as the cumulative difference between the mean outcomes
    of the treatment and control groups in each population.

    For details, see Radcliffe (2007), `Using Control Group to Target on Predicted Lift:
    Building and Assessing Uplift Models`

    For the former, `treatment_effect_col` should be provided. For the latter, both
    `outcome_col` and `treatment_col` should be provided. Note that `treatment_col` is
    read in both cases, because the cumulative number of treated rows scales the curve.

    Every column of `df` other than `outcome_col`, `treatment_col` and
    `treatment_effect_col` is treated as a model score: rows are ranked by that score in
    descending order and the cumulative Qini value is computed along the ranking.
    Ten uniformly random scores are ranked the same way and their mean curve is returned
    in the `RANDOM_COL` column as a baseline.

    The random scores are drawn from a private ``numpy.random.RandomState`` seeded with
    `random_seed`, so the call is reproducible and does not reseed NumPy's global
    random generator.

    Args:
        df (pandas.DataFrame): a data frame with model estimates and actual data as columns
        outcome_col (str, optional): the column name for the actual outcome
        treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
        treatment_effect_col (str, optional): the column name for the true treatment effect
        normalize (bool, optional): whether to normalize the y-axis to 1 or not
        random_seed (int, optional): random seed for the random baseline scores

    Returns:
        (pandas.DataFrame): cumulative gains of model estimates in population, indexed
            from 0 (all-zero starting row) to the number of rows in `df`, with one
            column per model score plus the `RANDOM_COL` baseline

    Raises:
        AssertionError: if `df` has neither `treatment_effect_col` nor both
            `outcome_col` and `treatment_col`
    """
    has_true_effect = treatment_effect_col in df.columns
    assert ((outcome_col in df.columns) and (treatment_col in df.columns) or
            has_true_effect), \
        'df must contain either treatment_effect_col or both outcome_col and treatment_col'

    # Work on a copy so the caller's frame never sees the random baseline columns.
    df = df.copy()

    # A private RandomState yields the same draws as np.random.seed(random_seed)
    # followed by np.random.rand(), without clobbering the caller's global RNG state.
    rng = np.random.RandomState(random_seed)
    random_cols = []
    for i in range(10):
        random_col = '__random_{}__'.format(i)
        df[random_col] = rng.rand(df.shape[0])
        random_cols.append(random_col)

    reserved_cols = (outcome_col, treatment_col, treatment_effect_col)
    model_names = [x for x in df.columns if x not in reserved_cols]

    curves = []
    for col in model_names:
        # Rank the population by this score; a 1-based index makes the index value
        # equal to the size of the cumulative population at each row.
        df = df.sort_values(col, ascending=False).reset_index(drop=True)
        df.index = df.index + 1

        # Intermediate cumulative sums are kept as local Series (not written back to
        # df) so they cannot overwrite a model column that happens to share a name.
        treated = df[treatment_col]
        cumsum_tr = treated.cumsum()

        if has_true_effect:
            # When treatment_effect_col is given, use it to calculate the average
            # treatment effects of cumulative population.
            gain = df[treatment_effect_col].cumsum() / df.index * cumsum_tr
        else:
            # When treatment_effect_col is not given, use outcome_col and treatment_col
            # to calculate the average treatment_effects of cumulative population.
            cumsum_ct = df.index.values - cumsum_tr
            cumsum_y_tr = (df[outcome_col] * treated).cumsum()
            cumsum_y_ct = (df[outcome_col] * (1 - treated)).cumsum()
            # Rows ranked before the first control row divide by zero and come out
            # as NaN; they are filled by the interpolation below.
            gain = cumsum_y_tr - cumsum_y_ct * cumsum_tr / cumsum_ct

        curves.append(gain)

    qini = pd.concat(curves, join='inner', axis=1)
    qini.columns = model_names

    # Anchor every curve at (0, 0), then linearly fill any NaN gaps.
    qini.loc[0] = np.zeros((qini.shape[1], ))
    qini = qini.sort_index().interpolate()

    # Collapse the random rankings into a single averaged baseline column.
    qini[RANDOM_COL] = qini[random_cols].mean(axis=1)
    qini.drop(random_cols, axis=1, inplace=True)

    if normalize:
        # Scale each curve by the absolute value of its final point.
        qini = qini.div(np.abs(qini.iloc[-1, :]), axis=1)

    return qini