in causalml/metrics/visualize.py [0:0]
def get_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
outcome_col='y', treatment_col='w', p_col='p', n_segment=5, cv=None,
calibrate_propensity=True, ci=False):
"""Get TMLE based average uplifts of model estimates of segments.
Args:
df (pandas.DataFrame): a data frame with model estimates and actual data as columns
inferenece_col (list of str): a list of columns that used in learner for inference
learner (optional): a model used by TMLE to estimate the outcome
outcome_col (str, optional): the column name for the actual outcome
treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
Returns:
(pandas.DataFrame): cumulative gains of model estimates based of TMLE
"""
assert (outcome_col in df.columns) and (treatment_col in df.columns) and (p_col in df.columns)
inference_col = [x for x in inference_col if x in df.columns]
# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
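# Estimate the overall ATE once; it becomes the end point (100% of the population) of every gain curve.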
ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(X=df[inference_col],
p=df[p_col],
treatment=df[treatment_col],
y=df[outcome_col])
df = df.copy()
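# Every remaining column is treated as a model score column to be evaluated.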
model_names = [x for x in df.columns if x not in [outcome_col, treatment_col, p_col] + inference_col]
lift = []
lift_lb = []
lift_ub = []
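# For each score column, estimate segment-level ATEs by score quantile and build a cumulative TMLE gain curve.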
for col in model_names:
ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(X=df[inference_col],
p=df[p_col],
treatment=df[treatment_col],
y=df[outcome_col],
segment=pd.qcut(df[col], n_segment, labels=False))
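# Build the cumulative gain from the highest-scored segment downward, weighting each segment's ATE by its population share (1/n_segment); the curve starts at 0 and ends at the overall ATE.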
lift_model = [0.] * (n_segment + 1)
lift_model[n_segment] = ate_all[0]
for i in range(1, n_segment):
lift_model[i] = ate_model[0][n_segment - i] * (1/n_segment) + lift_model[i - 1]
lift.append(lift_model)
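# Optionally repeat the same cumulative construction for the lower and upper confidence bounds.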
if ci:
lift_lb_model = [0.] * (n_segment + 1)
lift_lb_model[n_segment] = ate_all_lb[0]
lift_ub_model = [0.] * (n_segment + 1)
lift_ub_model[n_segment] = ate_all_ub[0]
for i in range(1, n_segment):
lift_lb_model[i] = ate_model_lb[0][n_segment - i] * (1/n_segment) + lift_lb_model[i - 1]
lift_ub_model[i] = ate_model_ub[0][n_segment - i] * (1/n_segment) + lift_ub_model[i - 1]
lift_lb.append(lift_lb_model)
lift_ub.append(lift_ub_model)
lift = pd.DataFrame(lift).T
lift.columns = model_names
if ci:
lift_lb = pd.DataFrame(lift_lb).T
lift_lb.columns = [x + " LB" for x in model_names]
lift_ub = pd.DataFrame(lift_ub).T
lift_ub.columns = [x + " UB" for x in model_names]
lift = pd.concat([lift, lift_lb, lift_ub], axis=1)
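# Express the index as the fraction of population targeted and add a random-targeting baseline that grows linearly from 0 to the overall ATE.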
lift.index = lift.index / n_segment
lift[RANDOM_COL] = np.linspace(0, 1, n_segment + 1) * ate_all[0]
return lift
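
# --- Illustrative usage sketch (not part of the original module) ------------
# A minimal, hypothetical example of calling get_tmlegain on synthetic data.
# The column names ('y', 'w', 'p'), the features x1/x2, and the two made-up
# score columns 'model_a'/'model_b' are assumptions for illustration only;
# the default LGBMRegressor learner requires lightgbm to be installed.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import KFold

    rng = np.random.default_rng(42)
    n = 10000
    x1, x2 = rng.normal(size=n), rng.normal(size=n)
    w = rng.binomial(1, 0.5, size=n)                     # randomized treatment assignment
    tau = 0.3 + 0.2 * x2                                 # heterogeneous treatment effect
    y = 0.5 * x1 + w * tau + rng.normal(size=n)          # observed outcome

    df_example = pd.DataFrame({
        'y': y,
        'w': w,
        'p': np.full(n, 0.5),                            # known propensity under randomization
        'x1': x1,
        'x2': x2,
        # columns other than outcome/treatment/propensity/inference are scored as models
        'model_a': tau + rng.normal(scale=0.1, size=n),  # informative uplift score
        'model_b': rng.normal(size=n),                   # uninformative score
    })

    gain = get_tmlegain(df_example,
                        inference_col=['x1', 'x2'],
                        n_segment=5,
                        cv=KFold(n_splits=3, shuffle=True, random_state=42),
                        ci=False)
    print(gain)  # one gain curve per score column plus the RANDOM_COL baseline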