in causalml/metrics/visualize.py [0:0]
def get_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
                 outcome_col='y', treatment_col='w', p_col='p', n_segment=5, cv=None,
                 calibrate_propensity=True, ci=False, normalize=False):
    """Get TMLE based Qini of model estimates by segments.

    Every column of ``df`` that is not the outcome, treatment, propensity or
    an inference column is treated as a model estimate. Each such column is
    cut into ``n_segment`` quantile segments, a TMLE ATE is estimated per
    segment, and the per-segment ``ATE * number treated`` values are
    accumulated from the highest-scored segment downwards.

    Args:
        df (pandas.DataFrame): a data frame with model estimates and actual data as columns
        inference_col (list of str): a list of columns that used in learner for inference;
            names that are not columns of ``df`` are silently dropped
        learner (optional): a model used by TMLE to estimate the outcome.
            NOTE: the default instance is created once, when the function is
            defined, and is therefore shared by every call that omits it.
        outcome_col (str, optional): the column name for the actual outcome
        treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
        p_col (str, optional): the column name for propensity score
        n_segment (int, optional): number of segment that TMLE will estimated for each
        cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
        calibrate_propensity (bool, optional): whether calibrate propensity score or not
        ci (bool, optional): whether return confidence intervals for ATE or not
        normalize (bool, optional): accepted for signature compatibility;
            this function does not currently read it, so it has no effect

    Returns:
        (pandas.DataFrame): cumulative gains of model estimates based of TMLE.
            One column per model estimate, plus ``"<model> LB"`` and
            ``"<model> UB"`` columns when ``ci`` is true, plus ``RANDOM_COL``.
            There are ``n_segment + 1`` rows, indexed by
            ``linspace(0, 1, n_segment + 1) * len(df)``. The last row of every
            column other than ``RANDOM_COL`` is set to the overall ATE times
            the total of the treatment column.

    Raises:
        AssertionError: if any of ``outcome_col``, ``treatment_col`` or
            ``p_col`` is not a column of ``df``.
    """
    # All three columns are read unconditionally below, so all three must be
    # present. (The previous `A and B or C` check let a frame holding only
    # `p_col` through and failed later with a KeyError.)
    assert all(c in df.columns for c in (outcome_col, treatment_col, p_col)), \
        'df must contain the outcome, treatment and propensity columns'

    inference_col = [x for x in inference_col if x in df.columns]

    # Initialize TMLE
    tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)

    # Overall ATE; only the point estimate is used (for the final row and
    # for the random baseline).
    ate_all, _, _ = tmle.estimate_ate(X=df[inference_col],
                                      p=df[p_col],
                                      treatment=df[treatment_col],
                                      y=df[outcome_col])

    model_names = [x for x in df.columns if x not in [outcome_col, treatment_col, p_col] + inference_col]

    # Segments are visited from the top quantile (n_segment - 1) down to 1.
    # Segment 0 is never visited: the last row is overwritten with the
    # overall figure after the cumulative sum.
    segment_order = range(n_segment - 1, 0, -1)

    qini = []
    qini_lb = []
    qini_ub = []

    for col in model_names:
        # Quantile assignment is computed once per model and reused for the
        # TMLE call and for every per-segment treated count.
        segment = pd.qcut(df[col], n_segment, labels=False)

        ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(X=df[inference_col],
                                                                  p=df[p_col],
                                                                  treatment=df[treatment_col],
                                                                  y=df[outcome_col],
                                                                  segment=segment)

        # Sum of the treatment indicator within each visited segment.
        n_treated = [df.loc[segment == k, treatment_col].sum() for k in segment_order]

        # Leading 0 is the origin of the curve.
        qini.append([0] + [ate_model[0][k] * n_tr for k, n_tr in zip(segment_order, n_treated)])

        if ci:
            qini_lb.append([0] + [ate_model_lb[0][k] * n_tr for k, n_tr in zip(segment_order, n_treated)])
            qini_ub.append([0] + [ate_model_ub[0][k] * n_tr for k, n_tr in zip(segment_order, n_treated)])

    # Rows become segments, columns become models.
    qini = pd.DataFrame(qini).T
    qini.columns = model_names

    if ci:
        qini_lb = pd.DataFrame(qini_lb).T
        qini_lb.columns = [x + " LB" for x in model_names]
        qini_ub = pd.DataFrame(qini_ub).T
        qini_ub.columns = [x + " UB" for x in model_names]
        qini = pd.concat([qini, qini_lb, qini_ub], axis=1)

    total_gain = ate_all[0] * df[treatment_col].sum()

    qini = qini.cumsum()
    # Every curve (including LB/UB columns) ends at the overall value.
    qini.loc[n_segment] = total_gain
    # Random baseline: straight line from 0 to the overall value.
    qini[RANDOM_COL] = np.linspace(0, 1, n_segment + 1) * total_gain
    # Re-index by population size covered at each step.
    qini.index = np.linspace(0, 1, n_segment + 1) * df.shape[0]

    return qini