in causalml/metrics/visualize.py [0:0]
def get_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
outcome_col='y', treatment_col='w', p_col='p', n_segment=5, cv=None,
calibrate_propensity=True, ci=False):
"""Get TMLE based average uplifts of model estimates of segments.
Args:
df (pandas.DataFrame): a data frame with model estimates and actual data as columns
inferenece_col (list of str): a list of columns that used in learner for inference
learner (optional): a model used by TMLE to estimate the outcome
outcome_col (str, optional): the column name for the actual outcome
treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
p_col (str, optional): the column name for propensity score
n_segment (int, optional): number of segment that TMLE will estimated for each
cv (sklearn.model_selection._BaseKFold, optional): sklearn CV object
calibrate_propensity (bool, optional): whether calibrate propensity score or not
ci (bool, optional): whether return confidence intervals for ATE or not
Returns:
(pandas.DataFrame): cumulative gains of model estimates based of TMLE
"""
assert (outcome_col in df.columns) and (treatment_col in df.columns) and (p_col in df.columns)
inference_col = [x for x in inference_col if x in df.columns]
# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
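# Estimate the overall ATE once; it becomes the end point (100% of the population) of every gain curve.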
ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(X=df[inference_col],
p=df[p_col],
treatment=df[treatment_col],
y=df[outcome_col])
df = df.copy()
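# Every remaining column is treated as a model score column to be evaluated.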
model_names = [x for x in df.columns if x not in [outcome_col, treatment_col, p_col] + inference_col]
lift = []
lift_lb = []
lift_ub = []
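# For each score column, estimate segment-level ATEs by score quantile and build a cumulative TMLE gain curve.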
for col in model_names:
ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(X=df[inference_col],
p=df[p_col],
treatment=df[treatment_col],
y=df[outcome_col],
segment=pd.qcut(df[col], n_segment, labels=False))
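# Build the cumulative gain from the highest-scored segment downward, weighting each segment's ATE by its population share (1/n_segment); the curve starts at 0 and ends at the overall ATE.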
lift_model = [0.] * (n_segment + 1)
lift_model[n_segment] = ate_all[0]
for i in range(1, n_segment):
lift_model[i] = ate_model[0][n_segment - i] * (1/n_segment) + lift_model[i - 1]
lift.append(lift_model)
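# Optionally repeat the same cumulative construction for the lower and upper confidence bounds.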
if ci:
lift_lb_model = [0.] * (n_segment + 1)
lift_lb_model[n_segment] = ate_all_lb[0]
lift_ub_model = [0.] * (n_segment + 1)
lift_ub_model[n_segment] = ate_all_ub[0]
for i in range(1, n_segment):
lift_lb_model[i] = ate_model_lb[0][n_segment - i] * (1/n_segment) + lift_lb_model[i - 1]
lift_ub_model[i] = ate_model_ub[0][n_segment - i] * (1/n_segment) + lift_ub_model[i - 1]
lift_lb.append(lift_lb_model)
lift_ub.append(lift_ub_model)
lift = pd.DataFrame(lift).T
lift.columns = model_names
if ci:
lift_lb = pd.DataFrame(lift_lb).T
lift_lb.columns = [x + " LB" for x in model_names]
lift_ub = pd.DataFrame(lift_ub).T
lift_ub.columns = [x + " UB" for x in model_names]
lift = pd.concat([lift, lift_lb, lift_ub], axis=1)
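# Express the index as the fraction of population targeted and add a random-targeting baseline that grows linearly from 0 to the overall ATE.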
lift.index = lift.index / n_segment
lift[RANDOM_COL] = np.linspace(0, 1, n_segment + 1) * ate_all[0]
return lift
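
# --- Illustrative usage sketch (not part of the original module) ------------
# A minimal, hypothetical example of calling get_tmlegain on synthetic data.
# The column names ('y', 'w', 'p'), the features x1/x2, and the two made-up
# score columns 'model_a'/'model_b' are assumptions for illustration only;
# the default LGBMRegressor learner requires lightgbm to be installed.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import KFold

    rng = np.random.default_rng(42)
    n = 10000
    x1, x2 = rng.normal(size=n), rng.normal(size=n)
    w = rng.binomial(1, 0.5, size=n)                     # randomized treatment assignment
    tau = 0.3 + 0.2 * x2                                 # heterogeneous treatment effect
    y = 0.5 * x1 + w * tau + rng.normal(size=n)          # observed outcome

    df_example = pd.DataFrame({
        'y': y,
        'w': w,
        'p': np.full(n, 0.5),                            # known propensity under randomization
        'x1': x1,
        'x2': x2,
        # columns other than outcome/treatment/propensity/inference are scored as models
        'model_a': tau + rng.normal(scale=0.1, size=n),  # informative uplift score
        'model_b': rng.normal(size=n),                   # uninformative score
    })

    gain = get_tmlegain(df_example,
                        inference_col=['x1', 'x2'],
                        n_segment=5,
                        cv=KFold(n_splits=3, shuffle=True, random_state=42),
                        ci=False)
    print(gain)  # one gain curve per score column plus the RANDOM_COL baseline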