in causalml/feature_selection/filters.py [0:0]
def _GetNodeSummary(self, data,
                    experiment_group_column='treatment_group_key',
                    y_name='conversion'):
    """
    Count the conversions and get the conversion probabilities by
    treatment group.

    This function comes from the uplift tree algorithm, where it is used
    for tree node split evaluation.

    Parameters
    ----------
    data : DataFrame
        The DataFrame that contains all the data (in the current "node").
    experiment_group_column : str, optional
        Name of the column in ``data`` holding the treatment group key.
    y_name : str, optional
        Name of the column in ``data`` holding the outcome; the values
        0 and 1 are the ones used to compute the conversion probability.

    Returns
    -------
    results : dict
        Counts of conversions by treatment groups, of the form:
        {'control': {0: 10, 1: 8}, 'treatment1': {0: 5, 1: 15}}
        Every outcome value seen anywhere in ``data`` appears under every
        treatment group; combinations with no rows are reported as 0.
    nodeSummary: dict
        Probability of conversion and group size by treatment groups, of
        the form:
        {'control': [0.490, 500], 'treatment1': [0.584, 500]}
        The group size counts only rows whose outcome is 0 or 1. When a
        group has no such rows the probability is NaN.
    """
    # Note: results and nodeSummary are both dict with treatment_group_key
    # as the key. So we can compute the treatment effect and/or
    # divergence easily.

    # Counts of conversions by treatment group. The grouped sizes only
    # contain the (group, outcome) pairs that actually occur in the data.
    pair_counts = data.groupby([experiment_group_column, y_name]).size()
    treatment_group_keys = pair_counts.index.levels[0].tolist()
    y_name_keys = pair_counts.index.levels[1].tolist()

    # Start every (group, outcome) pair at zero so that a pair that never
    # occurs (e.g. a group in which every row converted) does not raise a
    # KeyError, then overlay the observed counts.
    results = {
        group_key: {outcome_key: 0 for outcome_key in y_name_keys}
        for group_key in treatment_group_keys
    }
    for (group_key, outcome_key), count in pair_counts.items():
        results[group_key][outcome_key] = int(count)

    # Probability of conversion and group size by treatment group
    nodeSummary = {}
    for group_key, outcome_counts in results.items():
        # .get() covers nodes where outcome 0 or 1 is absent altogether.
        n_1 = outcome_counts.get(1, 0)
        n_total = n_1 + outcome_counts.get(0, 0)
        # A group without any 0/1 outcomes has no defined conversion rate.
        y_mean = float(n_1) / n_total if n_total else float('nan')
        nodeSummary[group_key] = [y_mean, n_total]

    return results, nodeSummary