in causalml/feature_selection/filters.py [0:0]
def get_importance(self, data, features, y_name, method,
                   experiment_group_column='treatment_group_key',
                   control_group='control',
                   treatment_group='treatment',
                   n_bins=5,
                   ):
    """
    Rank features based on the chosen statistic of the interaction.

    Parameters
    ----------
    data (pd.Dataframe): DataFrame containing outcome, features, and experiment group
    features (list of string): list of feature names, that are columns in the data DataFrame
    y_name (string): name of the outcome variable
    method (string): required; one of the following values {'F', 'LR', 'KL', 'ED', 'Chi'}
        The feature selection method to be used to rank the features.
        'F' for F-test
        'LR' for likelihood ratio test
        'KL', 'ED', 'Chi' for bin-based uplift filter methods, KL divergence, Euclidean distance, Chi-Square respectively
        Any value other than 'F' or 'LR' is forwarded unchanged to ``filter_D``.
    experiment_group_column (string): the experiment column name in the DataFrame, which contains the treatment and control assignment label
    control_group (string): name for control group, value in the experiment group column
    treatment_group (string): name for treatment group, value in the experiment group column.
        Only used by the 'F' and 'LR' methods; it is not forwarded to ``filter_D``.
    n_bins (int, optional): number of bins to be used for bin-based uplift filter methods

    Returns
    ----------
    (pd.DataFrame): a data frame with following columns: ['method', 'feature', 'rank', 'score', 'p_value', 'misc']

    Notes
    ----------
    The caller's ``data`` is never modified: the 'F' and 'LR' methods work on a
    filtered copy that carries an extra 0/1 ``treatment_indicator`` column.
    """
    if method in ('F', 'LR'):
        # Keep only rows that belong to the two groups being compared. The
        # explicit copy makes the column assignment below write to our own
        # frame instead of a slice of the caller's frame (which would raise
        # pandas' SettingWithCopyWarning).
        in_experiment = data[experiment_group_column].isin(
            [control_group, treatment_group]
        )
        data = data.loc[in_experiment].copy()
        # 1 for treatment rows, 0 for control rows.
        data['treatment_indicator'] = (
            data[experiment_group_column] == treatment_group
        ).astype('int64')

        if method == 'F':
            all_result = self.filter_F(
                data=data,
                treatment_indicator='treatment_indicator',
                features=features,
                y_name=y_name,
            )
        else:
            # NOTE(review): disp=True is presumably a verbosity flag of the
            # underlying model fit -- confirm against filter_LR.
            all_result = self.filter_LR(
                data=data,
                disp=True,
                treatment_indicator='treatment_indicator',
                features=features,
                y_name=y_name,
            )
    else:
        # Bin-based filters receive the unfiltered frame together with the
        # experiment column and the control label.
        all_result = self.filter_D(
            data=data,
            method=method,
            features=features,
            y_name=y_name,
            n_bins=n_bins,
            control_group=control_group,
            experiment_group_column=experiment_group_column,
        )

    all_result['method'] = method + ' filter'
    return all_result[['method', 'feature', 'rank', 'score', 'p_value', 'misc']]