in causalml/feature_selection/filters.py [0:0]
def _filter_F_one_feature(data, treatment_indicator, feature_name, y_name, order=1):
    """
    Conduct F-test of the interaction between treatment and one feature.

    An OLS model of the outcome is fitted on a constant, the treatment
    indicator, the feature raised to every power from 1 to ``order``, and the
    product of the treatment indicator with each of those powers. The
    coefficients of all treatment-by-feature product terms are then jointly
    F-tested against zero.

    Args:
        data (pd.Dataframe): DataFrame containing outcome, features, and experiment group
        treatment_indicator (string): the column name for binary indicator of treatment (value 1) or
            control (value 0)
        feature_name (string): feature name, as one column in the data DataFrame
        y_name (string): name of the outcome variable
        order (int): the order of feature to be evaluated with the treatment effect, order takes 3
            values: 1,2,3. order = 1 corresponds to linear importance of the feature, order=2
            corresponds to quadratic and linear importance of the feature, order=3 will calculate
            feature importance up to cubic forms.

    Returns:
        F_test_result : pd.DataFrame
            a one-row data frame containing the feature importance statistics, with columns
            ``feature``, ``method``, ``score`` (F statistic), ``p_value`` and ``misc``
            (degrees of freedom and order, as text)

    Raises:
        ValueError: if ``order`` is not one of 1, 2, 3.
    """
    # Validate first so that an invalid call fails before any data is touched.
    if order not in [1, 2, 3]:
        raise ValueError(
            "order argument only takes value 1,2,3; got {!r}.".format(order)
        )
    # Integer copy used only for looping; the caller's `order` value is still
    # what gets formatted into the output strings below.
    max_power = int(order)

    Y = data[y_name]
    X = data[[treatment_indicator, feature_name]]
    X = sm.add_constant(X)

    # Expected column layout after this loop:
    #   const, treatment, feature, treatment-feature,
    #   [feature_o2, treatment-feature_o2, [feature_o3, treatment-feature_o3]]
    for power in range(1, max_power + 1):
        if power == 1:
            term_name = feature_name
        else:
            term_name = "{}_o{}".format(feature_name, power)
            X[term_name] = X[feature_name] ** power
        X["{}-{}".format(treatment_indicator, term_name)] = X[
            [treatment_indicator, term_name]
        ].product(axis=1)

    model = sm.OLS(Y, X)
    result = model.fit()

    # One restriction row per interaction term. With the layout above the
    # interaction columns sit at positions 3, 5, 7, ...
    n_params = 2 + 2 * max_power
    restrictions = np.zeros((max_power, n_params))
    for row in range(max_power):
        restrictions[row, 3 + 2 * row] = 1
    F_test = result.f_test(restrictions)

    # Depending on the statsmodels version the statistic may come back as a
    # size-1 array with ndim > 0; squeeze before converting so that the
    # conversion to a Python float is always done on a 0-d value.
    score = float(np.squeeze(F_test.fvalue))
    p_value = float(np.squeeze(F_test.pvalue))

    F_test_result = pd.DataFrame(
        {
            "feature": feature_name,  # for the interaction, not the main effect
            "method": "F{} Filter".format(order),
            "score": score,
            "p_value": p_value,
            "misc": "df_num: {}, df_denom: {}, order:{}".format(
                F_test.df_num, F_test.df_denom, order
            ),
        },
        index=[0],
    ).reset_index(drop=True)

    return F_test_result