def _filter_F_one

def _filter_F_one_feature()

in causalml/feature_selection/filters.py [0:0]
57 lines of code
9 McCabe index (conditional complexity)

    def _filter_F_one_feature(data, treatment_indicator, feature_name, y_name, order=1):
        """
        Conduct F-test of the interaction between treatment and one feature.

        Args:
            data (pd.Dataframe): DataFrame containing outcome, features, and experiment group
            treatment_indicator (string): the column name for binary indicator of treatment (value 1) or control (value 0)
            feature_name (string): feature name, as one column in the data DataFrame
            y_name (string): name of the outcome variable
            order (int): the order of feature to be evaluated with the treatment effect, order takes 3 values: 1,2,3. order = 1 corresponds to linear importance of the feature, order=2 corresponds to quadratic and linear importance of the feature,
            order= 3 will calculate feature importance up to cubic forms.

        Returns:
            F_test_result : pd.DataFrame
                a data frame containing the feature importance statistics
        """
        Y = data[y_name]
        X = data[[treatment_indicator, feature_name]]
        X = sm.add_constant(X)
        X["{}-{}".format(treatment_indicator, feature_name)] = X[
            [treatment_indicator, feature_name]
        ].product(axis=1)

        if order not in [1, 2, 3]:
            raise Exception("ValueError: order argument only takes value 1,2,3.")

        if order == 1:
            pass
        elif order == 2:
            x_tmp_name = "{}_o{}".format(feature_name, order)
            X[x_tmp_name] = X[[feature_name]] ** order
            X["{}-{}".format(treatment_indicator, x_tmp_name)] = X[
                [treatment_indicator, x_tmp_name]
            ].product(axis=1)
        elif order == 3:
            x_tmp_name = "{}_o{}".format(feature_name, 2)
            X[x_tmp_name] = X[[feature_name]] ** 2
            X["{}-{}".format(treatment_indicator, x_tmp_name)] = X[
                [treatment_indicator, x_tmp_name]
            ].product(axis=1)

            x_tmp_name = "{}_o{}".format(feature_name, order)
            X[x_tmp_name] = X[[feature_name]] ** order
            X["{}-{}".format(treatment_indicator, x_tmp_name)] = X[
                [treatment_indicator, x_tmp_name]
            ].product(axis=1)

        model = sm.OLS(Y, X)
        result = model.fit()

        if order == 1:
            F_test = result.f_test(np.array([0, 0, 0, 1]))
        elif order == 2:
            F_test = result.f_test(np.array([[0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1]]))
        elif order == 3:
            F_test = result.f_test(
                np.array(
                    [
                        [0, 0, 0, 1, 0, 0, 0, 0],
                        [0, 0, 0, 0, 0, 1, 0, 0],
                        [0, 0, 0, 0, 0, 0, 0, 1],
                    ]
                )
            )

        F_test_result = pd.DataFrame(
            {
                "feature": feature_name,  # for the interaction, not the main effect
                "method": "F{} Filter".format(order),
                "score": float(F_test.fvalue),
                "p_value": F_test.pvalue,
                "misc": "df_num: {}, df_denom: {}, order:{}".format(
                    F_test.df_num, F_test.df_denom, order
                ),
            },
            index=[0],
        ).reset_index(drop=True)

        return F_test_result