def __extract_frauddetector_schema()

in frauddetector/profiler.py [0:0]


    def __extract_frauddetector_schema(self, data, df_warn, event_column="EVENT_LABEL", timestamp_column="EVENT_TIMESTAMP", filter_warnings=False):
        """Build the Amazon Fraud Detector inputs from the profiled data.

        Produces three things:
            * the training data schema
            * the event variables
            * the event labels

        Args:
            data (pandas.core.frame.DataFrame): the raw dataset; ``data[event_column]``
                is read here to derive the label mapping, and the frame is
                forwarded to the label helper
            df_warn (pandas.core.frame.DataFrame): per-feature frame; the columns
                'feature_name' and 'feature_type' are read, plus 'feature_warning'
                when ``filter_warnings`` is set. It is copied, never modified.
            event_column (str): column that contains the target event
            timestamp_column (str): column that contains the timestamp
            filter_warnings (bool): when True, features whose 'feature_warning'
                is anything other than 'NO WARNING' are dropped before the
                variables and the schema are built
        Returns:
            tuple: ``(data_schema, variables, labels)`` where
                data_schema (dict): The training data schema for AFD, with the
                    keys 'modelVariables' and 'labelSchema'
                variables: result of the variable helper for the (filtered) features
                labels: result of the label helper for ``data``
        Raises:
            ValueError: raised by pandas when ``data[event_column]`` holds no
                non-null values, since there is then no least/most frequent label
        """
        # Work on a private copy so the caller's frame is left untouched.
        df = df_warn.copy(deep=True)
        if filter_warnings:
            # Filtering OUT warnings means keeping only the clean features.
            df = df[df['feature_warning'] == 'NO WARNING'].reset_index(drop=True)
        variables = self.__create_variables(df_stats=df, event_column=event_column, timestamp_column=timestamp_column)
        labels = self.__create_labels(data=data, event_column=event_column)

        # Only these feature types are listed as model variables.
        model_variable_types = ['IP_ADDRESS', 'EMAIL_ADDRESS', 'CATEGORY', 'NUMERIC']
        is_model_variable = df['feature_type'].isin(model_variable_types)

        # The rarest label is treated as fraud and the most common one as legit.
        # NOTE(review): with a single distinct label both entries are identical,
        # and with more than two labels the ones in between are not mapped.
        label_counts = data[event_column].value_counts()

        data_schema = {
            'modelVariables' : df.loc[is_model_variable, 'feature_name'].to_list(),
            'labelSchema'    : {
                'labelMapper' : {
                    'FRAUD' : [label_counts.idxmin()],
                    'LEGIT' : [label_counts.idxmax()]
                }
            }
        }
        return data_schema, variables, labels