in frauddetector/profiler.py [0:0]
def __extract_frauddetector_schema(self, data, df_warn, event_column="EVENT_LABEL", timestamp_column="EVENT_TIMESTAMP", filter_warnings=False):
    """Build the Amazon Fraud Detector inputs from the profiled data.

    Produces:
        * the training data schema
        * the event variables
        * the event labels

    Args:
        data (pandas.core.frame.DataFrame): event data; must contain ``event_column``
        df_warn (pandas.core.frame.DataFrame): per-feature summary with added warnings.
            Read columns: ``feature_name``, ``feature_type`` and, when
            ``filter_warnings`` is set, ``feature_warning``
        event_column (str): column that contains the target event
        timestamp_column (str): column that contains the timestamp
        filter_warnings (bool): when True, features carrying a warning are filtered
            out, i.e. only rows whose ``feature_warning`` equals ``'NO WARNING'`` are kept

    Returns:
        tuple: ``(data_schema, variables, labels)``
            data_schema (dict): the training data schema for AFD, with the keys
                ``modelVariables`` and ``labelSchema``
            variables: whatever ``__create_variables`` returns for the (filtered) features
            labels: whatever ``__create_labels`` returns for ``data``

    Raises:
        ValueError: if ``data[event_column]`` holds no non-null values, so no
            label mapping can be derived
    """
    # Work on a private copy so the caller's frame is never modified.
    features = df_warn.copy(deep=True)
    if filter_warnings:
        # "Filter out warnings" means: keep only the features WITHOUT a warning.
        features = features[features['feature_warning'] == 'NO WARNING'].reset_index(drop=True)

    variables = self.__create_variables(df_stats=features, event_column=event_column, timestamp_column=timestamp_column)
    labels = self.__create_labels(data=data, event_column=event_column)

    # Count each label once; value_counts() ignores null labels by default.
    label_counts = data[event_column].value_counts()
    if label_counts.empty:
        raise ValueError(
            "Column '{}' has no non-null values; cannot derive the label mapping".format(event_column)
        )

    model_types = ['IP_ADDRESS', 'EMAIL_ADDRESS', 'CATEGORY', 'NUMERIC']
    is_model_variable = features['feature_type'].isin(model_types)

    data_schema = {
        'modelVariables': features.loc[is_model_variable, 'feature_name'].to_list(),
        'labelSchema': {
            'labelMapper': {
                # The rarest label is treated as fraud, the most frequent as legit.
                # NOTE(review): with a single distinct label both entries are identical.
                'FRAUD': [label_counts.idxmin()],
                'LEGIT': [label_counts.idxmax()],
            }
        },
    }
    return data_schema, variables, labels