in frauddetector/profiler.py [0:0]
def __calculate_summary_stats(self, data, event_column="EVENT_LABEL"):
    """Build a per-column summary table for a pandas DataFrame.

    The input frame is deep-copied first, so the caller's data is not
    modified. The event column is cast to ``str`` on the copy before any
    statistic is computed.

    Args:
        data (pandas.core.frame.DataFrame): DataFrame to summarise.
        event_column (str): name of the column holding the target event;
            it must be present in ``data``.

    Returns:
        pandas.core.frame.DataFrame: one row per column of ``data`` with the
        columns ``feature_name``, ``dtype``, ``count``, ``nunique``,
        ``null``, ``not_null``, ``null_pct`` and ``nunique_pct``. Numeric
        values are rounded to 4 decimals; ``count`` and ``nunique`` are
        returned as int64.

    NOTE(review): casting the event column to ``str`` may turn missing labels
    into literal text, in which case they are counted as non-null -- confirm
    this is intended.
    """
    frame = data.copy(deep=True)
    total_rows = len(frame)

    # Work on the label as text so it is profiled as a categorical value.
    frame[event_column] = frame[event_column].astype('str', errors='ignore')

    # One row per feature, carrying its non-null count and distinct count.
    counts = frame.agg(['count', 'nunique']).T
    counts = counts.reset_index()
    counts = counts.rename(columns={"index": "feature_name"})

    # Null statistics are derived from the gap between row count and count.
    missing = (total_rows - counts["count"]).astype('int64')
    counts["null"] = missing
    counts["not_null"] = total_rows - missing
    counts["null_pct"] = missing / total_rows
    counts["nunique_pct"] = counts["nunique"] / total_rows

    # Schema table: the dtype of every column (after the label cast above).
    schema = frame.dtypes.to_frame().reset_index()
    schema = schema.rename(columns={"index": "feature_name", 0: "dtype"})

    summary = schema.merge(counts, on='feature_name', how='inner').round(4)
    for int_column in ('nunique', 'count'):
        summary[int_column] = summary[int_column].astype('int64')
    return summary