in src/utils.py [0:0]
def compute_prob_stats(df_col):
    """Summarise a single column as a dictionary of statistics.

    The column is treated as numerical only when its dtype compares equal
    to ``'int64'`` or ``'float64'``; every other dtype (including other
    numeric widths) is treated as categorical.

    Args:
        df_col: The column to summarise. Presumably a pandas ``Series``
            (it must provide ``isna``, ``dtype``, ``mean``, ``std`` and
            ``value_counts``) -- TODO confirm against callers.

    Returns:
        A dict that always contains:

        * ``'nan'``  -- result of ``df_col.isna().sum()`` (missing count).
        * ``'type'`` -- ``ColType.NUMERICAL`` or ``ColType.CATEGORICAL``.

        For numerical columns it additionally contains ``'mean'``,
        ``'std'`` and ``'dtype'``. For categorical columns it additionally
        contains ``'prob'``: a mapping from each distinct value to its
        relative frequency, with missing values counted as their own entry.
    """
    missing_count = df_col.isna().sum()
    col_dtype = df_col.dtype

    # Only these two exact dtypes take the numerical path.
    if col_dtype == 'int64' or col_dtype == 'float64':
        return {
            'nan': missing_count,
            'type': ColType.NUMERICAL,
            'mean': df_col.mean(),
            'std': df_col.std(),
            'dtype': col_dtype,
        }

    # dropna=False keeps missing values in the distribution so that the
    # frequencies are relative to the full column length.
    distribution = df_col.value_counts(normalize=True, dropna=False)
    return {
        'nan': missing_count,
        'type': ColType.CATEGORICAL,
        'prob': distribution.to_dict(),
    }