in bindings/python-compute/feature_differentiation.py [0:0]
def compute_features_meta_data(self):
    """Build per-feature metadata describing each column's value distribution.

    Iterates over the columns of ``self.feature_df``. A column whose name is
    a key of ``self.cat_dict`` is treated as categorical; every other column
    is treated as numerical.

    * Categorical: ``distribution`` is ``[values, counts]`` taken from
      ``Series.value_counts(dropna=False)``.
    * Numerical: ``distribution`` is ``[grid, density]`` where ``grid`` holds
      ``NUMERICAL_DOMAIN_INTERVAL`` evenly spaced points between the column's
      minimum and maximum, and ``density`` is a Gaussian KDE
      (``bw_method=0.1``) evaluated on that grid and multiplied by 10000.
      If building the KDE raises ``LinAlgError``, the density comes from
      ``get_single_value_distribution(grid, col)`` instead.

    A feature is skipped when its distribution has at most one point, or
    more than ``max(len(col) / 10, 100)`` points.

    Returns:
        list[dict]: one dict per retained feature, with the keys ``'name'``,
        ``'type'`` (``'categorical'`` or ``'numerical'``) and
        ``'distribution'`` (a nested list of two equally long rows).
    """
    features_list = []
    for feature_name in self.feature_df.columns:
        col = self.feature_df[feature_name]
        # Decide the feature kind once; it drives both the computation below
        # and the 'type' field of the emitted record.
        is_categorical = feature_name in self.cat_dict

        if is_categorical:
            # dropna=False makes value_counts include missing values too.
            value_counts = col.value_counts(dropna=False)
            distribution = np.stack(
                [value_counts.index.values, value_counts.values]
            ).tolist()
        else:
            grid = np.linspace(
                np.min(col), np.max(col), num=NUMERICAL_DOMAIN_INTERVAL
            )
            try:
                kde = gaussian_kde(col, bw_method=0.1)
            except LinAlgError:
                # NOTE(review): presumably hit when the column is (nearly)
                # constant and the KDE covariance is singular -- confirm.
                density = get_single_value_distribution(grid, col)
            else:
                density = kde(grid)
            # NOTE(review): the 10000 factor looks like a display scaling of
            # the density -- confirm against the consumer of this metadata.
            distribution = np.stack([grid, 10000 * density]).tolist()

        # Drop features with a degenerate (<= 1 point) or overly large
        # distribution.
        n_points = len(distribution[0])
        if n_points <= 1 or n_points > max(len(col) / 10., 100):
            continue

        features_list.append({
            'name': feature_name,
            'type': 'categorical' if is_categorical else 'numerical',
            'distribution': distribution,
        })
    return features_list