def compute_features_meta_data()

in bindings/python-compute/feature_differentiation.py [0:0]


    def compute_features_meta_data(self):
        """Build a metadata record for each column of ``self.feature_df``.

        A column whose name is a key of ``self.cat_dict`` is treated as
        categorical; every other column is treated as numerical.

        * Categorical: the distribution is ``[values, counts]`` taken from
          ``value_counts(dropna=False)``, so missing values get their own
          entry.
        * Numerical: the distribution is ``[x, density]`` where ``x`` is an
          evenly spaced grid of ``NUMERICAL_DOMAIN_INTERVAL`` points between
          the column minimum and maximum, and ``density`` is a Gaussian KDE
          (``bw_method=0.1``) evaluated on that grid and scaled by 10000.
          If building the KDE raises ``LinAlgError``, the density comes from
          ``get_single_value_distribution(x, col)`` instead.

        Columns are skipped when their distribution has at most one point,
        or more points than ``max(len(col) / 10, 100)``.

        Returns:
            list[dict]: one dict per retained column, in column order, with
            keys ``'name'``, ``'type'`` (``'categorical'`` or
            ``'numerical'``) and ``'distribution'`` (a two-row nested list).
        """
        features_list = []
        for feature_name in self.feature_df.columns:
            col = self.feature_df[feature_name]
            # Decide the feature kind once; it drives both the distribution
            # computation and the reported 'type'.
            is_categorical = feature_name in self.cat_dict

            if is_categorical:
                value_counts = col.value_counts(dropna=False)
                distribution = np.stack(
                    [value_counts.index.values, value_counts.values]
                ).tolist()
            else:
                x = np.linspace(np.min(col), np.max(col), num=NUMERICAL_DOMAIN_INTERVAL)
                try:
                    kde = gaussian_kde(col, bw_method=0.1)
                except LinAlgError:
                    # NOTE(review): presumably raised for degenerate (e.g.
                    # constant) columns whose covariance is singular -- confirm.
                    density = get_single_value_distribution(x, col)
                else:
                    density = kde(x)
                # NOTE(review): the 10000 scale factor is unexplained here;
                # presumably a display scaling -- confirm with the consumer.
                distribution = np.stack([x, 10000 * density]).tolist()

            # Drop features with a degenerate distribution (<= 1 point) or
            # with too many distinct points relative to the column length.
            n_points = len(distribution[0])
            if n_points <= 1 or n_points > max(len(col) / 10., 100):
                continue

            features_list.append({
                'name': feature_name,
                'type': 'categorical' if is_categorical else 'numerical',
                'distribution': distribution
            })
        return features_list