def compute_split_kde()

in bindings/python-compute/feature_differentiation.py [0:0]


    def compute_split_kde(self, col, exclude_outlier=True):
        """Evaluate per-class density estimates of ``col`` on a shared grid.

        The values of ``col`` are split with the masks ``self.target == 0``
        and ``self.target == 1``. A Gaussian KDE (``bw_method=0.1``) is fitted
        to each subset and both are evaluated on an evenly spaced grid of
        ``NUMERICAL_DOMAIN_INTERVAL`` points.

        Args:
            col: Numeric values that can be indexed with the boolean masks
                above. If it exposes a ``name`` attribute (presumably a
                pandas Series -- confirm against callers), that name is
                looked up in ``RANGE_FILTER`` to obtain a preset grid range.
            exclude_outlier: Only consulted when no preset range is found.
                If true, the grid spans the 1st to 99th percentile of
                ``col``; otherwise it spans the minimum to the maximum.

        Returns:
            ``np.stack`` of three arrays: the grid, the class-0 density on
            the grid multiplied by 10000, and the class-1 density on the grid
            multiplied by 10000.

        Raises:
            ValueError: Propagated from ``gaussian_kde`` for inputs it
                rejects other than a single-sample subset (for example an
                empty subset).
        """
        try:
            col_value_range = RANGE_FILTER[col.name]
        except (KeyError, AttributeError):
            # No preset range for this column (or ``col`` carries no name):
            # derive the grid range from the data itself.
            if exclude_outlier:
                col_value_range = [np.percentile(col, 1), np.percentile(col, 99)]
            else:
                col_value_range = [np.min(col), np.max(col)]

        x = np.linspace(col_value_range[0], col_value_range[1], num=NUMERICAL_DOMAIN_INTERVAL)

        def fit_density(values):
            """Return a callable density for ``values``, with a degenerate fallback."""
            # could also use pd.Series.value_counts, (use bins)
            try:
                return gaussian_kde(values, bw_method=0.1)
            except LinAlgError:
                # Several samples sharing one value give a singular
                # covariance matrix, which the KDE cannot factorise.
                pass
            except ValueError:
                # gaussian_kde rejects datasets with fewer than two elements.
                # A single sample is still a "single value" distribution;
                # anything else (e.g. an empty subset) is a genuine error.
                if np.size(values) != 1:
                    raise
            return lambda grid: get_single_value_distribution(grid, values)

        kde0 = fit_density(col[self.target == 0])
        kde1 = fit_density(col[self.target == 1])

        return np.stack((x, 10000 * kde0(x), 10000 * kde1(x)))