def resample()

in src/sagemaker_sklearn_extension/contrib/taei/star_oversampler.py [0:0]


    def resample(self, X, y, verbose=False):
        """
        Generate synthetic minority samples using a star topology.

        Each minority sample is joined to the mean of the minority class and
        ``k`` new points are placed on that segment at the fractions
        ``i / (k + 1)`` for ``i = 1..k``, where
        ``k = max(1, int(np.rint(n_to_sample / n_minority)))``. The method
        therefore emits ``k * n_minority`` synthetic samples, which may differ
        from the ``n_to_sample`` value returned by ``self.det_n_to_sample``.

        Only the first two labels returned by ``np.unique(y)`` are considered.
        The label with the strictly smaller count among them is the minority;
        on a tie the second label is treated as the minority.

        Args:
            X: NumPy array of samples; rows are selected with a boolean mask
                derived from ``y`` (assumed shape ``(n_samples, n_features)``
                -- TODO confirm against callers).
            y: NumPy array of class labels aligned with the first axis of X.
            verbose: When True, print a notice if no sampling is required.

        Returns:
            A tuple ``(X_resampled, y_resampled)``. When no sampling is
            needed these are copies of the inputs; otherwise the synthetic
            samples are stacked below ``X`` and the minority label is
            appended to ``y`` once per synthetic sample.

        Raises:
            ValueError: If ``y`` contains fewer than two distinct labels.
        """
        unique, counts = np.unique(y, return_counts=True)
        if len(unique) < 2:
            # Without this guard the code below fails with an opaque
            # IndexError when indexing counts[1].
            raise ValueError(
                "StarOversampler: y must contain at least two classes, got %d" % len(unique)
            )

        if counts[0] < counts[1]:
            min_label, n_min, n_maj = unique[0], counts[0], counts[1]
        else:
            min_label, n_min, n_maj = unique[1], counts[1], counts[0]

        # determine the number of samples to generate
        n_to_sample = self.det_n_to_sample(self.proportion, n_maj, n_min)

        if n_to_sample == 0:
            if verbose:
                print("StarOversampler: Sampling is not needed")
            return X.copy(), y.copy()

        # Implementation of the star topology
        X_min = X[y == min_label]
        X_mean = np.mean(X_min, axis=0)
        k = max([1, int(np.rint(n_to_sample / len(X_min)))])

        # The interpolation fractions do not depend on the sample, so compute
        # them once instead of once per minority point.
        fractions = [float(i) / (k + 1) for i in range(1, k + 1)]

        samples = []
        for x in X_min:
            diff = X_mean - x
            samples.extend(x + fraction * diff for fraction in fractions)

        return np.vstack([X, np.vstack(samples)]), np.hstack([y, np.repeat(min_label, len(samples))])