in src/sagemaker_sklearn_extension/contrib/taei/star_oversampler.py [0:0]
def resample(self, X, y, verbose=False):
    """
    Oversample the minority class by interpolating towards its centroid.

    Every minority sample is connected to the mean of the minority class
    (a "star" topology) and new points are placed at evenly spaced
    positions strictly between the sample and that mean.

    Parameters
    ----------
    X : array of feature rows; indexed with a boolean mask built from ``y``
    y : array of labels; only the first two distinct labels returned by
        ``np.unique`` are considered (assumes a binary problem -- TODO
        confirm with callers; a single-class ``y`` raises ``IndexError``)
    verbose : bool, print a message when no sampling is performed

    Returns
    -------
    (X_resampled, y_resampled) : the original rows followed by the
        synthetic minority rows, and the matching labels. When nothing
        needs to be generated, copies of ``X`` and ``y`` are returned.
    """
    labels, label_counts = np.unique(y, return_counts=True)

    # The rarer of the first two labels is the minority; on a tie the
    # second label is treated as the minority.
    if label_counts[0] < label_counts[1]:
        min_label, n_min, n_maj = labels[0], label_counts[0], label_counts[1]
    else:
        min_label, n_min, n_maj = labels[1], label_counts[1], label_counts[0]

    # determine the number of samples to generate
    n_to_sample = self.det_n_to_sample(self.proportion, n_maj, n_min)
    if n_to_sample == 0:
        if verbose:
            print("StarOversampler: Sampling is not needed")
        return X.copy(), y.copy()

    # Implementation of the star topology
    X_min = X[y == min_label]
    centroid = np.mean(X_min, axis=0)

    # Points generated per minority sample (at least one), so the total
    # is k * len(X_min) and may differ from n_to_sample.
    k = max(1, int(np.rint(n_to_sample / len(X_min))))

    # Row-wise vectors pointing from each minority sample to the centroid.
    offsets = centroid - X_min
    synthetic = [
        point + float(step) / (k + 1) * offset
        for point, offset in zip(X_min, offsets)
        for step in range(1, k + 1)
    ]

    X_new = np.vstack(synthetic)
    y_new = np.repeat(min_label, len(synthetic))
    return np.vstack([X, X_new]), np.hstack([y, y_new])