in code/preprocessing.py [0:0]
def save_kfold_datasets(X, y, k):
    """Split the dataset (X, y) into k folds and write every fold to disk.

    For fold number ``i`` (counted from 0) the training part is saved as
    ``train_x.csv`` / ``train_y.csv`` under ``{base_dir}/train/{i}`` and the
    held-out part as ``test_x.csv`` / ``test_y.csv`` under
    ``{base_dir}/test/{i}``. Missing directories are created; ``base_dir`` is
    read from the enclosing module scope.

    Args:
        X : numpy array representing the features
        y : numpy array representing the target
        k : int value, the number of folds to split the given dataset into
    """
    # Shuffle before splitting; the fixed random state keeps the splits
    # repeatable from one run to the next.
    splitter = KFold(n_splits=k, random_state=23, shuffle=True)
    fold_indices = splitter.split(X, y=y, groups=None)

    for fold_no, (train_rows, test_rows) in enumerate(fold_indices):
        # Slice everything up front so an indexing failure surfaces before
        # any directory or file is created for this fold.
        features_train = X[train_rows]
        features_test = X[test_rows]
        target_train = y[train_rows]
        target_test = y[test_rows]

        partitions = (
            ('train', features_train, target_train),
            ('test', features_test, target_test),
        )
        for split_name, features, target in partitions:
            out_dir = f'{base_dir}/{split_name}/{fold_no}'
            os.makedirs(out_dir, exist_ok=True)
            np.savetxt(f'{out_dir}/{split_name}_x.csv', features, delimiter=',')
            np.savetxt(f'{out_dir}/{split_name}_y.csv', target, delimiter=',')