def save_kfold_datasets()

in code/preprocessing.py [0:0]


def save_kfold_datasets(X, y, k):
    """ Splits the datasets (X,y) k folds and saves the output from 
    each fold into separate directories.

    Args:
        X : numpy array represents the features
        y : numpy array represetns the target
        k : int value represents the number of folds to split the given datasets
    """

    # Shuffles and Split dataset into k folds. Using fixed random state 
    # for repeatable dataset splits.
    kf = KFold(n_splits=k, random_state=23, shuffle=True)

    fold_idx = 0
    for train_index, test_index in kf.split(X, y=y, groups=None):    
       X_train, X_test = X[train_index], X[test_index]
       y_train, y_test = y[train_index], y[test_index]
       os.makedirs(f'{base_dir}/train/{fold_idx}', exist_ok=True)
       np.savetxt(f'{base_dir}/train/{fold_idx}/train_x.csv', X_train, delimiter=',')
       np.savetxt(f'{base_dir}/train/{fold_idx}/train_y.csv', y_train, delimiter=',')

       os.makedirs(f'{base_dir}/test/{fold_idx}', exist_ok=True)
       np.savetxt(f'{base_dir}/test/{fold_idx}/test_x.csv', X_test, delimiter=',')
       np.savetxt(f'{base_dir}/test/{fold_idx}/test_y.csv', y_test, delimiter=',')
       fold_idx += 1