def download_training_and_eval_data()

in catboost_scikit_learn_script_mode_local_training_and_serving/catboost_scikit_learn_script_mode_local_training_and_serving.py [0:0]


def download_training_and_eval_data():
    """Materialize the Boston housing train/validation/test CSV files locally.

    If all three of ``./data/train/boston_train.csv``,
    ``./data/validation/boston_validation.csv`` and
    ``./data/test/boston_test.csv`` already exist, nothing is written.

    Otherwise the dataset is loaded with ``load_boston()`` and split 75/25
    into train and hold-out; the hold-out is then split in half into
    validation and test (both splits use ``random_state=45``). Three CSV
    files are written without header row and without index:

    * train and validation: feature columns followed by a ``target`` column
    * test: feature columns only (no target)

    Output paths come from the module-level names ``local_train``,
    ``local_validation`` and ``local_test``, which are defined outside this
    function.
    """
    # NOTE(review): the existence check uses hard-coded paths while the
    # writes below use local_train/local_validation/local_test; presumably
    # they refer to the same files -- verify against the module constants.
    if os.path.isfile('./data/train/boston_train.csv') and \
            os.path.isfile('./data/validation/boston_validation.csv') and \
            os.path.isfile('./data/test/boston_test.csv'):
        print('Training dataset exist. Skipping Download')
        return

    print('Downloading training dataset')

    # makedirs creates the shared "./data" parent as needed.
    for directory in ("./data/train", "./data/validation", "./data/test"):
        os.makedirs(directory, exist_ok=True)

    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
    # call requires an older scikit-learn release.
    data = load_boston()

    # First split: 75% train / 25% hold-out.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        data.data, data.target, test_size=0.25, random_state=45)
    # Second split: hold-out divided evenly into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=45)

    trainX = pd.DataFrame(X_train, columns=data.feature_names)
    trainX['target'] = y_train

    # Build the validation frame from the validation split. The previous
    # code used X_test/y_test here, which made the validation file a copy
    # of the test rows and discarded the real validation split.
    valX = pd.DataFrame(X_val, columns=data.feature_names)
    valX['target'] = y_val

    # The test file intentionally carries features only.
    testX = pd.DataFrame(X_test, columns=data.feature_names)

    trainX.to_csv(local_train, header=None, index=False)
    valX.to_csv(local_validation, header=None, index=False)
    testX.to_csv(local_test, header=None, index=False)

    print('Downloading completed')