in catboost_scikit_learn_script_mode_local_training_and_serving/catboost_scikit_learn_script_mode_local_training_and_serving.py [0:0]
def download_training_and_eval_data():
    """Materialize the Boston housing train/validation/test CSV files locally.

    If all three expected CSV files already exist under ``./data`` the
    function prints a message and returns without touching anything.
    Otherwise it creates the ``./data/{train,validation,test}`` directories,
    loads the dataset via ``load_boston()``, splits it 75% / 12.5% / 12.5%
    (train / validation / test) with a fixed ``random_state`` and writes each
    split to the module-level paths ``local_train``, ``local_validation`` and
    ``local_test``.

    The train and validation files carry the features followed by a trailing
    ``target`` column; the test file carries the features only. All files are
    written without a header row and without the index.
    """
    # NOTE(review): the existence check uses literal paths while the writes
    # below use local_train / local_validation / local_test, which are
    # defined outside this block -- confirm they point at the same files.
    expected_files = (
        './data/train/boston_train.csv',
        './data/validation/boston_validation.csv',
        './data/test/boston_test.csv',
    )
    if all(os.path.isfile(path) for path in expected_files):
        print('Training dataset exist. Skipping Download')
        return

    print('Downloading training dataset')

    # makedirs also creates the shared "./data" parent directory.
    for directory in ("./data/train", "./data/validation", "./data/test"):
        os.makedirs(directory, exist_ok=True)

    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2; this call requires an older scikit-learn -- confirm the pinned
    # version before upgrading.
    data = load_boston()

    # First hold out 25% of the rows, then split that hold-out in half to get
    # separate validation and test sets.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        data.data, data.target, test_size=0.25, random_state=45
    )
    X_val, X_test, y_val, _y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=45
    )

    trainX = pd.DataFrame(X_train, columns=data.feature_names)
    trainX['target'] = y_train

    # Build the validation frame from the validation split. The previous code
    # used X_test / y_test here, which made the validation file a labelled
    # copy of the test set and left X_val / y_val unused.
    valX = pd.DataFrame(X_val, columns=data.feature_names)
    valX['target'] = y_val

    # The test frame holds features only (no target column is attached).
    testX = pd.DataFrame(X_test, columns=data.feature_names)

    trainX.to_csv(local_train, header=None, index=False)
    valX.to_csv(local_validation, header=None, index=False)
    testX.to_csv(local_test, header=None, index=False)

    print('Downloading completed')