in tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/tensorflow_bring_your_own_california_housing_local_training_and_batch_transform.py [0:0]
def download_training_and_eval_data() -> None:
    """Materialize the California housing train/test CSV files under ``<cwd>/data``.

    The ``data/train``, ``data/test``, ``data/input`` and ``data/output``
    directories are always created if missing. When every CSV this function
    produces is already on disk, nothing is regenerated. Otherwise the dataset
    is fetched, split, standardized and written as header-less, index-less CSV:

    * ``data/train/x_train.csv`` and ``data/train/y_train.csv``
    * ``data/test/x_test.csv`` and ``data/test/y_test.csv``
    * ``data/input/x_test.csv`` (a second copy of the scaled test features)

    All paths are resolved against the current working directory.
    """
    data_dir = os.path.join(os.getcwd(), 'data')
    train_dir = os.path.join(data_dir, 'train')
    test_dir = os.path.join(data_dir, 'test')
    input_dir = os.path.join(data_dir, 'input')
    output_dir = os.path.join(data_dir, 'output')

    # Create the directory layout up front so that even the "skip" path leaves
    # every directory (notably data/output, which holds no CSV) in place.
    for directory in (data_dir, train_dir, test_dir, input_dir, output_dir):
        os.makedirs(directory, exist_ok=True)

    # Every file written below must be present before the download is skipped;
    # this includes the copy of the test features placed in data/input.
    expected_files = (
        os.path.join(train_dir, 'x_train.csv'),
        os.path.join(train_dir, 'y_train.csv'),
        os.path.join(test_dir, 'x_test.csv'),
        os.path.join(test_dir, 'y_test.csv'),
        os.path.join(input_dir, 'x_test.csv'),
    )
    if all(os.path.isfile(path) for path in expected_files):
        print('Training and evaluation datasets exist. Skipping Download')
        return

    print('Downloading training and evaluation dataset')
    data_set = fetch_california_housing()
    X = pd.DataFrame(data_set.data, columns=data_set.feature_names)
    Y = pd.DataFrame(data_set.target)

    # Partition the dataset into roughly 2/3 training and 1/3 test.
    # NOTE(review): no random_state is passed, so the split presumably differs
    # from run to run; all files are rewritten together to stay consistent.
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, Y, test_size=0.33
    )

    # Fit the scaler on the training features only, then apply the same
    # transformation to both the training and the test features.
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    outputs = (
        (x_train, train_dir, 'x_train.csv'),
        (x_test, test_dir, 'x_test.csv'),
        (x_test, input_dir, 'x_test.csv'),
        (y_train, train_dir, 'y_train.csv'),
        (y_test, test_dir, 'y_test.csv'),
    )
    for values, directory, file_name in outputs:
        # header=False is the documented way to omit the column-name row.
        pd.DataFrame(values).to_csv(
            os.path.join(directory, file_name), header=False, index=False
        )
    print('Downloading completed')