# ec2-spot-deep-learning-training/ec2_spot_keras_training.py
def main():
    """Train the CIFAR-10 Keras model on a spot instance, resuming from any
    checkpoint found on the attached volume.

    Side effects: reads/writes under the volume mount (datasets, checkpoints)
    and copies the cloud-init console log there after training finishes.
    """
    # Hyperparameters for this run.
    batch_size = 512
    epochs = 50

    # Locations on the persistent volume that survives spot interruption.
    volume_mount_dir = '/dltraining/'
    dataset_path = os.path.join(volume_mount_dir, 'datasets')
    checkpoint_path = os.path.join(volume_mount_dir, 'checkpoints')
    checkpoint_names = 'cifar10_model.{epoch:03d}.h5'
    today_date = datetime.datetime.today().strftime('%Y-%m-%d')

    # Fetch and preprocess the dataset; input shape is taken from the data.
    (x_train, y_train), (x_test, y_test) = load_prepare_dataset(dataset_path)
    input_shape = x_train.shape[1:]

    # Resume from an existing checkpoint when one is present, otherwise
    # build a fresh model and start at epoch 0.
    resume = (os.path.isdir(checkpoint_path)
              and any(glob.glob(os.path.join(checkpoint_path, '*'))))
    if resume:
        model, epoch_number = load_checkpoint_model(checkpoint_path,
                                                    checkpoint_names)
    else:
        model = cifar10_model(input_shape)
        epoch_number = 0

    # Callbacks handle checkpointing / logging during training.
    callbacks = define_callbacks(volume_mount_dir, checkpoint_path,
                                 checkpoint_names, today_date)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              initial_epoch=epoch_number,
              callbacks=callbacks)

    # Evaluate on the held-out test split.
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    # Preserve the console log next to the training artifacts.
    shutil.copy2('/var/log/cloud-init-output.log',
                 os.path.join(volume_mount_dir,
                              'cloud-init-output-{}.log'.format(today_date)))