# blog_train_algos.py
import os
import pathlib

import boto3
import pandas as pd

# GluonTS and SageMaker Debugger imports. The module paths below assume the
# MXNet-based GluonTS 0.x releases this script was written against; newer
# releases moved Trainer to gluonts.mx.trainer.
from gluonts.dataset.common import ListDataset
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.lstnet import LSTNetEstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.transformer import TransformerEstimator
from gluonts.trainer import Trainer
from smdebug.mxnet import Hook

# S3 client used for uploading metrics and model artifacts.
s3 = boto3.client("s3")

def train(bucket, seq, algo, freq, prediction_length, epochs,
          learning_rate, hybridize, num_batches_per_epoch):
    # Create the training dataset from the SageMaker "train" channel.
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] + "/train.csv",
                     header=0, index_col=0)
    training_data = ListDataset([{"start": df.index[0],
                                  "target": df.usage[:],
                                  "item_id": df.client[:]}],
                                freq=freq)

    # Create the test dataset from the SageMaker "test" channel. Note that the
    # test entry hard-codes its item_id to 'client_12'.
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] + "/test.csv",
                     header=0, index_col=0)
    test_data = ListDataset([{"start": df.index[0],
                              "target": df.usage[:],
                              "item_id": 'client_12'}],
                            freq=freq)
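    # Assumed layout of train.csv / test.csv (the files themselves are not
    # shown in this snippet): a timestamp index in the first column, a `usage`
    # column with the target values, and a `client` column naming the series,
    # e.g.
    #
    #   timestamp,usage,client
    #   2019-01-01 00:00:00,512.3,client_12
    #   2019-01-01 01:00:00,498.7,client_12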

    # SageMaker Debugger hook, configured from the JSON file SageMaker provides.
    hook = Hook.create_from_json_file()

    # Select and configure the estimator ###############################
    if algo == "DeepAR":
        estimator = DeepAREstimator(freq=freq,
                                    prediction_length=prediction_length,
                                    context_length=1,
                                    trainer=Trainer(ctx="cpu",
                                                    epochs=epochs,
                                                    learning_rate=learning_rate,
                                                    hybridize=hybridize,
                                                    num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training complete: SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(freq=freq,
                                               prediction_length=prediction_length,
                                               trainer=Trainer(ctx="cpu",
                                                               epochs=epochs,
                                                               learning_rate=learning_rate,
                                                               hybridize=hybridize,
                                                               num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    elif algo == "lstnet":
        # LSTNet is a multivariate model, so group the univariate entries into
        # a single multivariate target (needed for LSTNet only).
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4
        estimator = LSTNetEstimator(freq=freq,
                                    prediction_length=prediction_length,
                                    context_length=context_length,
                                    num_series=num_series,
                                    skip_size=skip_size,
                                    ar_window=ar_window,
                                    channels=channels,
                                    trainer=Trainer(ctx="cpu",
                                                    epochs=epochs,
                                                    learning_rate=learning_rate,
                                                    hybridize=hybridize,
                                                    num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(freq=freq,
                                   prediction_length=prediction_length,
                                   trainer=Trainer(ctx="cpu",
                                                   epochs=epochs,
                                                   learning_rate=learning_rate,
                                                   hybridize=hybridize,
                                                   num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    else:
        # Fall back to the Transformer estimator for any other algo value.
        estimator = TransformerEstimator(freq=freq,
                                         prediction_length=prediction_length,
                                         trainer=Trainer(ctx="cpu",
                                                         epochs=epochs,
                                                         learning_rate=learning_rate,
                                                         hybridize=hybridize,
                                                         num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    ####################################################################
    # Evaluate the trained model on the test data.
    forecast_it, ts_it = make_evaluation_predictions(test_data, predictor, num_samples=100)
    print("EVALUATION complete: SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved: SUCCESS")
    # Write the aggregate metrics to a local CSV and upload it to S3.
    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.makedirs("metrics", exist_ok=True)
    cspath = os.path.join("metrics", file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)
    # Emit the key metrics through the Debugger hook so SageMaker records them.
    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)
    print("MAPE:", agg_metrics["MAPE"])

    # Serialize the trained predictor to the model directory and upload it to S3.
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))
    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)
    return predictor
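
# `uploadDirectory` is called above but defined elsewhere in the blog's script.
# A minimal sketch of what such a helper could look like, assuming the boto3
# client `s3` defined at the top of this file: walk the local directory and
# mirror every file under the given S3 prefix.
def uploadDirectory(path, prefix, bucket):
    for root, _, files in os.walk(path):
        for name in files:
            local_path = os.path.join(root, name)
            key = prefix + os.path.relpath(local_path, path)
            s3.upload_file(local_path, bucket, key)


# Hypothetical entry point (not part of the original snippet): a SageMaker
# script-mode job passes hyperparameters as command-line flags, which could be
# parsed roughly like this before calling train():
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--bucket", type=str)
    parser.add_argument("--seq", type=str)
    parser.add_argument("--algo", type=str, default="DeepAR")
    parser.add_argument("--freq", type=str, default="D")
    parser.add_argument("--prediction-length", type=int, default=30)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--hybridize", type=lambda s: s.lower() == "true", default=False)
    parser.add_argument("--num-batches-per-epoch", type=int, default=10)
    args = parser.parse_args()

    train(args.bucket, args.seq, args.algo, args.freq, args.prediction_length,
          args.epochs, args.learning_rate, args.hybridize,
          args.num_batches_per_epoch)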