# blog_train_algos.py
import os
import pathlib

import boto3
import pandas as pd

# GluonTS and SageMaker Debugger imports. The module paths below assume the
# MXNet-based GluonTS 0.x releases this script was written against; newer
# releases moved Trainer to gluonts.mx.trainer.
from gluonts.dataset.common import ListDataset
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.lstnet import LSTNetEstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.transformer import TransformerEstimator
from gluonts.trainer import Trainer
from smdebug.mxnet import Hook

# S3 client used for uploading metrics and model artifacts.
s3 = boto3.client("s3")

def train(bucket, seq, algo, freq, prediction_length, epochs,
          learning_rate, hybridize, num_batches_per_epoch):
    # Create the training dataset from the SageMaker "train" channel.
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] + "/train.csv",
                     header=0, index_col=0)
    training_data = ListDataset([{"start": df.index[0],
                                  "target": df.usage[:],
                                  "item_id": df.client[:]}],
                                freq=freq)

    # Create the test dataset from the SageMaker "test" channel. Note that the
    # test entry hard-codes its item_id to 'client_12'.
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] + "/test.csv",
                     header=0, index_col=0)
    test_data = ListDataset([{"start": df.index[0],
                              "target": df.usage[:],
                              "item_id": 'client_12'}],
                            freq=freq)
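    # Assumed layout of train.csv / test.csv (the files themselves are not
    # shown in this snippet): a timestamp index in the first column, a `usage`
    # column with the target values, and a `client` column naming the series,
    # e.g.
    #
    #   timestamp,usage,client
    #   2019-01-01 00:00:00,512.3,client_12
    #   2019-01-01 01:00:00,498.7,client_12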

    # SageMaker Debugger hook, configured from the JSON file SageMaker provides.
    hook = Hook.create_from_json_file()

    # Select and configure the estimator ###############################
    if algo == "DeepAR":
        estimator = DeepAREstimator(freq=freq,
                                    prediction_length=prediction_length,
                                    context_length=1,
                                    trainer=Trainer(ctx="cpu",
                                                    epochs=epochs,
                                                    learning_rate=learning_rate,
                                                    hybridize=hybridize,
                                                    num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training complete: SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(freq=freq,
                                               prediction_length=prediction_length,
                                               trainer=Trainer(ctx="cpu",
                                                               epochs=epochs,
                                                               learning_rate=learning_rate,
                                                               hybridize=hybridize,
                                                               num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    elif algo == "lstnet":
        # LSTNet is a multivariate model, so group the univariate entries into
        # a single multivariate target (needed for LSTNet only).
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4
        estimator = LSTNetEstimator(freq=freq,
                                    prediction_length=prediction_length,
                                    context_length=context_length,
                                    num_series=num_series,
                                    skip_size=skip_size,
                                    ar_window=ar_window,
                                    channels=channels,
                                    trainer=Trainer(ctx="cpu",
                                                    epochs=epochs,
                                                    learning_rate=learning_rate,
                                                    hybridize=hybridize,
                                                    num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(freq=freq,
                                   prediction_length=prediction_length,
                                   trainer=Trainer(ctx="cpu",
                                                   epochs=epochs,
                                                   learning_rate=learning_rate,
                                                   hybridize=hybridize,
                                                   num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    else:
        # Fall back to the Transformer estimator for any other algo value.
        estimator = TransformerEstimator(freq=freq,
                                         prediction_length=prediction_length,
                                         trainer=Trainer(ctx="cpu",
                                                         epochs=epochs,
                                                         learning_rate=learning_rate,
                                                         hybridize=hybridize,
                                                         num_batches_per_epoch=num_batches_per_epoch))
        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training complete: SUCCESS")
    ####################################################################
    # Evaluate the trained model on the test data.
    forecast_it, ts_it = make_evaluation_predictions(test_data, predictor, num_samples=100)
    print("EVALUATION complete: SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved: SUCCESS")
    # Write the aggregate metrics to a local CSV and upload it to S3.
    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.makedirs("metrics", exist_ok=True)
    cspath = os.path.join("metrics", file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)
    # Emit the key metrics through the Debugger hook so SageMaker records them.
    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)
    print("MAPE:", agg_metrics["MAPE"])

    # Serialize the trained predictor to the model directory and upload it to S3.
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))
    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)
    return predictor
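
# `uploadDirectory` is called above but defined elsewhere in the blog's script.
# A minimal sketch of what such a helper could look like, assuming the boto3
# client `s3` defined at the top of this file: walk the local directory and
# mirror every file under the given S3 prefix.
def uploadDirectory(path, prefix, bucket):
    for root, _, files in os.walk(path):
        for name in files:
            local_path = os.path.join(root, name)
            key = prefix + os.path.relpath(local_path, path)
            s3.upload_file(local_path, bucket, key)


# Hypothetical entry point (not part of the original snippet): a SageMaker
# script-mode job passes hyperparameters as command-line flags, which could be
# parsed roughly like this before calling train():
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--bucket", type=str)
    parser.add_argument("--seq", type=str)
    parser.add_argument("--algo", type=str, default="DeepAR")
    parser.add_argument("--freq", type=str, default="D")
    parser.add_argument("--prediction-length", type=int, default=30)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--hybridize", type=lambda s: s.lower() == "true", default=False)
    parser.add_argument("--num-batches-per-epoch", type=int, default=10)
    args = parser.parse_args()

    train(args.bucket, args.seq, args.algo, args.freq, args.prediction_length,
          args.epochs, args.learning_rate, args.hybridize,
          args.num_batches_per_epoch)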