def train_and_evaluate()

in courses/DSL/challenge-mlprep/fraud_detection/trainer/model.py [0:0]


def train_and_evaluate(hparams):
    """Train the DNN model, report the tuning metric, and export the model.

    Any existing ``output_dir`` is deleted first, so artifacts from a
    previous run are never mixed with the new ones.

    Args:
        hparams: Mapping of run parameters. The following keys are read:
            num_bins: Number of buckets for float-valued columns.
            hash_bkts: Number of hash buckets for idOrig and idDest features.
            batch_size: Batch size for training loop.
            train_data_path: Location of training data CSVs.
            eval_data_path: Location of eval data CSVs.
            num_evals: Number of evaluations to perform during training;
                used as the number of Keras epochs.
            num_examples_to_train_on: Total number of examples to train on.
            output_dir: Output directory for model artifacts post-training.

    Returns:
        The history object returned by ``model.fit``.

    Raises:
        KeyError: If a required key is missing from ``hparams`` (assuming a
            dict-like mapping), or if ``val_auc`` is absent from the
            training history.
        ZeroDivisionError: If ``batch_size * num_evals`` is zero.
    """
    num_bins = hparams["num_bins"]
    hash_bkts = hparams["hash_bkts"]
    batch_size = hparams["batch_size"]
    train_data_path = hparams["train_data_path"]
    eval_data_path = hparams["eval_data_path"]
    num_evals = hparams["num_evals"]
    num_examples_to_train_on = hparams["num_examples_to_train_on"]
    output_dir = hparams["output_dir"]

    # Define output paths for model artifacts.
    model_export_path = os.path.join(output_dir, "savedmodel")
    checkpoint_path = os.path.join(output_dir, "checkpoints")
    tensorboard_path = os.path.join(output_dir, "tensorboard")

    # Start from a clean slate: remove artifacts left by a previous run.
    if tf.io.gfile.exists(output_dir):
        tf.io.gfile.rmtree(output_dir)

    # Create training, adaptation and evaluation datasets using helper
    # functions. The adaptation dataset is built from the training data path.
    trainds = create_train_dataset(train_data_path, batch_size)
    adaptds = create_adapt_dataset(train_data_path, batch_size)
    evalds = create_eval_dataset(eval_data_path, batch_size)

    # Build DNN model using helper function.
    model = build_dnn_model(adaptds, num_bins, hash_bkts)

    # model.summary() returns None and prints to stdout, so passing its
    # result to logging.info would only log "None". Capture the summary
    # through print_fn instead; **_kwargs absorbs any extra keyword
    # arguments that some Keras versions pass to print_fn.
    summary_lines = []
    model.summary(
        print_fn=lambda line, **_kwargs: summary_lines.append(line)
    )
    logging.info("%s", "\n".join(summary_lines))

    # Define number of training steps per evaluation during training process
    steps_per_epoch = num_examples_to_train_on // (batch_size * num_evals)

    # Define callbacks to save model checkpoints (per eval) and log metrics
    # for Tensorboard
    checkpoint_cb = callbacks.ModelCheckpoint(
        checkpoint_path, save_weights_only=True, verbose=1
    )
    tensorboard_cb = callbacks.TensorBoard(tensorboard_path, histogram_freq=0)

    # Train Keras model; each epoch ends with an evaluation on evalds.
    history = model.fit(
        trainds,
        validation_data=evalds,
        epochs=num_evals,
        # Guard against zero steps when the example budget is smaller than
        # batch_size * num_evals.
        steps_per_epoch=max(1, steps_per_epoch),
        verbose=1,  # 0=silent, 1=progress bar, 2=one line per epoch
        callbacks=[checkpoint_cb, tensorboard_cb],
    )

    # Export the final validation AUC to Vertex AI for hyperparameter tuning
    # jobs. NOTE(review): assumes the model built by build_dnn_model is
    # compiled with a metric named "auc" -- confirm against that helper.
    hp_metric = history.history['val_auc'][-1]

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='ROC AUC',
        metric_value=hp_metric,
        global_step=num_evals)

    # Exporting the model with default serving function.
    model.save(model_export_path)
    return history