In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Tabular Workflows: TabNet Pipeline

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/tabular_workflows/tabnet_on_vertex_pipelines.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Ftabular_workflows%2Ftabnet_on_vertex_pipelines.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/tabular_workflows/tabnet_on_vertex_pipelines.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/tabular_workflows/tabnet_on_vertex_pipelines.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

## Overview

This notebook showcases how to run the TabNet algorithm using Vertex AI Tabular Workflows.

Learn more about [Tabular Workflow for TabNet](https://cloud.google.com/vertex-ai/docs/tabular-data/tabular-workflows/tabnet).

### Objective

In this tutorial, you learn how to create classification models on tabular data using two of the Vertex AI TabNet Tabular Workflows. Each workflow is a managed instance of [Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines/introduction).

This tutorial uses the following Google Cloud ML services and resources:

- Vertex AI Training
- Vertex AI Pipelines
- Cloud Storage

The steps performed include:

- Create a TabNet CustomJob. This is the best option if you know which hyperparameters to use for training.
- Create a TabNet HyperparameterTuningJob. This allows you to get the best set of hyperparameters for your dataset.

After training, each pipeline returns a link to the Vertex Model UI. You can use the UI to deploy the model, get online predictions, or run batch prediction.

### Dataset

The dataset you use in this notebook is the [Bank Marketing](https://archive.ics.uci.edu/ml/datasets/bank+marketing) dataset.
It consists of data related to direct marketing campaigns (phone calls) of a Portuguese banking institution. The objective of the binary classification task in this notebook is to predict if a client subscribes to a term deposit or not. 

For this notebook, a subset of randomly selected rows that makes 90% of the original dataset was saved to `train.csv` file and hosted on Cloud Storage. To download the file, click [here](https://storage.googleapis.com/cloud-samples-data-us-central1/vertex-ai/tabular-workflows/datasets/bank-marketing/train.csv).

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Get Started

### Install Vertex AI SDK for Python and other required packages

In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform \
                                 google-cloud-pipeline-components

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only Colab needs an explicit restart to pick up the newly installed packages.
if "google.colab" in sys.modules:
    import IPython

    # Ask the running kernel to shut down with restart=True.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.

In [None]:
import sys

# The sign-in step below applies to Google Colab only; elsewhere it is skipped.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project. Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Google Cloud project used for the bucket, the Vertex AI SDK and the pipelines.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Region used for the bucket, the Vertex AI SDK and the pipelines.
LOCATION = "us-central1"  # @param {type: "string"}

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

When you submit a training job using the Cloud SDK, you upload a Python package
containing your training code to a Cloud Storage bucket. Vertex AI runs
the code from this package. In this tutorial, Vertex AI also saves the
trained model that results from your job in the same bucket. Using this model artifact, you can then
create a Vertex AI Model resource and use it for prediction.

In [None]:
# Cloud Storage URI (gs://...) under which the pipeline root directories are created.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket in LOCATION under PROJECT_ID; skip this cell if it already exists.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

### Notes about service account and permission

**By default, no configuration is required.** If you run into any permission-related issue, make sure the service accounts have the required roles listed in the [Service accounts for Tabular Workflow for TabNet, and Tabular Workflow for Wide & Deep, and Prophet documentation](https://cloud.google.com/vertex-ai/docs/tabular-data/tabular-workflows/service-accounts#fte-workflow).

#### Service Account

You use a service account to create Vertex AI Pipeline jobs. If you don't want to use your project's Compute Engine service account, set `SERVICE_ACCOUNT` to another service account ID.

In [None]:
# Leave the placeholder unchanged to have the next cell look up an account via gcloud.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
import sys

IS_COLAB = "google.colab" in sys.modules
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    else:  # IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Pipelines
Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step. You only need to run this step once per service account.

In [None]:
# Grant the service account the objectCreator role on the bucket (write artifacts).
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

# Grant the service account the objectViewer role on the bucket (read artifacts).
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

### Import libraries

In [None]:
# Import required modules
import os
from typing import Any, Dict, List, Optional, Tuple

from google.cloud import aiplatform, storage
from google_cloud_pipeline_components.preview.automl.tabular import \
    utils as automl_tabular_utils

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [None]:
# Initialize the Vertex AI SDK with the project and region chosen above.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

## Define helper functions
Define the following helper functions:

- `get_model_artifacts_path`: Gets the model artifacts path from task details.
- `get_model_uri`: Gets the model uri from the task details.
- `get_bucket_name_and_path`: Gets the bucket name and path.
- `download_from_gcs`: Downloads the content from the bucket.
- `write_to_gcs`: Uploads content into the bucket.
- `get_task_detail`: Gets the task details by using task name.
- `get_model_name`: Gets the model name from pipeline job ID.
- `get_evaluation_metrics`: Gets the evaluation metrics from pipeline task details.


In [None]:
# Get the model artifacts path from task details.


def get_model_artifacts_path(task_details: List[Dict[str, Any]], task_name: str) -> str:
    """Return the URI of the model artifacts produced by a pipeline task.

    Args:
        task_details: Task details of a pipeline job, searched by task name.
        task_name: Name of the task whose output is wanted.

    Returns:
        The `uri` of the first artifact under the task's
        "unmanaged_container_model" output.
    """
    model_output = get_task_detail(task_details, task_name).outputs[
        "unmanaged_container_model"
    ]
    first_artifact = model_output.artifacts[0]
    return first_artifact.uri


# Get the model uri from the task details.
def get_model_uri(task_details: List[Dict[str, Any]]) -> str:
    """Build the Cloud console URL of the model uploaded by a pipeline.

    Args:
        task_details: Task details of a pipeline job; the "model-upload" task
            is looked up in them.

    Returns:
        A console URL built from the module-level LOCATION and PROJECT_ID and
        the model ID taken from the upload task's "model" output.
    """
    upload_task = get_task_detail(task_details, "model-upload")
    # The artifact URI is in the format
    # https://<location>-aiplatform.googleapis.com/v1/projects/<project_number>/locations/<location>/models/<model_id>
    # so the model ID is whatever follows the last slash.
    artifact_uri = upload_task.outputs["model"].artifacts[0].uri
    model_id = artifact_uri.rsplit("/", 1)[-1]
    console_url = f"https://console.cloud.google.com/vertex-ai/locations/{LOCATION}/models/{model_id}?project={PROJECT_ID}"
    return console_url


# Get the bucket name and path.
def get_bucket_name_and_path(uri: str) -> Tuple[str, str]:
    """Split a Cloud Storage URI into its bucket name and object path.

    Args:
        uri: A URI of the form `gs://<bucket>/<object path>`.

    Returns:
        A `(bucket_name, object_path)` tuple. `object_path` is an empty string
        when the URI names only a bucket.

    Raises:
        ValueError: If `uri` does not start with `gs://`. Without this check
            the first five characters would be dropped regardless of what
            they are, silently yielding a wrong bucket name.
    """
    prefix = "gs://"
    if not uri.startswith(prefix):
        raise ValueError(
            f"Expected a Cloud Storage URI starting with 'gs://', got: {uri!r}"
        )
    # Everything up to the first "/" is the bucket; the remainder is the path.
    bucket_name, _, object_path = uri[len(prefix) :].partition("/")
    return bucket_name, object_path


# Get the content from the bucket.
def download_from_gcs(uri: str) -> bytes:
    """Download the content of a Cloud Storage object.

    Args:
        uri: `gs://` URI of the object to read.

    Returns:
        The raw content of the object as bytes.
    """
    bucket_name, path = get_bucket_name_and_path(uri)
    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(path)
    # download_as_string() is a deprecated alias that also returns bytes;
    # download_as_bytes() is the supported name for the same operation.
    return blob.download_as_bytes()


# Upload content into the bucket.
def write_to_gcs(uri: str, content: str) -> None:
    """Upload content to a Cloud Storage object.

    Args:
        uri: `gs://` URI of the destination object.
        content: Data to store in the object.
    """
    bucket_name, path = get_bucket_name_and_path(uri)
    # Pass the project explicitly, as download_from_gcs does, so the client does
    # not depend on a default project being configured in the environment.
    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(path)
    blob.upload_from_string(content)


# Get the task details by using task name.
def get_task_detail(
    task_details: List[Dict[str, Any]], task_name: str
) -> Optional[Any]:
    """Find a pipeline task by name.

    Args:
        task_details: Task detail objects of a pipeline job. Each element is
            read through its `task_name` attribute.
        task_name: Name of the task to look for.

    Returns:
        The first task detail whose `task_name` equals `task_name`, or None
        when no task matches.
    """
    matching_tasks = (
        detail for detail in task_details if detail.task_name == task_name
    )
    # Return None explicitly for "not found" instead of falling off the end.
    return next(matching_tasks, None)


# Get the model name from pipeline job ID.
def get_model_name(job_id: str) -> str:
    """Look up the resource name of the model uploaded by a pipeline job.

    Args:
        job_id: Identifier of the pipeline job, passed to
            `aiplatform.PipelineJob.get`.

    Returns:
        The "resourceName" metadata entry of the first "model" artifact
        produced by the job's "model-upload" task.
    """
    pipeline_job = aiplatform.PipelineJob.get(job_id)
    all_tasks = pipeline_job.gca_resource.job_detail.task_details
    upload_task = get_task_detail(all_tasks, "model-upload")
    model_artifact = upload_task.outputs["model"].artifacts[0]
    return model_artifact.metadata["resourceName"]


# Get the evaluation metrics.
def get_evaluation_metrics(
    task_details: List[Dict[str, Any]],
) -> bytes:
    """Download the evaluation metrics written by a pipeline.

    Args:
        task_details: Task details of a pipeline job; the "model-evaluation"
            task is looked up in them.

    Returns:
        The content of the first "evaluation_metrics" artifact, exactly as
        returned by `download_from_gcs` (raw bytes, not decoded text).
    """
    evaluation_task = get_task_detail(task_details, "model-evaluation")
    metrics_uri = evaluation_task.outputs["evaluation_metrics"].artifacts[0].uri
    return download_from_gcs(metrics_uri)

## Define training specifications

Before creating the training job, you complete the following steps in this section:

1. Configure the source dataset.
2. Configure the feature transformation process.
3. Configure the feature selection process.
4. Set up the parameters needed for running the training process.

### Configure the dataset

You define either of the following parameters:

- `data_source_csv_filenames`: The CSV data source. You specify the Cloud Storage path to the `train.csv` file described in the dataset section.
- `data_source_bigquery_table_path`: The BigQuery data source. As you use the Cloud Storage source, this is kept as none.

***Note***: The dataset's location has to be the same as the service location (i.e., `LOCATION`) set for launching the training pipeline.


In [None]:
# Cloud Storage path of the train.csv file described in the Dataset section.
data_source_csv_filenames = "gs://cloud-samples-data-us-central1/vertex-ai/tabular-workflows/datasets/bank-marketing/train.csv"
# Kept as None because the Cloud Storage source above is used instead of BigQuery.
data_source_bigquery_table_path = (
    None  # @param {type:"string"}, format: bq://bq_project.bq_dataset.bq_table
)

### Configure feature transformation

Transformations can be specified using Feature Transform Engine (FTE) specific configurations. FTE supports both TensorFlow-based row-level and BigQuery-based dataset-level transformations.

* **TensorFlow-based row-level transformations**:
  * Full automatic transformations: FTE automatically configures a set of built-in transformations for each input column based on its data statistics. This can be set via `tf_auto_transform_features` in the training pipeline.
  * Fully specified transformations: All transformations on input columns are explicitly specified with FTE's built-in transformations. Chaining of multiple transformations on a single column is also supported. These transformations can be saved to JSON configuration file and specified via `tf_transformations_path` argument of the training pipeline.
  * Custom transformations: Custom, bring-your-own transform function, where you can define and import your own transform function and use it with other FTE's built-in transformations. You can specify custom transformations as an array of JSON object and pass through the `tf_custom_transformation_definitions` argument of the training pipeline.
  

* **BigQuery-based dataset-level transformations**:
  * Fully specified transformations: All transformations on input columns are explicitly specified with FTE's built-in transformations. These transformations can be specified as an array of JSON objects via `dataset_level_transformations` argument of the training pipeline.
  * Custom transformations: Custom, bring-your-own transform function, where you can define and import your own transform function and use it with other FTE's built-in transformations. You can specify custom transformations as an array of JSON object and pass through the `dataset_level_custom_transformation_definitions` argument of the training pipeline.

Below, you configure full automatic transformations by specifying a list of input features to pass to the `tf_auto_transform_features` argument of the training pipeline.

Learn more about [feature transformation configurations](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.31/google_cloud_pipeline_components.experimental.automl.tabular.html#google_cloud_pipeline_components.experimental.automl.tabular.FeatureTransformEngineOp).

In [None]:
# Input columns passed as `tf_auto_transform_features` to both pipeline builders.
# The target column ("deposit") is not part of this list.
auto_transform_features = [
    "age",
    "job",
    "marital",
    "education",
    "default",
    "balance",
    "housing",
    "loan",
    "contact",
    "day",
    "month",
    "duration",
    "campaign",
    "pdays",
    "previous",
    "poutcome",
]

### Configure feature selection

In addition to transformations, you can also apply feature selection via Feature Transform Engine to use only highly ranked features, evaluated by supported algorithms. If enabled, it will be applied right after dataset level transformations, and exclude any feature that's not selected.

To enable it, you need to set `run_feature_selection` to True.

To configure the algorithm to use and the number of features to be selected, you need to set both the `feature_selection_algorithm` and `max_selected_features` parameters.

Learn more about [feature selection algorithms and configurations](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.31/google_cloud_pipeline_components.experimental.automl.tabular.html#google_cloud_pipeline_components.experimental.automl.tabular.FeatureTransformEngineOp).

In [None]:
# Whether feature selection is enabled (passed as `run_feature_selection`).
RUN_FEATURE_SELECTION = True  # @param {type:"boolean"}

# Name of the selection algorithm (passed as `feature_selection_algorithm`).
FEATURE_SELECTION_ALGORITHM = "AMI"  # @param {type:"string"}

# Number of features to select (passed as `max_selected_features`).
MAX_SELECTED_FEATURES = 10  # @param {type:"integer"}

### Setup training configuration

Now, you define the following parameters for training:

- `target_column`: The target column name.
- `prediction_type`: The type of prediction the model is to produce.
  'classification' or 'regression'.
- `predefined_split_key`: The predefined_split column name.
- `timestamp_split_key`: The timestamp_split column name.
- `stratified_split_key`: The stratified_split column name.
- `training_fraction`: The training fraction.
- `validation_fraction`: The validation fraction.
- `test_fraction`: The test fraction.
- `weight_column`: The weight column name.
- `run_evaluation`: Whether to run evaluation steps during training.

In [None]:
# Whether to run evaluation steps during training.
run_evaluation = True  # @param {type:"boolean"}
# Type of prediction the model produces: "classification" or "regression".
prediction_type = "classification"
# Name of the target column.
target_column = "deposit"

# Fraction split
training_fraction = 0.8
validation_fraction = 0.1
test_fraction = 0.1

# NOTE(review): the split keys and the weight column below are defined here but
# are not passed to the pipeline builder calls in the cells visible in this
# review — confirm whether that is intended.
timestamp_split_key = None  # timestamp column name when using timestamp split
stratified_split_key = None  # target column name when using stratified split

predefined_split_key = None
# When a predefined split column is given, the fractions are cleared
# (presumably because the two split methods are mutually exclusive — confirm).
if predefined_split_key:
    training_fraction = None
    validation_fraction = None
    test_fraction = None

weight_column = None

## Setup VPC configuration for Dataflow

In this section, you define the following parameters:

- `dataflow_subnetwork`: Dataflow's fully qualified subnetwork name, when empty the default subnetwork is used. See an [example](
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications).
- `dataflow_use_public_ips`: Specifies whether Dataflow workers use public IP
  addresses.

If you need to use a custom Dataflow subnetwork, you can set it through the `dataflow_subnetwork` parameter. The requirements are:
1. `dataflow_subnetwork` must be a fully qualified subnetwork name.
   [[reference](https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications)]
1. The following service accounts must have [Compute Network User role](https://cloud.google.com/compute/docs/access/iam#compute.networkUser) assigned on the specified dataflow subnetwork [[reference](https://cloud.google.com/dataflow/docs/guides/specifying-networks#shared)]:
    1. Compute Engine default service account: PROJECT_NUMBER-compute@developer.gserviceaccount.com
    1. Dataflow service account: service-PROJECT_NUMBER@dataflow-service-producer-prod.iam.gserviceaccount.com

If your project has VPC-SC enabled, please make sure of the following:

1. The dataflow subnetwork used in VPC-SC is configured properly for Dataflow.
   See [reference](https://cloud.google.com/dataflow/docs/guides/routes-firewall).
1. `dataflow_use_public_ips` is set to False.


In [None]:
# Fully qualified Dataflow subnetwork name; when empty the default subnetwork is used.
dataflow_subnetwork = ""  # @param {type:"string"}
# Whether Dataflow workers use public IP addresses.
dataflow_use_public_ips = True  # @param {type:"boolean"}

## Customize TabNet CustomJob configuration and create pipeline

Creating a TabNet CustomJob is the best choice if you know exactly which hyperparameter values to use for model training. It uses fewer training resources than a HyperparameterTuningJob.

In the example below, you configure the following key parameters:

- `root_dir`: The root GCS directory for the pipeline components.
- `worker_pool_specs_override`: The dictionary for overriding training and evaluation worker pool specs. The dictionary should follow a [particular format]( https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172). TabNet supports training using both CPUs and GPUs.
- `learning_rate`: The learning rate used by the linear optimizer.
- `max_steps`: Number of steps to run the trainer for.
- `max_train_secs`: Amount of time in seconds to run the trainer for.

Learn more about [pipeline inputs and model hyperparameters](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.23/google_cloud_pipeline_components.experimental.automl.tabular.html#google_cloud_pipeline_components.experimental.automl.tabular.utils.get_tabnet_trainer_pipeline_and_parameters).

Learn more about the parameters needed for [creating a pipeline job](https://cloud.google.com/vertex-ai/docs/pipelines/run-pipeline#create_a_pipeline_run).

In [None]:
# set a unique display name for your pipeline (also used as the job ID below)
pipeline_job_id = "tabnet-unique"  # @param {type: "string"}
# set the root dir
# Cloud Storage URIs always use "/" as separator, so build the URI with string
# formatting instead of os.path.join, whose separator depends on the local OS.
pipeline_job_root_dir = f"{BUCKET_URI.rstrip('/')}/tabnet_custom_job"
# set the worker pool specs
worker_pool_specs_override = [
    {"machine_spec": {"machine_type": "c2-standard-16"}}  # Override for TF chief node
]
# set the learning rate
learning_rate = 0.01
# max_steps and/or max_train_secs must be set. If both are
# specified, training stops after either condition is met.
# By default, max_train_secs is set to -1.
max_steps = 20

max_train_secs = -1

# To test GPU training, the worker_pool_specs_override can be specified like this.
# worker_pool_specs_override =  [
#     {"machine_spec": {
#       'machine_type': "n1-highmem-32",
#       "accelerator_type": "NVIDIA_TESLA_V100",
#       "accelerator_count": 2
#       }
#     }
#   ]

# define the pipeline
# If your system does not use Python, you can save the JSON file (`template_path`),
# and use another programming language to submit the pipeline.
(
    template_path,
    parameter_values,
) = automl_tabular_utils.get_tabnet_trainer_pipeline_and_parameters(
    project=PROJECT_ID,
    location=LOCATION,
    root_dir=pipeline_job_root_dir,
    max_steps=max_steps,
    max_train_secs=max_train_secs,
    learning_rate=learning_rate,
    target_column=target_column,
    prediction_type=prediction_type,
    tf_auto_transform_features=auto_transform_features,
    run_feature_selection=RUN_FEATURE_SELECTION,
    feature_selection_algorithm=FEATURE_SELECTION_ALGORITHM,
    max_selected_features=MAX_SELECTED_FEATURES,
    training_fraction=training_fraction,
    validation_fraction=validation_fraction,
    test_fraction=test_fraction,
    data_source_csv_filenames=data_source_csv_filenames,
    data_source_bigquery_table_path=data_source_bigquery_table_path,
    worker_pool_specs_override=worker_pool_specs_override,
    dataflow_use_public_ips=dataflow_use_public_ips,
    dataflow_subnetwork=dataflow_subnetwork,
    run_evaluation=run_evaluation,
)

# create the pipeline job
training_pipeline_job = aiplatform.PipelineJob(
    display_name=pipeline_job_id,
    template_path=template_path,
    job_id=pipeline_job_id,
    pipeline_root=pipeline_job_root_dir,
    parameter_values=parameter_values,
    enable_caching=False,
)

# run the pipeline
training_pipeline_job.run(service_account=SERVICE_ACCOUNT)

### Go to the Vertex Model UI

Through the link generated from the below cell, you can deploy the model and run online prediction or batch prediction.

In [None]:
# Fetch the custom-job pipeline by ID and keep its task details for the helpers.
custom_job_pipeline = aiplatform.PipelineJob.get(pipeline_job_id)
tabnet_trainer_pipeline_task_details = (
    custom_job_pipeline.gca_resource.job_detail.task_details
)
# Remember the model resource name; the clean-up cell deletes the model by it.
CUSTOM_JOB_MODEL = get_model_name(pipeline_job_id)

custom_job_model_url = get_model_uri(tabnet_trainer_pipeline_task_details)
print("model uri:", custom_job_model_url)

custom_job_artifacts_uri = get_model_artifacts_path(
    tabnet_trainer_pipeline_task_details, "tabnet-trainer"
)
print("model artifacts:", custom_job_artifacts_uri)

## Customize TabNet HyperparameterTuningJob configuration and create pipeline

To get the best set of hyperparameters on your dataset, it's recommended that you run a HyperparameterTuningJob.

Hyperparameters that can be tuned are set with the optional `study_spec_parameters_override` parameter. You use the helper function named `get_tabnet_study_spec_parameters_override` to get these hyperparameters. To this helper function, you provide:

- `dataset_size_bucket`: one of 'small' (< 1M rows), 'medium' (1M - 100M rows), or 'large' (> 100M rows).
- `training_budget_bucket`: one of 'small' (< \\$600), 'medium' (\\$600 - \\$2400), or 'large' (> \\$2400).
- `prediction_type`: The type of prediction the model is to produce. “classification” or “regression”.

Then, you get the list of hyperparameters and ranges. `study_spec_parameters_override` can be empty or one or more of the above hyperparameters can be specified. For hyperparameters not specified, you can set their ranges in the pipeline. Learn more about the [hyperparameters available for tuning](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.23/google_cloud_pipeline_components.experimental.automl.tabular.html#google_cloud_pipeline_components.experimental.automl.tabular.utils.get_tabnet_trainer_pipeline_and_parameters).

In addition to hyperparameters, HyperparameterTuningJob takes the following values:

- `root_dir`: The root GCS directory for the pipeline components.
- `worker_pool_specs_override`: The dictionary for overriding training and evaluation worker pool specs. The dictionary should follow a [particular format]( https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172). TabNet supports training using both CPUs and GPUs.
- `study_spec_metric_id`: Metric to optimize, possible values: ['loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall'].
- `study_spec_metric_goal`: Optimization goal of the metric, possible values: "MAXIMIZE", "MINIMIZE".
- `max_trial_count`: The desired total number of trials.
- `parallel_trial_count`: The desired number of trials to run in parallel.
- `max_failed_trial_count`: The number of failed trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides how many trials must fail before the whole job fails.
- `study_spec_algorithm`: The search algorithm specified for the study. One of
'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'.

Learn more about the [HyperparameterTuningJob parameters](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.23/google_cloud_pipeline_components.experimental.automl.tabular.html#google_cloud_pipeline_components.experimental.automl.tabular.utils.get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters).

Multiple trials can be configured. The pipeline returns the best trial based on the metric specified in `study_spec_metric_id` and the goal specified in `study_spec_metric_goal`. In the example below, you return the trial with the lowest loss value.

In [None]:
# set a unique display name for pipeline (also used as the job ID below)
pipeline_job_id = "tabnet-hpt-unique"  # @param {type: "string"}
# set the root dir
# Cloud Storage URIs always use "/" as separator, so build the URI with string
# formatting instead of os.path.join, whose separator depends on the local OS.
pipeline_job_root_dir = f"{BUCKET_URI.rstrip('/')}/tabnet_hyperparameter_tuning_job"
# set the worker pool specs
worker_pool_specs_override = [
    {"machine_spec": {"machine_type": "c2-standard-16"}}  # Override for TF chief node
]
# set the metric
study_spec_metric_id = "loss"
# set the objective for metric
study_spec_metric_goal = "MINIMIZE"

# To test GPU training, the worker_pool_specs_override can be specified like this.
# worker_pool_specs_override =  [
#    {
#       "machine_spec":{
#          "machine_type":"n1-highmem-32",
#          "accelerator_type":"NVIDIA_TESLA_V100",
#          "accelerator_count":2
#       }
#    }
# ]


# define the component to get the hyperparameters
# max_steps and/or max_train_secs must be set. If both are
# specified, training stops after either condition is met.
# By default, max_train_secs is set to -1 and max_steps is set to
# an appropriate range given dataset_size and training budget.
study_spec_parameters_override = (
    automl_tabular_utils.get_tabnet_study_spec_parameters_override(
        dataset_size_bucket="small",
        prediction_type=prediction_type,
        training_budget_bucket="small",
    )
)

# define the hyperparameter tuning pipeline
# If your system does not use Python, you can save the JSON file (`template_path`),
# and use another programming language to submit the pipeline.
(
    template_path,
    parameter_values,
) = automl_tabular_utils.get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters(
    project=PROJECT_ID,
    location=LOCATION,
    root_dir=pipeline_job_root_dir,
    target_column=target_column,
    prediction_type=prediction_type,
    tf_auto_transform_features=auto_transform_features,
    run_feature_selection=RUN_FEATURE_SELECTION,
    feature_selection_algorithm=FEATURE_SELECTION_ALGORITHM,
    max_selected_features=MAX_SELECTED_FEATURES,
    training_fraction=training_fraction,
    validation_fraction=validation_fraction,
    test_fraction=test_fraction,
    data_source_csv_filenames=data_source_csv_filenames,
    data_source_bigquery_table_path=data_source_bigquery_table_path,
    study_spec_metric_id=study_spec_metric_id,
    study_spec_metric_goal=study_spec_metric_goal,
    study_spec_parameters_override=study_spec_parameters_override,
    max_trial_count=1,
    parallel_trial_count=1,
    max_failed_trial_count=0,
    worker_pool_specs_override=worker_pool_specs_override,
    dataflow_use_public_ips=dataflow_use_public_ips,
    dataflow_subnetwork=dataflow_subnetwork,
    # Honour the notebook-level `run_evaluation` setting, as the custom-job
    # pipeline does, instead of always forcing evaluation on.
    run_evaluation=run_evaluation,
)

# create the pipeline job
tuning_pipeline_job = aiplatform.PipelineJob(
    display_name=pipeline_job_id,
    template_path=template_path,
    job_id=pipeline_job_id,
    pipeline_root=pipeline_job_root_dir,
    parameter_values=parameter_values,
    enable_caching=False,
)

# run the pipeline job
tuning_pipeline_job.run(service_account=SERVICE_ACCOUNT)

### Go to the Vertex Model UI

Through the link generated from the below cell, you can deploy the model and run online prediction or batch prediction.

In [None]:
# Fetch the tuning pipeline by ID and keep its task details for the helpers.
hpt_pipeline = aiplatform.PipelineJob.get(pipeline_job_id)
tabnet_hpt_pipeline_task_details = hpt_pipeline.gca_resource.job_detail.task_details
# Remember the model resource name; the clean-up cell deletes the model by it.
HPT_JOB_MODEL = get_model_name(pipeline_job_id)

hpt_model_url = get_model_uri(tabnet_hpt_pipeline_task_details)
print("model uri:", hpt_model_url)

# For the tuning pipeline the artifacts are read from the best-trial task.
hpt_artifacts_uri = get_model_artifacts_path(
    tabnet_hpt_pipeline_task_details, "get-best-hyperparameter-tuning-job-trial"
)
print("model artifacts:", hpt_artifacts_uri)

## Clean up Vertex and BigQuery resources

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial:

- Pipeline from CustomJob pipeline
- Pipeline from HyperparameterTuningJob pipeline
- Model from CustomJob pipeline
- Model from HyperparameterTuningJob pipeline
- Cloud Storage Bucket (set `delete_bucket` to True to delete the bucket)

In [None]:
# Delete the training pipeline job
training_pipeline_job.delete()

# Delete the tuning pipeline job
tuning_pipeline_job.delete()

# Delete model resources
custom_job_model = aiplatform.Model(CUSTOM_JOB_MODEL)
hpt_job_model = aiplatform.Model(HPT_JOB_MODEL)
custom_job_model.delete()
hpt_job_model.delete()

# Delete Cloud Storage objects that were created
delete_bucket = False  # Set True for deletion
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI