In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Feedback or issues?

For any feedback or questions, please open an [issue](https://github.com/googleapis/python-aiplatform/issues).

# Vertex SDK for Python: AutoML Video Action Recognition Example
To use this Jupyter notebook, copy the notebook to a Google Cloud Notebooks instance with TensorFlow installed and open it. You can run each step, or cell, and see its results. To run a cell, use Shift+Enter. Jupyter automatically displays the return value of the last line in each cell. For more information about running notebooks in Google Cloud Notebooks, see the [Google Cloud Notebooks guide](https://cloud.google.com/vertex-ai/docs/general/notebooks).


This notebook demonstrates how to create an AutoML Video Action Recognition model with a Vertex AI video dataset, and how to serve the model for batch prediction. It requires you to provide a bucket where the dataset will be stored.

Note: you may incur charges for training, prediction, storage or usage of other GCP products in connection with testing this SDK.

### Install Vertex SDK for Python


After the SDK installation the kernel will be automatically restarted.

In [None]:
!pip3 uninstall -y google-cloud-aiplatform
!pip3 install google-cloud-aiplatform
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

### Enter Your Project and GCS Bucket

Enter your Project Id in the cell below. Then run the cell to make sure the Cloud SDK uses the right project for all the commands in this notebook.

In [None]:
# Placeholders: replace both values before running the rest of the notebook.
MY_PROJECT = "YOUR PROJECT ID"
MY_STAGING_BUCKET = "gs://YOUR BUCKET"  # bucket should be in the same region as Vertex AI ("ucaip" is its former name)

In [None]:
import sys

# The interactive auth flow is only needed (and only available) inside Colab;
# in any other environment this cell is a no-op.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    import os
    from google.colab import auth

    # Sign the user in, then expose the chosen project through the environment.
    auth.authenticate_user()
    os.environ["GOOGLE_CLOUD_PROJECT"] = MY_PROJECT

### Set Your Task Name, and GCS Prefix

Set the values below if you want to centralize all input and output files under a single GCS location.

In [None]:
# Identifiers combined into the task name used for display names and GCS paths.
TASK_TYPE = "mbsdk_automl-video-training"
PREDICTION_TYPE = "action_recognition"
MODEL_TYPE = "CLOUD"

TASK_NAME = f"{TASK_TYPE}_{PREDICTION_TYPE}"

# Derive the bare bucket name from the staging bucket URI.
# - split(..., 1)[-1] tolerates a value entered without the "gs://" scheme
#   (the previous `[1]` index raised IndexError in that case).
# - strip("/") drops a trailing slash so that paths built as
#   f"gs://{BUCKET_NAME}/..." do not end up containing "//".
BUCKET_NAME = MY_STAGING_BUCKET.split("gs://", 1)[-1].strip("/")
GCS_PREFIX = TASK_NAME

print(f"Bucket Name:    {BUCKET_NAME}")
print(f"Task Name:      {TASK_NAME}")

# HMDB: a large human motion database
We prepared some training data and prediction data for the demo using the [HMDB Dataset](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database).

The HMDB Dataset is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by/4.0/

For more information about this dataset please visit: https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/

In [None]:
# Source location of the demo training data (CSV) in the shared demo bucket.
automl_video_demo_train_data = (
    "gs://automl-video-demo-data/hmdb_golf_swing_all.csv"
)
# Source location of the demo batch-prediction input (JSONL).
automl_video_demo_batch_prediction_data = "gs://automl-video-demo-data/hmdb_golf_swing_predict.jsonl"

### Copy AutoML Video Demo Train Data for Creating Managed Dataset

In [None]:
# Destination in the user's bucket for the copy of the demo training CSV;
# this is the path later passed as `gcs_source` when creating the dataset.
gcs_source_train = f"gs://{BUCKET_NAME}/{TASK_NAME}/data/video_action_recognition.csv"

In [None]:
# Copy the demo training CSV into the user's bucket ($name expands the Python variable).
!gsutil cp $automl_video_demo_train_data $gcs_source_train

# Run AutoML Video Training with Managed Video Dataset

## Initialize Vertex SDK for Python

Initialize the *client* for Vertex AI.

In [None]:
from google.cloud import aiplatform

# Set the SDK-wide defaults (project and staging bucket) once, so the calls in
# the following cells do not need to repeat them.
aiplatform.init(
    staging_bucket=MY_STAGING_BUCKET,
    project=MY_PROJECT,
)

## Create a Dataset on Vertex AI
We will now create a Vertex AI video dataset using the CSV file prepared above. Choose one of the options below.

Option 1: Using MBSDK VideoDataset class

In [None]:
# Import schema for video action recognition annotations.
action_recognition_import_schema = (
    aiplatform.schema.dataset.ioformat.video.action_recognition
)

# sync=False: the call is not awaited here; a later cell calls dataset.wait().
dataset = aiplatform.VideoDataset.create(
    sync=False,
    import_schema_uri=action_recognition_import_schema,
    gcs_source=gcs_source_train,
    display_name=f"temp-{TASK_NAME}",
)

Option 2: Using MBSDK Dataset class
```
dataset = aiplatform.Dataset.create(
    display_name=f'temp-{TASK_NAME}',
    metadata_schema_uri=aiplatform.schema.dataset.metadata.video,
    gcs_source=gcs_source_train, 
    import_schema_uri=aiplatform.schema.dataset.ioformat.video.action_recognition,
    sync=False
)
```

In [None]:
# Block until the dataset creation started with sync=False has finished.
dataset.wait()

## Launch a Training Job and Create a Model on Vertex AI

### Configure a Training Job

In [None]:
# Define the AutoML video training job; nothing is launched until job.run().
job = aiplatform.AutoMLVideoTrainingJob(
    model_type=MODEL_TYPE,
    prediction_type=PREDICTION_TYPE,
    display_name=f"temp-{TASK_NAME}",
)

### Run the Training Job

In [None]:
# Share of the dataset used for training vs. held out for testing.
TRAINING_FRACTION = 0.8
TEST_FRACTION = 0.2

# sync=False: training is not awaited here; a later cell calls model.wait().
model = job.run(
    dataset=dataset,
    model_display_name=f"temp-{TASK_NAME}",
    training_fraction_split=TRAINING_FRACTION,
    test_fraction_split=TEST_FRACTION,
    sync=False,
)

In [None]:
# Block until the training run started with sync=False has finished.
model.wait()

# Batch Prediction Job on the Model

### Copy AutoML Video Demo Prediction Data for Creating Batch Prediction Job

In [None]:
# Destination in the user's bucket for the copy of the demo prediction input (JSONL).
gcs_source_batch_prediction = f"gs://{BUCKET_NAME}/{TASK_NAME}/data/video_action_recognition_batch_prediction.jsonl"
# GCS prefix passed to batch_predict() as the output destination.
gcs_destination_prefix_batch_prediction = (
    f"gs://{BUCKET_NAME}/{TASK_NAME}/batch_prediction"
)

In [None]:
# Copy the demo prediction input into the user's bucket ($name expands the Python variable).
!gsutil cp $automl_video_demo_batch_prediction_data $gcs_source_batch_prediction

In [None]:
# Start a batch prediction job on the trained model, reading the copied JSONL
# input and writing results under the destination prefix.
# sync=False: the job is not awaited here; the next cell calls wait().
batch_predict_job = model.batch_predict(
    job_display_name=f"temp-{TASK_NAME}",
    gcs_source=gcs_source_batch_prediction,
    gcs_destination_prefix=gcs_destination_prefix_batch_prediction,
    sync=False,
)

In [None]:
# Wait for the batch prediction job to finish before listing its output.
batch_predict_job.wait()
bp_iter_outputs = batch_predict_job.iter_outputs()

# Keep only the objects whose file name (last path component) starts with
# "prediction"; other objects under the output prefix are ignored.
prediction_results = [
    output_blob.name
    for output_blob in bp_iter_outputs
    if output_blob.name.rsplit("/", 1)[-1].startswith("prediction")
]

In [None]:
import json

import tensorflow as tf

tags = list()  # not populated in this cell; kept in case later cells reference the name

# `line` is displayed by the next cell. Default it to None so that cell does not
# fail with NameError when the job produced no prediction files.
line = None
for prediction_result in prediction_results:
    gfile_name = f"gs://{bp_iter_outputs.bucket.name}/{prediction_result}"
    with tf.io.gfile.GFile(name=gfile_name, mode="r") as gfile:
        # Only the first record of each file is used as a sample, so read a
        # single line rather than loading the whole file with readlines().
        first_record = gfile.readline()
    # An empty string means the file had no content; keep the previous sample.
    if first_record:
        line = json.loads(first_record)

In [None]:
# Display the sample prediction record parsed in the previous cell.
line