In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Feedback or issues?

For any feedback or questions, please open an [issue](https://github.com/googleapis/python-aiplatform/issues).

# Explainable AI via MB SDK on Custom Tabular model

To use this Jupyter notebook, copy the notebook to a Google Cloud Notebooks instance with TensorFlow installed and open it. You can run each step, or cell, and see its results. To run a cell, use Shift+Enter. Jupyter automatically displays the return value of the last line in each cell. For more information about running notebooks in Google Cloud Notebooks, see the [Google Cloud Notebook guide](https://cloud.google.com/vertex-ai/docs/general/notebooks).


This notebook demonstrates how to create a custom tabular model and how to serve the model for online prediction with explainability.

Note: you may incur charges for training, prediction, storage or usage of other GCP products in connection with testing this SDK.

### Install Vertex SDK for Python


After the SDK installation, the kernel will be automatically restarted.

In [None]:
%%capture
!pip3 uninstall -y google-cloud-aiplatform
!pip3 install -y google-cloud-aiplatform tabulate
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

In [None]:
import sys

# Only when the google.colab module is loaded (i.e. the notebook runs in
# Colab) ask the user to authenticate; otherwise this cell does nothing.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    auth.authenticate_user()

### Enter Your Project and GCS Bucket

Enter your Project Id and Cloud Storage bucket in the cell below. Then run the cell so that the SDK calls in this notebook use the right project and staging bucket.

In [None]:
# Placeholders: replace both values with your own before running the cells
# below. They are passed to aiplatform.init() as project and staging bucket.
MY_PROJECT = "YOUR PROJECT ID"
MY_STAGING_BUCKET = "gs://YOUR BUCKET"  # bucket should be in the same region as ucaip (presumably the Vertex AI region used by this notebook; confirm)

## Set up SDK workspace

In [None]:
import uuid

import tensorflow as tf
from google.cloud import aiplatform
from tabulate import tabulate

## Initialize Vertex SDK for Python

Initialize the *client* for Vertex AI.

In [None]:
# Initialize the SDK with the project and staging bucket chosen above.
aiplatform.init(project=MY_PROJECT, staging_bucket=MY_STAGING_BUCKET)

## Create Training Script that saves Explainable model

In [None]:
%%writefile training_script.py
# Single, Mirror and Multi-Machine Distributed Training for Boston Housing

from explainable_ai_sdk.metadata.tf.v2 import SavedModelMetadataBuilder

from tensorflow.python.client import device_lib
import tensorflow_datasets as tfds
import tensorflow as tf

import numpy as np
import tempfile
import argparse
import sys
import os

# Turn off the tensorflow_datasets progress bars.
tfds.disable_progress_bar()

# Command-line flags, declared as (flag, add_argument keyword options) pairs.
# --model-dir falls back to the AIP_MODEL_DIR environment variable.
CLI_FLAGS = (
    ('--model-dir', dict(dest='model_dir', default=os.getenv('AIP_MODEL_DIR'),
                         type=str, help='Model dir.')),
    ('--lr', dict(dest='lr', default=0.001, type=float,
                  help='Learning rate.')),
    ('--epochs', dict(dest='epochs', default=20, type=int,
                      help='Number of epochs.')),
    ('--steps', dict(dest='steps', default=100, type=int,
                     help='Number of steps per epoch.')),
    ('--distribute', dict(dest='distribute', type=str, default='single',
                          help='distributed training strategy')),
)

parser = argparse.ArgumentParser()
for flag, options in CLI_FLAGS:
    parser.add_argument(flag, **options)
args = parser.parse_args()

# Log the runtime environment at the start of the job output.
print('Python Version = {}'.format(sys.version))
print('TensorFlow Version = {}'.format(tf.__version__))
print('TF_CONFIG = {}'.format(os.environ.get('TF_CONFIG', 'Not found')))

# Pick the distribution strategy requested with --distribute.
# Single Machine, single compute device
if args.distribute == 'single':
    if tf.test.is_gpu_available():
        strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    else:
        strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
# Single Machine, multiple compute device
elif args.distribute == 'mirror':
    strategy = tf.distribute.MirroredStrategy()
# Multiple Machine, multiple compute device
elif args.distribute == 'multi':
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
else:
    # Without this branch an unknown value leaves `strategy` undefined and the
    # script dies just below with an unhelpful NameError.
    raise ValueError(
        "Unknown --distribute value {!r}; expected 'single', 'mirror' or 'multi'.".format(
            args.distribute))

# Multi-worker configuration
print('num_replicas_in_sync = {}'.format(strategy.num_replicas_in_sync))

def make_dataset():
  """Load the Boston Housing data and scale every feature column.

  Returns:
    ((x_train, y_train), (x_test, y_test)) as loaded by
    tf.keras.datasets.boston_housing.load_data, with each feature column of
    x_train and of x_test divided by that column's own maximum.
  """
  # Scaling Boston Housing data features
  def scale(feature):
    peak = np.max(feature)  # not named `max`, which would shadow the builtin
    if peak == 0:
      # Nothing to scale; dividing by zero would fill the column with NaNs.
      return feature
    # np.float was removed from NumPy; np.float64 is the dtype it aliased.
    return (feature / peak).astype(np.float64)

  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data(
    path="boston_housing.npz", test_split=0.2, seed=113
  )
  # Index columns ([:, col]), not rows: x[col] selects the col-th *sample*, so
  # the old loop rescaled the first 13 samples and left every other sample raw.
  for col in range(x_train.shape[1]):
    x_train[:, col] = scale(x_train[:, col])
    x_test[:, col] = scale(x_test[:, col])
  return (x_train, y_train), (x_test, y_test)

# Build the Keras model
def build_and_compile_dnn_model():
  """Create and compile the dense regression network.

  The network takes 13 input values, has two 128-unit ReLU layers followed by
  a single linear output unit, and is compiled with MSE loss and an RMSprop
  optimizer whose learning rate comes from the --lr flag (args.lr).

  Returns:
    The compiled tf.keras.Sequential model.
  """
  dnn = tf.keras.Sequential()
  dnn.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(13,)))
  dnn.add(tf.keras.layers.Dense(128, activation='relu'))
  dnn.add(tf.keras.layers.Dense(1, activation='linear'))

  rmsprop = tf.keras.optimizers.RMSprop(learning_rate=args.lr)
  dnn.compile(loss='mse', optimizer=rmsprop)
  return dnn

# Batch sizing: a per-replica batch of 16 multiplied by the number of replicas
# the strategy keeps in sync gives the global batch size passed to fit().
PER_REPLICA_BATCH = 16
replica_count = strategy.num_replicas_in_sync
global_batch = PER_REPLICA_BATCH * replica_count

# The model is built and compiled inside the strategy scope.
with strategy.scope():
  regressor = build_and_compile_dnn_model()

# Fit on the training split only; the test split returned by make_dataset()
# is not used by this script.
train_split, _unused_test_split = make_dataset()
train_features, train_targets = train_split
regressor.fit(train_features, train_targets,
              epochs=args.epochs, batch_size=global_batch)

# Export to a local scratch directory first ...
export_dir = tempfile.mkdtemp()
regressor.save(export_dir)

# ... then save the model together with its explanation metadata to the
# directory given by --model-dir.
SavedModelMetadataBuilder(export_dir).save_model_with_metadata(args.model_dir)



## Launch a Training Job and Create a Model on Vertex AI

### Configure a Training Job

In [None]:
# Container images: one runs the training script, the other serves the model.
TRAINING_IMAGE_URI = "gcr.io/cloud-aiplatform/training/tf-gpu.2-1:latest"
SERVING_IMAGE_URI = "gcr.io/cloud-aiplatform/prediction/tf2-gpu.2-1:latest"

# Extra packages listed as requirements for training_script.py.
SCRIPT_REQUIREMENTS = ["tensorflow_datasets", "explainable-ai-sdk"]

# A random UUID suffix makes the display name different on every run.
job_display_name = f"temp-mbsdk-explainable-ai-custom-tabular-nb-{uuid.uuid4()}"

job = aiplatform.CustomTrainingJob(
    display_name=job_display_name,
    script_path="training_script.py",
    container_uri=TRAINING_IMAGE_URI,
    requirements=SCRIPT_REQUIREMENTS,
    model_serving_container_image_uri=SERVING_IMAGE_URI,
)

### Run the Training Job

In [None]:
# Flags forwarded to training_script.py (parsed there with argparse).
script_args = ["--epochs=50", "--distribute=single"]

# One replica on an n1-standard-4 machine with a single K80 accelerator.
model = job.run(
    args=script_args,
    model_display_name="temp-boston-housing-mbsdk-explainable-tabular-model",
    replica_count=1,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=1,
)

In [None]:
# Summarize the Custom Job: display name, resource name and current state.
job_report_lines = [
    f"Display Name:\t{job.display_name}\n",
    f"Resource Name:\t{job.resource_name}\n",
    f"Current State:\t{job.state.name}\n",
]
print("".join(job_report_lines))

In [None]:
# Get path to saved model in GCS.
# Read it through the public `uri` property of the Model instead of reaching
# into the private `_gca_resource` proto, which is not part of the SDK's API.
output_dir = model.uri

## Build the Explanation Metadata and Parameters

In [None]:
# Load the exported model from `output_dir`; its serving signature is
# inspected in the next cell to find the input and output tensor names.
loaded = tf.keras.models.load_model(output_dir)

In [None]:
# Read the first input key and the first output key of the default serving
# signature; they are used as tensor names in the explanation metadata.
serving_signature = loaded.signatures["serving_default"]
signature_input_keys = list(serving_signature.structured_input_signature[1].keys())
signature_output_keys = list(serving_signature.structured_outputs.keys())
serving_input = signature_input_keys[0]
serving_output = signature_output_keys[0]

# Names of the 13 input features, in input-tensor order.
feature_names = [
    "crim", "zn", "indus", "chas", "nox", "rm", "age",
    "dis", "rad", "tax", "ptratio", "b", "lstat",
]

In [None]:
# Sampled Shapley attribution configured with a path_count of 10.
shapley_config = {"sampled_shapley_attribution": {"path_count": 10}}
explain_params = aiplatform.explain.ExplanationParameters(shapley_config)

In [None]:
ExplanationMetadata = aiplatform.explain.ExplanationMetadata

# Input: the serving input tensor, with each index mapped to a feature name.
input_spec = {
    "input_tensor_name": serving_input,
    "encoding": "BAG_OF_FEATURES",
    "modality": "numeric",
    "index_feature_mapping": feature_names,
}
# Output: the serving output tensor.
output_spec = {"output_tensor_name": serving_output}

input_metadata = ExplanationMetadata.InputMetadata(input_spec)
output_metadata = ExplanationMetadata.OutputMetadata(output_spec)

# The input is registered under "features" and the output under "medv".
explain_metadata = ExplanationMetadata(
    inputs={"features": input_metadata},
    outputs={"medv": output_metadata},
)

## Deploy the model with model explanations enabled

In [None]:
# Serving hardware for the endpoint plus the explanation metadata and
# parameters built in the previous cells.
deployment_options = {
    "machine_type": "n1-standard-4",
    "accelerator_type": "NVIDIA_TESLA_K80",
    "accelerator_count": 1,
    "explanation_metadata": explain_metadata,
    "explanation_parameters": explain_params,
}
endpoint = model.deploy(**deployment_options)

In [None]:
# Show the endpoint's resource name and a snippet for re-attaching to it later.
endpoint_messages = (
    f"Endpoint resource name: {endpoint.resource_name}",
    f"\nTo use this endpoint in the future:\nendpoint = aiplatform.Endpoint('{endpoint.resource_name}')",
)
for message in endpoint_messages:
    print(message)

## Fetch test data to use

In [None]:
import numpy as np
from tensorflow.keras.datasets import boston_housing

# Load the Boston Housing data with the same path/test_split/seed arguments as
# the training script; only the test split is kept (the training split is
# discarded into `_`).
(_, _), (x_test, y_test) = boston_housing.load_data(
    path="boston_housing.npz", test_split=0.2, seed=113
)


def scale(feature):
    """Return `feature` divided by its largest element, cast to float32."""
    peak = np.max(feature)
    return (feature / peak).astype(np.float32)


# Scale each feature *column* by its own maximum. Indexing x_test[i] selects
# the i-th sample (row), so the old loop rescaled 13 samples and left the
# features of every other test sample untouched.
for col in range(x_test.shape[1]):
    x_test[:, col] = scale(x_test[:, col])
x_test = x_test.astype(np.float32)

print(x_test.shape, x_test.dtype, y_test.shape)

## Get predictions with explanations on our deployed tabular model

In [None]:
# Request a prediction with explanations for the first test sample, sent as a
# single instance keyed by "dense_input".
first_instance = {"dense_input": x_test[0].tolist()}
response = endpoint.explain(instances=[first_instance])

## Check out feature attributions

In [None]:
# Tabulate, for the explained sample, each feature's name, its input value and
# the attribution value read from the explanation response.
test_data = x_test[0]
attributions = response.explanations[0].attributions[0].feature_attributions

rows = [
    [name, test_data[position], attributions[name][0]]
    for position, name in enumerate(feature_names)
]
table_headers = ["Feature name", "Feature value", "Attribution value"]
print(tabulate(rows, headers=table_headers))