In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fraudfinder - Model Inference

<table align="left">
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/fraudfinder/raw/main/07_model_inference.ipynb">
       <img src="https://www.gstatic.com/cloud/images/navigation/vertex-ai.svg" alt="Google Cloud Notebooks">Open in Cloud Notebook
    </a>
  </td> 
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/fraudfinder/blob/main/07_model_inference.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Open in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/fraudfinder/blob/main/07_model_inference.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

[FraudFinder](https://github.com/googlecloudplatform/fraudfinder) is a series of labs on how to build a real-time fraud detection system on Google Cloud. Throughout the FraudFinder labs, you will learn how to read historical bank transaction data stored in a data warehouse, read from a live stream of new transactions, perform exploratory data analysis (EDA), do feature engineering, ingest features into a feature store, train a model using the feature store, register your model in a model registry, evaluate your model, deploy your model to an endpoint, do real-time inference on your model with the feature store, and monitor your model.

### Objective

In this notebook, you will create a Cloud Run app to perform model inference on the endpoint deployed in the previous notebooks. This Cloud Run app will be triggered by the Pub/Sub subscriber for live transactions, perform a look-up on feature values from the feature store you created in earlier notebooks, and send the prediction request to the Vertex AI endpoint. You can then view the resulting prediction-response logs in BigQuery.

This lab uses the following Google Cloud services and resources:

- [Vertex AI](https://cloud.google.com/vertex-ai/)
- [BigQuery](https://cloud.google.com/bigquery/)
- [Cloud Run](https://cloud.google.com/run)
- [Pub/Sub](https://cloud.google.com/pubsub/)

Steps performed in this notebook:

- Build and deploy a Cloud Run app for model inference
- Create and use a Pub/Sub push subscription to invoke the Cloud Run model inference app
- Inspect the prediction-responses of the endpoint in BigQuery

### Load config settings

In [None]:
# Resolve the active gcloud project; `name = !cmd` captures the command's output as a list of lines.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
# The bucket name is the project ID plus a fixed "-fraudfinder" suffix.
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
# Read the shared settings file stored in that bucket.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
# `.n` is the captured output joined into one newline-separated string.
print(config.n)
# NOTE(review): exec() runs the bucket object's contents as Python in this kernel, so only
# use this with a bucket you control. Later cells use names not assigned anywhere in this
# notebook (REGION, FEATURESTORE_ID, ENDPOINT_NAME) — presumably defined here; confirm
# against the printed output above.
exec(config.n)

### Define constants

In [None]:
# Maps each prediction-payload field name to its dtype string.
# Insertion order is kept exactly as listed here.
PAYLOAD_SCHEMA = dict(
    # Transaction-level field
    tx_amount="float64",
    # Customer aggregates over day-sized windows
    customer_id_nb_tx_1day_window="int64",
    customer_id_nb_tx_7day_window="int64",
    customer_id_nb_tx_14day_window="int64",
    customer_id_avg_amount_1day_window="float64",
    customer_id_avg_amount_7day_window="float64",
    customer_id_avg_amount_14day_window="float64",
    # Customer aggregates over minute-sized windows
    customer_id_nb_tx_15min_window="int64",
    customer_id_avg_amount_15min_window="float64",
    customer_id_nb_tx_30min_window="int64",
    customer_id_avg_amount_30min_window="float64",
    customer_id_nb_tx_60min_window="int64",
    customer_id_avg_amount_60min_window="float64",
    # Terminal aggregates over day-sized windows
    terminal_id_nb_tx_1day_window="int64",
    terminal_id_nb_tx_7day_window="int64",
    terminal_id_nb_tx_14day_window="int64",
    terminal_id_risk_1day_window="float64",
    terminal_id_risk_7day_window="float64",
    terminal_id_risk_14day_window="float64",
    # Terminal aggregates over minute-sized windows
    terminal_id_nb_tx_15min_window="int64",
    terminal_id_avg_amount_15min_window="float64",
    terminal_id_nb_tx_30min_window="int64",
    terminal_id_avg_amount_30min_window="float64",
    terminal_id_nb_tx_60min_window="int64",
    terminal_id_avg_amount_60min_window="float64",
)

### Import libraries

In [None]:
from google.cloud.aiplatform import Featurestore, EntityType, Feature
from google.cloud import aiplatform
from google.cloud import aiplatform as vertex_ai
from google.cloud import bigquery

### Initialize Vertex AI for Python

Initialize the Vertex AI SDK for Python for your project and region.

In [None]:
# Set the SDK-wide project and location used by the Vertex AI calls later in this notebook.
# NOTE(review): REGION is not assigned in any visible cell — presumably it is defined by the
# exec'd notebook_env.py config; confirm.
vertex_ai.init(project=PROJECT_ID, location=REGION)

## Build and deploy a Cloud Run app for model inference
To formalize the process of prediction, you will use a Cloud Run app that takes in live transactions as a trigger, then fetches feature values from Vertex AI Feature Store, then sends the prediction payload to an endpoint. To invoke the Cloud Run app, you will create a Pub/Sub push subscription that delivers live transactions from the public Pub/Sub topic to it.

[Cloud Run](https://cloud.google.com/run) is a serverless compute platform that enables you to deploy containers that are executed each time they are triggered.

#### Steps to build and deploy the Cloud Run app

To deploy a Cloud Run app, you must:
1. Build a Docker container with your code
2. Deploy your container to Cloud Run

### 1. Build a Docker container with your code

The container code has been prepared for you in the `../scripts/cloud_run_model_inference/` folder, which you can build and submit automatically to Google Container Registry with the command below.

In [None]:
!gcloud builds submit ../scripts/cloud_run_model_inference --tag gcr.io/$PROJECT_ID/cloud_run_model_inference --quiet

### 2. Deploy your container to Cloud Run

With your container built on Container Registry, you can now deploy it to Cloud Run.

To do so, you will need some environment variables to make sure your Cloud Run app knows which Vertex AI endpoint to use.

In [None]:
# Retrieve your Vertex AI endpoint name.
# Results are ordered by ascending update_time, so the last element is the most
# recently updated endpoint that matches the display-name filter.
endpoints = vertex_ai.Endpoint.list(
    filter=f"display_name={ENDPOINT_NAME}_monitored", # optional: filter by specific endpoint name
    order_by="update_time"
)

# Fail with an actionable message instead of a bare IndexError when nothing matches.
if not endpoints:
    raise RuntimeError(
        f"No Vertex AI endpoint found with display_name={ENDPOINT_NAME}_monitored. "
        "Deploy the monitored endpoint in the earlier notebooks before running this cell."
    )

# This value is passed to the Cloud Run app and used in the BigQuery dataset name below.
# NOTE(review): `.name` is expected to be the endpoint's short resource ID rather than the
# full resource path — confirm against the printed value.
ENDPOINT_ID = endpoints[-1].name
print(ENDPOINT_ID)

The following cell will deploy the container as an app on Cloud Run, which you can then check on https://console.cloud.google.com/run. 

Note that once deployed, if you try to visit the Service URL (which may look like http://cloud-run-model-inference-app-XXXXXX-a.run.app), you should expect to see `Error: Forbidden
Your client does not have permission to get URL / from this server`, which is normal, as you don't want the public internet to invoke your app.

In [None]:
!gcloud run deploy cloud-run-model-inference-app \
--image gcr.io/{PROJECT_ID}/cloud_run_model_inference \
--no-allow-unauthenticated \
--region $REGION \
--update-env-vars FEATURESTORE_ID=$FEATURESTORE_ID,ENDPOINT_ID=$ENDPOINT_ID,PROJECT_ID=$PROJECT_ID,REGION=$REGION \
--quiet --verbosity=none

You have now deployed a Cloud Run app to do model inference. However, it is not currently triggered by anything. In the next section, you will connect your Cloud Run app to the live transactions so you can continuously trigger your model inference app.

## Create and use a Pub/Sub push subscription to invoke the Cloud Run model inference app

In this section, you will connect the live transactions to trigger your Cloud Run app. To do so, you will need to create a Pub/Sub push subscription from the live transactions (the public Pub/Sub topic), then use a service account to trigger your Cloud Run app.

#### There are a few steps needed:
1. Create a service account that can invoke your Cloud Run app with appropriate IAM policies
2. Create the Pub/Sub subscription from the live transactions to invoke the Cloud Run app

### 1. Create a service account that can invoke your Cloud Run app with appropriate IAM policies

In [None]:
# Create a service account
!gcloud iam service-accounts create cloud-run-invoker --display-name "Cloud Run Pub/Sub Invoker"

# Retrieve your project number
PROJECT_NUMBER = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUMBER = PROJECT_NUMBER[0]

# Bind the service account with an IAM policy to invoke the Cloud Run app
!gcloud run services add-iam-policy-binding cloud-run-model-inference-app \
   --member=serviceAccount:cloud-run-invoker@{PROJECT_ID}.iam.gserviceaccount.com \
   --role=roles/run.invoker \
   --region=us-central1

# Add another IAM policy to the service account to provide authentication needed to invoke Cloud Run
!gcloud projects add-iam-policy-binding $PROJECT_ID \
     --member=serviceAccount:service-{PROJECT_NUMBER}@gcp-sa-pubsub.iam.gserviceaccount.com \
     --role=roles/iam.serviceAccountTokenCreator

### 2. Create the Pub/Sub subscription from the live transactions to invoke the Cloud Run app

With the service account ready, you can now create a Pub/Sub push subscription to connect the live transactions (from the public Pub/Sub topic `ff-tx`) to trigger your Cloud Run app.

In other words, as new transactions are received in the Pub/Sub topic, the push subscription will then automatically trigger the Cloud Run app, which processes the live data, retrieves values from Vertex AI Feature Store, then sends the prediction request to the Vertex AI endpoint.

To create the Pub/Sub push subscription, you will first need to retrieve your Cloud Run service URL.

In [None]:
# to get the service URL programmatically
SERVICE_URL = !gcloud run services describe cloud-run-model-inference-app \
  --platform managed \
  --region $REGION \
  --format "value(status.url)"
SERVICE_URL = SERVICE_URL[0]

print(SERVICE_URL)

Now you can create your Pub/Sub push subscription:

In [None]:
!gcloud pubsub subscriptions create push-live-tx-to-cloudrun --topic projects/cymbal-fraudfinder/topics/ff-tx \
   --ack-deadline=600 \
   --push-endpoint=$SERVICE_URL \
   --push-auth-service-account=cloud-run-invoker@{PROJECT_ID}.iam.gserviceaccount.com

Once created, you can do some checks to make sure everything worked successfully:
- On the [Pub/Sub page](https://console.cloud.google.com/cloudpubsub/subscription/list), inspect your new Pub/Sub subscription `push-live-tx-to-cloudrun`
- On the [Cloud Run logs page](https://console.cloud.google.com/run/detail/us-central1/cloud-run-model-inference-app/logs), check the logs of your Cloud Run app to confirm that you see model prediction requests and responses (the link assumes the `us-central1` region; adjust the URL if your `REGION` differs)

## Inspecting model prediction requests and responses in BigQuery

With Model Monitoring enabled on your Vertex AI endpoint, your endpoint will now automatically store all of your model prediction requests and responses in BigQuery.

You may need to wait a few minutes before you start to see new rows updated in BigQuery.

In [None]:
# BigQuery client bound to the lab project.
bq_client = bigquery.Client(project=PROJECT_ID)

# The 100 most recently logged rows from the endpoint's `serving_predict` table; the
# dataset name is derived from the endpoint ID.
sql = f"""
SELECT
  *
FROM
  `model_deployment_monitoring_{ENDPOINT_ID}.serving_predict`
ORDER BY
  TIMESTAMP(logging_time) DESC
LIMIT
  100
"""

# Submit the query with a default job configuration, wait for it to finish, then convert
# the result via Arrow into a pandas DataFrame.
client_result = bq_client.query(sql, job_config=bigquery.QueryJobConfig())
arrow_table = client_result.result().to_arrow()
df = arrow_table.to_pandas()
df