In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Getting Started with Qodo Models


<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/Qodo_intro.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fgenerative_ai%2FQodo_intro.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/generative_ai/Qodo_intro.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/Qodo_intro.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook demonstrates how to deploy and use Qodo's state-of-the-art code embedding models on Google Cloud's Vertex AI platform. You'll learn how to set up, deploy, and make predictions with these specialized embedding models that enhance code retrieval and search capabilities.

In this notebook, you will:

- Set up your Google Cloud environment and initialize the Vertex AI SDK.
- Upload a Qodo model to your Vertex AI Model Registry.
- Create a Vertex AI endpoint for model deployment.
- Deploy the Qodo model to your endpoint with appropriate compute resources.
- Make predictions using the deployed model.

### Qodo on Vertex AI

You can deploy the Qodo models in your own endpoint.



### Available Qodo models

#### Qodo-Embed-1-7B
Qodo-Embed-1-7B is a state-of-the-art code embedding model for efficient code & text retrieval, enhancing the search accuracy of RAG methods.



## Objective

This notebook shows how to use the **Vertex AI API** and the **Vertex AI SDK for Python** to deploy Qodo models.

For more information, see the [Qodo-Embed-1 announcement on the Qodo blog](https://www.qodo.ai/blog/qodo-embed-1-code-embedding-code-retreival/).


## Get Started


### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform

In [None]:
! pip3 install -U -q httpx

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# The presence of the google.colab module is how this notebook detects Colab;
# only there is the kernel shut down with restart=True.
if "google.colab" in sys.modules:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# The interactive sign-in helper ships with Colab, so it is only imported and
# called when the google.colab module is loaded.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

In [None]:
# Model Garden identifiers of the publisher and model used throughout this notebook.
PUBLISHER_NAME = "qodo"  # @param {type:"string"}
# The dropdown options must contain the selected value; the template placeholders did not.
PUBLISHER_MODEL_NAME = "qodo-embed-1-7b-v1"  # @param ["qodo-embed-1-7b-v1"]

# Regions offered by the location dropdown.
available_regions = ["us-central1", "europe-west4"]

### Select a location from the dropdown

In [None]:
import ipywidgets as widgets
from IPython.display import display

def dropdown_loc_eventhandler(change):
    """Store the region picked in the dropdown in the global ``LOCATION``.

    Args:
        change: Change notification delivered through ``dropdown_loc.observe``.
    """
    global LOCATION
    is_value_change = change["type"] == "change" and change["name"] == "value"
    if not is_value_change:
        return
    LOCATION = change.new
    print("Selected:", change.new)


dropdown_loc = widgets.Dropdown(
    options=available_regions,
    description="Select a location:",
    font_weight="bold",
    style={"description_width": "initial"},
)

# Start from the dropdown's initial value so LOCATION is defined even if the
# selection is never changed.
LOCATION = dropdown_loc.value
dropdown_loc.observe(dropdown_loc_eventhandler, names="value")
display(dropdown_loc)

### Set Google Cloud project information and initialize Vertex AI SDK for Python

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Regional Vertex AI API host, built from the currently selected LOCATION.
ENDPOINT = "https://{}-aiplatform.googleapis.com".format(LOCATION)

# Stop early while the placeholder (or an empty value) is still in place.
if PROJECT_ID == "[your-project-id]" or not PROJECT_ID:
    raise ValueError("Please set your PROJECT_ID")

### Import required libraries

In [None]:
import json
import time

## Using Vertex AI API

### Upload Model

In [None]:
UPLOAD_MODEL_PAYLOAD = {
    "model": {
        "displayName": "ModelGarden_LaunchPad_Model_" + time.strftime("%Y%m%d-%H%M%S"),
        "baseModelSource": {
            "modelGardenSource": {
                "publicModelName": f"publishers/{PUBLISHER_NAME}/models/{PUBLISHER_MODEL_NAME}",
            }
        },
    }
}

request = json.dumps(UPLOAD_MODEL_PAYLOAD)

! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/models:upload -d '{request}'

## Extract the Model ID

After uploading your model to Vertex AI, you'll need to extract the model ID from the response for use in subsequent steps.

The response from the upload command will look similar to this:

```json
{
  "name": "projects/123456789/locations/us-central1/models/9876543210/operations/1122334455",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1beta1.UploadModelOperationMetadata",
    "genericMetadata": {
      "createTime": "2025-04-07T16:47:27.076450Z",
      "updateTime": "2025-04-07T16:47:27.076450Z"
    }
  }
}
```

Your **model ID** is the number between `models/` and `/operations` in the "name" field.

In the example above, the model ID would be `9876543210`.

Extract the model ID from the response and set it as `MODEL_ID` in the next cell.

### Verify Your Model

In [None]:
MODEL_ID = "[extracted_model_id]"  # @param {type: "number"}

! curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/models/{MODEL_ID}

### Create an Endpoint and Extract the Endpoint ID

In [None]:
CREATE_ENDPOINT_PAYLOAD = {
    "displayName": "ModelGarden_LaunchPad_Endpoint_" + time.strftime("%Y%m%d-%H%M%S"),
}

request = json.dumps(CREATE_ENDPOINT_PAYLOAD)

! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints -d '{request}'

### Extracting the Endpoint ID

 After running the command above, you'll see a JSON response similar to:
```json
{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/endpoints/ENDPOINT_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateEndpointOperationMetadata",
    "genericMetadata": {
      "createTime": "2025-04-07T16:55:27.076450Z",
      "updateTime": "2025-04-07T16:55:27.076450Z"
    }
  }
}
```

Your endpoint ID is the number that appears after "endpoints/" and before "/operations" in the "name" field.

For example, if the "name" field shows:
"projects/123456789/locations/us-central1/endpoints/9876543210/operations/1122334455"

Then your endpoint ID is: 9876543210

### Verify Your Endpoint

In [None]:
ENDPOINT_ID = "[extracted_endpoint_id]"  # @param {type: "number"}

! curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}

### Deploy Model

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Initial data
PUBLISHER_NAME = "qodo"  # @param {type:"string"}
PUBLISHER_MODEL_NAME = "qodo-embed-1-7b-v1"
available_regions = ["us-central1", "europe-west4"]
compatible_machines = [
    "a2-highgpu-1g",
    "a2-highgpu-4g",
    "a2-ultragpu-1g",
    "a2-ultragpu-2g",
    "a3-highgpu-2g",
]

# Machine type -> accelerator attached to it (customize as needed).
# "type" must be a value of the Vertex AI AcceleratorType enum: the 40 GB A100
# is NVIDIA_TESLA_A100 and the H100 is NVIDIA_H100_80GB. Values outside the
# enum (such as NVIDIA_A100 or NVIDIA_H100) are not accepted by the deploy step.
machine_config = {
    "a2-highgpu-1g": {"type": "NVIDIA_TESLA_A100", "count": 1},
    "a2-highgpu-4g": {"type": "NVIDIA_TESLA_A100", "count": 4},
    "a2-ultragpu-1g": {"type": "NVIDIA_A100_80GB", "count": 1},
    "a2-ultragpu-2g": {"type": "NVIDIA_A100_80GB", "count": 2},
    "a3-highgpu-2g": {"type": "NVIDIA_H100_80GB", "count": 2},
}

# Create widgets
dropdown_machine = widgets.Dropdown(
    options=compatible_machines,
    description="Machine type:",
    font_weight="bold",
    style={"description_width": "initial"},
)

# Initialize global variables from the dropdown's initial selection so the
# deploy step works even if the selection is never changed.
MACHINE_TYPE = dropdown_machine.value
ACCELERATOR_TYPE = machine_config[MACHINE_TYPE]["type"]
ACCELERATOR_COUNT = machine_config[MACHINE_TYPE]["count"]

label_accelerator_type = widgets.HTML(
    value=f"<b>Accelerator type:</b> {ACCELERATOR_TYPE}"
)

label_accelerator_count = widgets.HTML(
    value=f"<b>Accelerator count:</b> {ACCELERATOR_COUNT}"
)


# Event handlers
def dropdown_machine_eventhandler(change):
    """Sync the machine/accelerator globals and labels with the dropdown selection.

    Args:
        change: Change notification delivered through ``dropdown_machine.observe``.
    """
    global MACHINE_TYPE, ACCELERATOR_TYPE, ACCELERATOR_COUNT
    if change["type"] == "change" and change["name"] == "value":
        MACHINE_TYPE = change.new
        # Placeholder values are used if the selection has no machine_config entry.
        machine_info = machine_config.get(change.new, {"type": "Unknown", "count": 0})
        ACCELERATOR_TYPE = machine_info["type"]
        ACCELERATOR_COUNT = machine_info["count"]

        # Update the displayed information
        label_accelerator_type.value = f"<b>Accelerator type:</b> {ACCELERATOR_TYPE}"
        label_accelerator_count.value = f"<b>Accelerator count:</b> {ACCELERATOR_COUNT}"


# Set up observers
dropdown_machine.observe(dropdown_machine_eventhandler, names="value")

# Display widgets
display(
    widgets.VBox([dropdown_machine, label_accelerator_type, label_accelerator_count])
)

In [None]:
DEPLOY_PAYLOAD = {
    "deployedModel": {
        "model": f"projects/{PROJECT_ID}/locations/{LOCATION}/models/{MODEL_ID}",
        "displayName": "ModelGarden_LaunchPad_DeployedModel_"
        + time.strftime("%Y%m%d-%H%M%S"),
        "dedicatedResources": {
            "machineSpec": {
                "machineType": MACHINE_TYPE,
                "acceleratorType": ACCELERATOR_TYPE,
                "acceleratorCount": ACCELERATOR_COUNT,
            },
            "minReplicaCount": 1,
            "maxReplicaCount": 1,
        },
    },
    "trafficSplit": {"0": 100},
}

request = json.dumps(DEPLOY_PAYLOAD)

! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:deployModel -d '{request}'

### Wait for Deployment to Complete

Deployment can take several minutes. This cell will check the status of the operation.

### Extracting the operation ID

 After running the command above, you'll see a JSON response similar to:
```json
{
  "name": "projects/513257720056/locations/us-central1/endpoints/3978337634014461952/operations/2501704616106786816",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.DeployModelOperationMetadata",
    "genericMetadata": {
      "createTime": "2025-04-07T17:10:55.383719Z",
      "updateTime": "2025-04-07T17:10:55.383719Z"
    }
  }
}
```

Your operation ID is the number that appears after "operations/" in the "name" field.

For example, if the "name" field shows:
"projects/123456789/locations/us-central1/endpoints/9876543210/operations/1122334455"

Then your operation ID is: 1122334455

###  Check Operation Status

Run this cell to check the current status of the deployment operation. You may need to run this cell multiple times until the operation is complete (`"done": true`).

**Note:** Model deployment typically takes 5-20 minutes to complete.

In [None]:
OPERATION_ID = "[extracted_operation_id]"  # @param {type: "number"}
# Check operation status
print("Checking deployment status...")
!curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/operations/{OPERATION_ID}

### Prediction

Sends a POST request to the endpoint's `rawPredict` method to get an embedding of a short code snippet from the deployed model, using the provided payload.

In [None]:
PAYLOAD = {
   "instances":[
      {
         "input":[
            "def hello_world(): 
 print('hello_world')"
         ]
      }
   ]
}


request = json.dumps(PAYLOAD)

!curl -X POST \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:rawPredict \
  -d '{request}'

## Using Vertex AI SDK for *Python*

In [None]:
from google.cloud import aiplatform

In [None]:
# Configure the SDK with the project and region selected earlier in the notebook.
aiplatform.init(
    location=LOCATION,
    project=PROJECT_ID,
)

### Upload Model

In [None]:
# Upload the Model Garden publisher model. The display name uses the "Model_"
# prefix, matching the REST upload cell, so models and endpoints are
# distinguishable by name.
model = aiplatform.Model.upload(
    display_name="ModelGarden_LaunchPad_Model_" + time.strftime("%Y%m%d-%H%M%S"),
    model_garden_source_model_name=f"publishers/{PUBLISHER_NAME}/models/{PUBLISHER_MODEL_NAME}",
)

### Create Endpoint

In [None]:
# Create the endpoint with a timestamped display name.
my_endpoint = aiplatform.Endpoint.create(
    display_name=f"ModelGarden_LaunchPad_Endpoint_{time.strftime('%Y%m%d-%H%M%S')}",
)

### Deploy Model

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Initial data
PUBLISHER_NAME = "qodo"  # @param {type:"string"}
PUBLISHER_MODEL_NAME = "qodo-embed-1-7b-v1"
available_regions = ["us-central1", "europe-west4"]
compatible_machines = [
    "a2-highgpu-1g",
    "a2-highgpu-4g",
    "a2-ultragpu-1g",
    "a2-ultragpu-2g",
    "a3-highgpu-2g",
]

# Machine type -> accelerator attached to it (customize as needed).
# "type" must be a value of the Vertex AI AcceleratorType enum: the 40 GB A100
# is NVIDIA_TESLA_A100 and the H100 is NVIDIA_H100_80GB. Values outside the
# enum (such as NVIDIA_A100 or NVIDIA_H100) are not accepted by the deploy step.
machine_config = {
    "a2-highgpu-1g": {"type": "NVIDIA_TESLA_A100", "count": 1},
    "a2-highgpu-4g": {"type": "NVIDIA_TESLA_A100", "count": 4},
    "a2-ultragpu-1g": {"type": "NVIDIA_A100_80GB", "count": 1},
    "a2-ultragpu-2g": {"type": "NVIDIA_A100_80GB", "count": 2},
    "a3-highgpu-2g": {"type": "NVIDIA_H100_80GB", "count": 2},
}

# Create widgets
dropdown_machine = widgets.Dropdown(
    options=compatible_machines,
    description="Machine type:",
    font_weight="bold",
    style={"description_width": "initial"},
)

# Initialize global variables from the dropdown's initial selection so the
# deploy step works even if the selection is never changed.
MACHINE_TYPE = dropdown_machine.value
ACCELERATOR_TYPE = machine_config[MACHINE_TYPE]["type"]
ACCELERATOR_COUNT = machine_config[MACHINE_TYPE]["count"]

label_accelerator_type = widgets.HTML(
    value=f"<b>Accelerator type:</b> {ACCELERATOR_TYPE}"
)

label_accelerator_count = widgets.HTML(
    value=f"<b>Accelerator count:</b> {ACCELERATOR_COUNT}"
)


# Event handlers
def dropdown_machine_eventhandler(change):
    """Sync the machine/accelerator globals and labels with the dropdown selection.

    Args:
        change: Change notification delivered through ``dropdown_machine.observe``.
    """
    global MACHINE_TYPE, ACCELERATOR_TYPE, ACCELERATOR_COUNT
    if change["type"] == "change" and change["name"] == "value":
        MACHINE_TYPE = change.new
        # Placeholder values are used if the selection has no machine_config entry.
        machine_info = machine_config.get(change.new, {"type": "Unknown", "count": 0})
        ACCELERATOR_TYPE = machine_info["type"]
        ACCELERATOR_COUNT = machine_info["count"]

        # Update the displayed information
        label_accelerator_type.value = f"<b>Accelerator type:</b> {ACCELERATOR_TYPE}"
        label_accelerator_count.value = f"<b>Accelerator count:</b> {ACCELERATOR_COUNT}"


# Set up observers
dropdown_machine.observe(dropdown_machine_eventhandler, names="value")

# Display widgets
display(
    widgets.VBox([dropdown_machine, label_accelerator_type, label_accelerator_count])
)

In [None]:
# Deploy the uploaded model to the endpoint on the selected machine and
# accelerator, with exactly one replica and all traffic on this deployment.
model.deploy(
    endpoint=my_endpoint,
    deployed_model_display_name=f"ModelGarden_LaunchPad_DeployedModel_{time.strftime('%Y%m%d-%H%M%S')}",
    machine_type=MACHINE_TYPE,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=ACCELERATOR_COUNT,
    min_replica_count=1,
    max_replica_count=1,
    traffic_split={"0": 100},
)

### Prediction

In [None]:
# One instance holding a single two-line code snippet to embed.
PAYLOAD = {
    "instances": [
        {"input": ["def hello_world(): \n  print('hello_world')"]},
    ]
}

request = json.dumps(PAYLOAD)

response = my_endpoint.raw_predict(
    headers={"Content-Type": "application/json"},
    body=request,
)
data = response.json()

# Read the vector from the first entry of the first prediction.
embedding = data["predictions"][0]["data"][0]["embedding"]
print(embedding)

## Cleaning up


In [None]:
# Cleaning up

print("Starting cleanup process...\n")

# Each step is (progress message, action, success message, error prefix).
# The steps run in order: undeploy the model, delete the endpoint, delete the model.
# Actions are wrapped in lambdas so that any error, including a missing
# `my_endpoint`/`model` variable, is raised inside the try block below.
cleanup_steps = [
    (
        "Undeploying model from endpoint...",
        lambda: my_endpoint.undeploy_all(),
        "✓ Model successfully undeployed from endpoint",
        "Error undeploying model",
    ),
    (
        "\nDeleting endpoint...",
        lambda: my_endpoint.delete(),
        "✓ Endpoint successfully deleted",
        "Error deleting endpoint",
    ),
    (
        "\nDeleting model...",
        lambda: model.delete(),
        "✓ Model successfully deleted",
        "Error deleting model",
    ),
]

failed_steps = []
for progress_message, action, success_message, error_prefix in cleanup_steps:
    print(progress_message)
    try:
        action()
        print(success_message)
    except Exception as e:
        # Best effort: report the failure and continue with the remaining steps.
        failed_steps.append(error_prefix)
        print(f"{error_prefix}: {e}")

# Only claim success when every step succeeded.
if failed_steps:
    print(
        f"\nCleanup finished with {len(failed_steps)} error(s). "
        "Some resources may still exist; review the messages above."
    )
else:
    print("\nCleanup complete! All resources have been removed.")
