In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex prompt optimizer Notebook SDK (Preview)

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fprompts%2Fprompt_optimizer%2Fvertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_sdk.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

    

| | | |
|-|-|-|
|Author | [Ivan Nardini](https://github.com/inardini)

##  1. Overview

When developing Generative AI (Gen AI) applications, prompt engineering poses challenges due to its time-consuming and error-prone nature. Significant effort is involved when crafting and inputting prompts to achieve successful task completion. With the frequent release of foundational models, you face the added burden of migrating working prompts from one model version to another.

Vertex AI prompt optimizer aims to alleviate these challenges by providing you with an intelligent prompt optimization tool. With this tool you can both translate and optimize system instruction in the prompts and the best demonstrations (examples) for prompt templates, empowering you to shape LLM responses from any source model to a target Google model.


### Objective

This notebook demonstrates how to leverage Vertex AI prompt optimizer to optimize a simple prompt for a Gemini model with respect to a question-answering task. The goal is to use Vertex AI prompt optimizer to find the new prompt template that generates the most accurate and grounded responses.

This tutorial uses the following Google Cloud ML services and resources:

- Generative AI on Vertex AI
- Vertex AI prompt optimizer
- Vertex AI Gen AI evaluation
- Vertex AI Custom job

The steps performed include:

1. Define the prompt template you want to optimize.
2. Prepare the prompt optimization dataset.
3. Set target model and evaluation metric.
4. Set optimization mode and steps.
5. Run the automatic prompt optimization job.
6. Collect the best prompt template and evaluation metric.
7. Validate the best prompt template.

### Dataset

The dataset is a question-answering dataset generated by  a simple AI cooking assistant that provides suggestions on how to prepare healthier dishes.


### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## 2. Before you start

### Install Vertex AI SDK for Python and other required packages


In [None]:
%pip install --upgrade --quiet 'google-cloud-aiplatform[evaluation]'
%pip install --upgrade --quiet 'plotly' 'asyncio' 'tqdm' 'tenacity' 'etils' 'importlib_resources' 'fsspec' 'gcsfs' 'nbformat>=4.2.0'

In [None]:
! mkdir -p ./tutorial/utils && wget https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vapo_lib.py -P ./tutorial/utils

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Restart the kernel only when running inside Google Colab, so the packages
# installed above are picked up by a fresh interpreter.
if "google.colab" in sys.modules:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# Interactive authentication is only needed (and only available) on Google Colab;
# the guard makes this cell a no-op in every other environment.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

#### Set your project ID and project number

In [None]:
# Replace the placeholder with the ID of your own Google Cloud project.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Make this project the active one for the gcloud commands that follow.
! gcloud config set project {PROJECT_ID}

In [None]:
PROJECT_NUMBER = !gcloud projects describe {PROJECT_ID} --format="get(projectNumber)"[0]
PROJECT_NUMBER = PROJECT_NUMBER[0]

#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region used for the bucket and for Vertex AI SDK initialization.
REGION = "us-central1"  # @param {type: "string"}

#### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [None]:
# f-string so the default name really embeds PROJECT_ID; a plain string would
# keep the literal text "{PROJECT_ID}", which is not a valid bucket name.
BUCKET_NAME = f"your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

# Full gs:// URI used by gsutil, etils.epath and the Vertex AI SDK below.
BUCKET_URI = f"gs://{BUCKET_NAME}"  # @param {type:"string"}

In [None]:
# Create the bucket in REGION under PROJECT_ID.
# NOTE(review): the command reports an error if the bucket already exists — safe to ignore on re-runs.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

#### Service Account and permissions

Vertex AI prompt optimizer requires a service account with the following permissions:

-   `Vertex AI User` to call Vertex LLM API
-   `Storage Object Admin` to read and write to your GCS bucket.
-   `Artifact Registry Reader` to download the pipeline template from Artifact Registry.

[Check out the documentation](https://cloud.google.com/iam/docs/manage-access-service-accounts#iam-view-access-sa-gcloud) to learn how to grant those permissions to a single service account.

> If you run the following commands in Vertex AI Workbench, run them directly in the terminal.


In [None]:
# Service account the optimization job runs as, built from the project number
# (this follows the Compute Engine default service account naming pattern).
SERVICE_ACCOUNT = f"{PROJECT_NUMBER}-compute@developer.gserviceaccount.com"

In [None]:
# Grant SERVICE_ACCOUNT each role listed in the section above, one binding per
# gcloud call; `--condition=None` adds the binding without an IAM condition.
for role in ['aiplatform.user', 'storage.objectAdmin', 'artifactregistry.reader']:

    ! gcloud projects add-iam-policy-binding {PROJECT_ID} \
      --member=serviceAccount:{SERVICE_ACCOUNT} \
      --role=roles/{role} --condition=None

### Set tutorial folder and workspace

Set a local folder to collect and organize data and any tutorial artifacts.

In [None]:
from pathlib import Path as path

# Local working folder: <current working directory>/tutorial.
ROOT_PATH = path.cwd()
TUTORIAL_PATH = ROOT_PATH.joinpath("tutorial")

# Create it if missing; succeeds silently when it already exists.
TUTORIAL_PATH.mkdir(exist_ok=True, parents=True)

Set an associated workspace to store prompt optimization results in your Cloud Storage bucket.

In [None]:
from etils import epath

# Cloud Storage layout: <bucket>/optimization for the workspace and
# <bucket>/optimization/data for input datasets.
WORKSPACE_URI = epath.Path(BUCKET_URI) / "optimization"
INPUT_DATA_URI = epath.Path(WORKSPACE_URI) / "data"

# Ensure both locations exist before anything is written to them.
for workspace_dir in (WORKSPACE_URI, INPUT_DATA_URI):
    workspace_dir.mkdir(parents=True, exist_ok=True)

### Import libraries

Import required libraries.

In [None]:
# Tutorial
from argparse import Namespace
import json

# General
import logging
import warnings

from IPython.display import HTML, display
from google.cloud import aiplatform
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.asyncio import tqdm_asyncio
from tutorial.utils import vapo_lib

### Libraries logging

Configure logging for libraries to display output within the notebook.

In [None]:
warnings.filterwarnings("ignore")
logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

### Define constants

Define some tutorial constants.

In [None]:
# Public source dataset (JSONL) used throughout the tutorial.
INPUT_DATA_FILE_URI = "gs://github-repo/prompts/prompt_optimizer/rag_qa_dataset.jsonl"

# Name of the experiment used by the evaluation runs.
EXPERIMENT_NAME = "qa-prompt-eval"

# Workspace locations for optimizer inputs and outputs.
INPUT_OPTIMIZATION_DATA_URI = epath.Path(WORKSPACE_URI) / "prompt_optimization_data"
OUTPUT_OPTIMIZATION_DATA_URI = epath.Path(WORKSPACE_URI) / "optimization_jobs"

# File the prepared optimization dataset is written to (string URI).
INPUT_OPTIMIZATION_DATA_FILE_URI = str(
    INPUT_DATA_URI / "prompt_optimization_dataset.jsonl"
)

# Container image that runs the prompt optimizer job.
APD_CONTAINER_URI = "us-docker.pkg.dev/vertex-ai-restricted/builtin-algorithm/apd:preview_v1_0"

# Location of the JSON configuration file passed to the job (string URI).
CONFIG_FILE_URI = str(WORKSPACE_URI / "config" / "config.json")

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [None]:
# Bind the SDK to this project, region and staging bucket for all later calls.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

## 3. Automated prompt design with Vertex AI prompt optimizer

### Load the dataset

Load the cooking question-answer dataset from a Google Cloud Storage bucket. The dataset contains the following columns:

*   `user_question`: The cooking question posed by the user to the AI cooking assistant.
*   `context`: Relevant information retrieved to answer the user's question.
*   `prompt`: The content fed to the language model to generate an answer.
*   `answer`: The generated answer from the language model.
*   `reference`: The ground truth answer—the ideal response the user expects from the AI cooking assistant.

In [None]:
# Read the JSON Lines dataset (one record per line) straight from the gs:// URI.
prompt_optimization_df = pd.read_json(INPUT_DATA_FILE_URI, lines=True)

In [None]:
# Preview the first rows of the loaded dataset.
prompt_optimization_df.head()

Print an example of the cooking question-answer dataset.  

In [None]:
# Show one example via the tutorial helper (presumably a formatted row view — see vapo_lib).
vapo_lib.print_df_rows(prompt_optimization_df, n=1)

### Evaluate the system instruction in the original prompt template

Assess the original prompt's effectiveness for our AI cooking assistant's question-answering task using Vertex AI's Gen AI Evaluation service. This service offers various metrics and methods to evaluate generative models, which enables comparing the model's performance against our own expectations and criteria.

Specifically, you focus on the quality and groundedness of the answers generated in response to the prompt using a test dataset.

To learn more, see [Gen AI evaluation service overview](https://cloud.google.com/vertex-ai/generative-ai/docs/models/evaluation-overview).  


In [None]:
# Reproducible split (fixed seed): 20% of the rows feed the optimizer and the
# remaining 80% are held out for evaluation.
# NOTE(review): confirm the 20/80 proportion is intended for this dataset size.
train_prompt_optimization_df, test_prompt_optimization_df = train_test_split(
    prompt_optimization_df,
    random_state=8,
    test_size=0.8,
)

In [None]:
# Score the original prompts/answers on the full held-out set.
baseline_eval_result = vapo_lib.evaluate_task(
    df=test_prompt_optimization_df,
    prompt_col="prompt",
    reference_col="reference",
    response_col="answer",
    experiment_name=EXPERIMENT_NAME,
    eval_metrics=["question_answering_quality", "groundedness"],
    eval_sample_n=len(test_prompt_optimization_df),
)

# List of (label, result) pairs; the optimized run is appended to it later.
evaluation_qa_results = [
    ("qa_eval_result_without_prompt_optimization", baseline_eval_result)
]

Plot the evaluation metrics.

In [None]:
# Visualize the collected evaluation results (only the baseline run at this point).
vapo_lib.plot_eval_metrics(evaluation_qa_results)

### Optimize the prompt template with Vertex AI prompt optimizer


#### Prepare the prompt template you want to optimize

A prompt consists of two key parts:

* **System Instruction Template** which is a fixed part of the prompt that controls or alters the model's behavior across all queries for a given task.

* **Prompt Template** which is a dynamic part of the prompt that changes based on the task. Prompt template includes context, task and more. To learn more, see [components of a prompt](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-design-strategies#components-of-a-prompt) in the official documentation.

In this scenario, you use Vertex AI prompt optimizer to optimize a simple system instruction template. And you use some examples in the remaining prompt template for evaluating different instruction templates along the optimization process.

> Having the `target` placeholder in the prompt template is optional. It represents the prompt's ground truth response in your prompt optimization dataset that you aim to optimize for your templates. If you don't have the prompt's ground truth response, remember to set the `source_model` parameter in your prompt optimizer configuration (see below) instead of adding ground truth responses. Vertex AI prompt optimizer then runs your sample prompts on the source model to generate the ground truth responses for you.


In [None]:
# System instruction to be optimized. The leading and trailing newlines are part
# of the text.
SYSTEM_INSTRUCTION_TEMPLATE = (
    "\n"
    "Given a question with context, provide the correct answer to the question.\n"
)

# Prompt template; {question}, {ctx} and {target} are filled from the columns of
# the optimization dataset.
PROMPT_TEMPLATE = (
    "\n"
    "Some examples of correct answer to a question are:\n"
    "Question: {question}\n"
    "Context: {ctx}\n"
    "Answer: {target}\n"
)

#### Prepare the prompt optimization dataset

To use Vertex AI prompt optimizer, you'll need a CSV or JSONL file with labeled examples.  These examples should follow a specific naming convention. For details see [Optimize prompts](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer).


> For effective **prompt optimization**, provide a dataset of examples where your model performs poorly with the current system instruction template. For reliable results, use 50-100 distinct samples.

> In case of **prompt migration**, consider using the source model to label examples that the target model struggles with, helping to identify areas for improvement.


In [None]:
# Column names must match the placeholders used in PROMPT_TEMPLATE.
placeholder_by_column = {
    "user_question": "question",
    "context": "ctx",
    "reference": "target",
}
optimizer_columns = ["question", "ctx", "target"]

# Work on a copy so the training split itself is left untouched: rename the
# columns, drop the ones the optimizer does not need, then fix the column order.
renamed_train_df = (
    train_prompt_optimization_df.copy()
    .rename(columns=placeholder_by_column)
    .drop(columns=["prompt", "answer"])
)
prepared_train_prompt_optimization_df = renamed_train_df[optimizer_columns]

Print some examples of the prompt optimization dataset.  

In [None]:
# Preview the first rows of the prepared optimization dataset.
prepared_train_prompt_optimization_df.head()

#### Upload samples to bucket

Once you have prepared your prompt optimization dataset, upload it to your Cloud Storage bucket.

In [None]:
# Write the prepared dataset as JSON Lines (one record per line) to the
# workspace location the optimizer reads from.
prepared_train_prompt_optimization_df.to_json(
    path_or_buf=INPUT_OPTIMIZATION_DATA_FILE_URI,
    lines=True,
    orient="records",
)

#### Configure optimization settings

Vertex AI prompt optimizer lets you control the optimization process by specifying what to optimize (instructions only, demonstrations only, or both), providing a system instruction and prompt template, and selecting the target model.  You can optionally refine the optimization with some advanced settings like its duration and the number of optimization iterations it runs, which models the Vertex AI prompt optimizer uses, and other parameters to control the structure and content of prompts.

Below are some common and recommended default configurations. For more advanced control, you can learn and explore more about all the parameters and how to best use them in the [detailed documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer).


In [None]:
# Job name and the Cloud Storage folder that receives this run's results.
PROMPT_OPTIMIZATION_JOB = "auto-prompt-design-job-" + vapo_lib.get_id()
OUTPUT_OPTIMIZATION_RUN_URI = str(
    OUTPUT_OPTIMIZATION_DATA_URI / PROMPT_OPTIMIZATION_JOB
)

# Basic configuration.
basic_config = {
    # System instructions for the target model (str).
    "system_instruction": SYSTEM_INSTRUCTION_TEMPLATE,
    # Template for prompts (str).
    "prompt_template": PROMPT_TEMPLATE,
    # Target model for optimization (str).
    # Supported: "gemini-2.0-flash-lite-001", "gemini-2.0-flash-001".
    "target_model": "gemini-2.0-flash-001",
    # Optimization mode (str).
    # Supported: "instruction", "demonstration", "instruction_and_demo".
    "optimization_mode": "instruction",
    # Evaluation metrics (list of str). Supported: "bleu", "coherence", "comet",
    # "exact_match", "fluency", "groundedness", "metricx", "rouge_1", "rouge_2",
    # "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness",
    # "question_answering_quality", "summarization_quality", "text_quality",
    # "verbosity", "tool_call_valid", "tool_name_match",
    # "tool_parameter_key_match", "tool_parameter_kv_match".
    "eval_metrics_types": ["question_answering_correctness", "groundedness"],
    # One weight per metric (list of float); same length as eval_metrics_types
    # and should sum to 1.
    "eval_metrics_weights": [0.9, 0.1],
    # How metric scores are combined (str).
    # Supported: "weighted_sum", "weighted_average".
    "aggregation_type": "weighted_sum",
    # Cloud Storage URI of the input optimization data (str).
    "input_data_path": INPUT_OPTIMIZATION_DATA_FILE_URI,
    # Cloud Storage URI where optimization results are saved (str).
    "output_path": OUTPUT_OPTIMIZATION_RUN_URI,
    # Google Cloud project ID (str).
    "project": PROJECT_ID,
}

# (Optional) Advanced configuration.
advanced_config = {
    # Iterations in instruction optimization mode (int, 10-20).
    "num_steps": 10,
    # Number of demonstrations evaluated in the demonstration-based modes
    # (int, 10-30).
    "num_demo_set_candidates": 10,
    # Demonstrations generated per prompt (int, 3-6).
    "demo_set_size": 3,
    # Location of the target model (str). Default us-central1.
    "target_model_location": "us-central1",
    # Google model the instructions/prompts were previously used with (str).
    # Not needed when the data provides a target column.
    "source_model": "",
    # Location of the source model (str). Default us-central1. Not needed when
    # the data provides a target column.
    "source_model_location": "",
    # Queries per second sent to the target model (int >= 1, quota permitting).
    "target_model_qps": 1,
    # Queries per second sent to the optimization model (int >= 1, quota permitting).
    "optimizer_model_qps": 1,
    # Queries per second sent to the eval model (int >= 1, quota permitting).
    "eval_qps": 1,
    # Queries per second sent to the source model (int >= 1, quota permitting);
    # left empty here because no source model is configured.
    "source_model_qps": "",
    # MIME type of the target model's responses (str).
    # Supported: text/plain, text/x.enum, application/json.
    "response_mime_type": "text/plain",
    # Controlled Generation response schema the target model uses (str).
    "response_schema": "",
    # Language of the system instructions (str). Supported: "English", "French",
    # "German", "Hebrew", "Hindi", "Italian", "Japanese", "Korean",
    # "Portuguese", "Simplified Chinese", "Spanish", "Traditional Chinese".
    "language": "English",
    # Placeholder -> content replacements applied to the system instruction (dict).
    "placeholder_to_content": {},
    # Amount of data used for validation (int, 5-100).
    "data_limit": 10,
    # Name of the source-text field when translation metrics (Comet, MetricX)
    # are selected; otherwise leave empty.
    "translation_source_field_name": "",
}

args = Namespace(**basic_config, **advanced_config)

#### Upload Vertex AI prompt optimizer config to Cloud Storage

After you define the Vertex AI prompt optimizer configuration, upload it to your Cloud Storage bucket.


In [None]:
# Serialize the optimizer settings to the JSON config file on Cloud Storage.
# `args` is left as a Namespace (vars() is applied only for the dump), so this
# cell can be re-run safely; the `with` block closes the file on exit.
with epath.Path(CONFIG_FILE_URI).open("w") as config_file:
    json.dump(vars(args), config_file)

#### Run the automatic prompt optimization job

Now you are ready to run your first Vertex AI prompt optimizer job using the Vertex AI SDK for Python.


> This prompt optimization job requires ~ 40 minutes to run.

> Be sure you have provisioned enough queries per minute (QPM) quota to meet the recommended QPM for each model. If you configure Vertex AI prompt optimizer with a QPM that is higher than the QPM you have access to, the job might fail. [Check out](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#before-you-begin) the documentation to learn more.


In [None]:
# Machine the job runs on.
apd_machine_spec = {
    "machine_type": "n1-standard-4",
}

# Optimizer container, pointed at the uploaded configuration file.
apd_container_spec = {
    "image_uri": APD_CONTAINER_URI,
    "args": ["--config=" + CONFIG_FILE_URI],
}

# Single worker pool with one replica.
WORKER_POOL_SPECS = [
    {
        "machine_spec": apd_machine_spec,
        "replica_count": 1,
        "container_spec": apd_container_spec,
    }
]

custom_job = aiplatform.CustomJob(
    worker_pool_specs=WORKER_POOL_SPECS,
    display_name=PROMPT_OPTIMIZATION_JOB,
)

# Submit under the tutorial service account; sync=False asks the SDK not to
# block this cell while the job runs.
custom_job.run(sync=False, service_account=SERVICE_ACCOUNT)

### Collect and display the optimization results

Vertex AI prompt optimizer returns both optimized templates and evaluation results for instructions, demonstrations, or both, depending on the optimization mode you define. The results are stored as JSONL files in your Cloud Storage bucket and help you understand the optimization process.

In this case, you want to collect the optimized templates and evaluation results for the system instruction.

Below you use a helper function to display those results.

In [None]:
# Results viewer from the tutorial helpers, pointed at this run's output folder.
results_ui = vapo_lib.ResultsUI(OUTPUT_OPTIMIZATION_RUN_URI)

# Placeholder HTML (two newlines) rendered before the results widget.
results_df_html = "\n\n"

display(HTML(results_df_html))
display(results_ui.get_container())

### Evaluate the new prompt template with the optimized instruction.

#### Generate new responses using the optimized system instruction.

Set the optimized system instruction template you get from Vertex AI prompt optimizer job.

In [None]:
# Paste the best system instruction reported by your own optimization run here;
# the value below is an example result.
OPTIMIZED_SYSTEM_INSTRUCTION_TEMPLATE = "You are a culinary expert. Use the provided cooking tips and your culinary expertise to answer the following question in a way that is comprehensive, engaging, and easy for a home cook to understand."  # @param {type:"string"}

Prepare optimized prompts using the optimized system instruction template.

In [None]:
# Optimized instruction followed by the question/context scaffold.
OPTIMIZED_PROMPT_TEMPLATE = (
    OPTIMIZED_SYSTEM_INSTRUCTION_TEMPLATE
    + "\nQuestion: \n{question}\nContext: \n{context}\nAnswer:"
)

# One fully formatted prompt per held-out row, in row order.
test_questions = test_prompt_optimization_df["user_question"].to_list()
test_contexts = test_prompt_optimization_df["context"].to_list()

optimized_prompts = []
for question_text, context_text in zip(test_questions, test_contexts):
    optimized_prompts.append(
        OPTIMIZED_PROMPT_TEMPLATE.format(question=question_text, context=context_text)
    )

Leverage Gemini API on Vertex AI to send parallel generation requests.

In [None]:
gemini_llm = vapo_lib.init_new_model(model_name="gemini-2.0-flash")

gemini_predictions = [
    vapo_lib.async_generate(p, model=gemini_llm) for p in optimized_prompts
]

gemini_predictions_col = await tqdm_asyncio.gather(*gemini_predictions)

Prepare the test data and visualize the resulting dataset.

In [None]:
# Attach the optimized prompts and the new model answers to the held-out frame.
new_test_columns = {
    "optimized_prompt_with_vapo": optimized_prompts,
    "gemini_answer_with_vapo": gemini_predictions_col,
}
for column_name, column_values in new_test_columns.items():
    test_prompt_optimization_df[column_name] = column_values

In [None]:
# Show one example of the held-out data, now including the optimized prompt and answer columns.
vapo_lib.print_df_rows(test_prompt_optimization_df, n=1)

#### Evaluate new responses using Vertex AI Gen AI evaluation

Use the generated responses with the optimized prompt to run a new round of evaluation with Vertex AI Gen AI Evaluation.


In [None]:
# Score the answers generated with the optimized prompt, using the same metrics
# and the same held-out rows as the baseline run.
optimized_eval_result = vapo_lib.evaluate_task(
    df=test_prompt_optimization_df,
    prompt_col="optimized_prompt_with_vapo",
    reference_col="reference",
    response_col="gemini_answer_with_vapo",
    experiment_name=EXPERIMENT_NAME,
    eval_metrics=["question_answering_quality", "groundedness"],
    eval_sample_n=len(test_prompt_optimization_df),
)

# Append as a (label, result) pair next to the baseline entry.
evaluation_qa_results.append(
    ("qa_eval_result_new_model_with_prompt_optimization", optimized_eval_result)
)

Inspect evaluation results.

In [None]:
# Visualize every collected evaluation run (baseline and optimized) together.
vapo_lib.plot_eval_metrics(evaluation_qa_results)

## 4. Clean up

In [None]:
delete_bucket = False
delete_job = False
delete_experiment = False
delete_tutorial = False

if delete_bucket:
    ! gsutil rm -r $BUCKET_URI

if delete_job:
    custom_job.delete()

if delete_experiment:
    experiment = aiplatform.Experiment(experiment_name=EXPERIMENT_NAME)
    experiment.delete()

if delete_tutorial:
    import shutil

    shutil.rmtree(str(TUTORIAL_PATH))