In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex Prompt Optimizer Notebook UI (Preview)

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fprompts%2Fprompt_optimizer%2Fvertex_ai_prompt_optimizer_ui.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

# Overview
This Notebook showcases the Vertex AI prompt optimizer, a tool that iteratively optimizes prompts to suit a target model (e.g., `gemini-2.0-flash`) using target-specific metric(s).

Key Use Cases:

* Prompt Optimization: Enhance the quality of an initial prompt by refining its structure and content to match the target model's optimal input characteristics.

* Prompt Translation: Adapt prompts optimized for one model to work effectively with a different target model.

For the detailed documentation please see [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer).

## Getting Started

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [1]:
import sys

# Interactive sign-in is a Colab-only step. Colab is recognised by the
# `google.colab` package already being loaded in this kernel; every other
# environment skips the block.
_running_on_colab = "google.colab" in sys.modules
if _running_on_colab:
    from google.colab import auth

    auth.authenticate_user()

# Step 0: Install packages and libraries

In [None]:
!wget https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/vapo_lib.py
import vapo_lib

# Step 1: Create a prompt template and system instructions
Provide your system instruction and prompt template below. Refer to [this guide](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#template-si) for instructions.

Prompts consist of two key components:

- System Instruction: A system instruction is the instruction that gets passed to the model before any user input in the prompt. This is the fixed part of the prompt template shared across all queries for a given task.
- Prompt template: A task is the text in the prompt that you want the model to provide a response for. Context is information that you include in the prompt that the model uses or references when generating a response. These are the dynamic parts of the prompt template that change based on the task.

Prompt Optimizer enables the optimization or translation of the System Instruction template, while the prompt template remains essential for evaluating and selecting the best System Instruction template.

In [None]:
# Fixed instruction sent ahead of every query; this is the part of the prompt
# that the optimizer rewrites.
# Keep the assignment on one line: a `# @param` marker on a continuation line
# inside parentheses is not attached to the variable, so no Colab form field is
# rendered for it. The formatter is switched off to stop it re-wrapping the line.
# fmt: off
SYSTEM_INSTRUCTION = "Answer the following question. Let's think step by step.\n"  # @param
# fmt: on
# Per-example template used to evaluate candidate system instructions.
# NOTE(review): the `{question}` / `{target}` placeholders presumably map to
# fields of the input data — confirm against the prompt optimizer docs.
PROMPT_TEMPLATE = "Question: {question}\n\nAnswer: {target}"  # @param

# Step 2: Configure project settings
To optimize the prompt for your target Google model, provide a CSV or JSONL file containing labeled validation samples (input, ground truth output pairs). Refer to [here](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#prepare-sample-prompts) for instructions.

* Focus on examples that specifically demonstrate the issues you want to address.
* Recommendation: Use 50-100 distinct samples for reliable results. However, the tool can still be effective with as few as 5 samples.
* For prompt translation (e.g. third-party model to Google model, PaLM 2 to Gemini):
    * Consider using the source model to label examples that the target model struggles with, helping to identify areas for improvement.
    * When you select a source model, you don't need to provide labels for the input examples.
    * While the source model selection is limited to Google models, the tool still supports labeled inputs from non-Google models. If you wish to use a non-Google source model, you will need to provide labels for your input examples.


In [None]:
# @markdown **Project setup**: <br/>
# Replace the bracketed placeholder values below before running the notebook.
# Project passed to the optimizer job as `project`.
PROJECT_ID = "[YOUR_PROJECT]"  # @param {type:"string"}
# Passed to the optimizer job as the target model's location.
LOCATION = "us-central1"  # @param {type:"string"}
# Base output path; each run writes under `{OUTPUT_PATH}/{display_name}`.
OUTPUT_PATH = "[OUTPUT_PATH]"  # @param {type:"string"}
# CSV or JSONL file with the labeled validation samples.
INPUT_DATA_PATH = "[INPUT_DATA_PATH]"  # @param {type:"string"}

# Step 3: Configure optimization settings
The optimization configurations are defaulted to the values that are most commonly used, which we recommend using as the initial set-up.

The most important settings are:

* Target Model: The model you are optimizing your prompts for.
* Optimization Mode: The mode in which you want to optimize your prompt.
* Evaluation Metrics: The evaluation metrics against which you want to optimize your prompts.
* Translation Source Field Name: Fill in the name of the field that holds the source text in your data if translation metrics such as Comet or MetricX are selected. Otherwise, leave it empty.

Note that all evaluation metrics are expected to have the larger-the-better property. Therefore, we have rescaled the MetricX value to range from 0 (worst) to 25 (best).
Refer to [this page](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them.

In [None]:
# Model the prompts are being optimized for.
TARGET_MODEL = "gemini-2.0-flash-001"  # @param ["gemini-2.0-flash-lite-001", "gemini-2.0-flash-001"]
# Optimization mode forwarded to the job as `optimization_mode`.
OPTIMIZATION_MODE = "instruction_and_demo"  # @param ["instruction", "demonstration", "instruction_and_demo"]
# Single metric to optimize against; the multi-metric settings in Step 4
# override it when they are used.
EVAL_METRIC = "question_answering_correctness"  # @param ["bleu", "coherence", "comet", "exact_match", "fluency", "groundedness", "metricx", "text_quality", "verbosity", "rouge_1", "rouge_2", "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness", "question_answering_quality", "summarization_quality", "tool_name_match", "tool_parameter_key_match", "tool_parameter_kv_match", "tool_call_valid"] {type:"string"}
# Name of the data field holding the source text; only needed for translation
# metrics such as comet or metricx, otherwise leave empty.
TRANSLATION_SOURCE_FIELD_NAME = ""  # @param {type:"string"}

# Step 4: Configure advanced optimization settings [Optional]
Refer to [this page](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer#configuration) to learn more about the different configuration settings and how to best utilize them.

In [None]:
# @markdown **Instruction Optimization Configs**: <br/>
NUM_INST_OPTIMIZATION_STEPS = 10  # @param {type:"integer"}

# @markdown **Demonstration Optimization Configs**: <br/>
NUM_DEMO_OPTIMIZATION_STEPS = 10  # @param {type:"integer"}
NUM_DEMO_PER_PROMPT = 3  # @param {type:"integer"}

# @markdown **Model Configs**: <br/>
TARGET_MODEL_QPS = 3.0  # @param {type:"number"}
EVAL_QPS = 3.0  # @param {type:"number"}

# @markdown **Multi-metric Configs**: <br/>
# @markdown Use this section only if you need more than one metric for optimization. This will override the metric you picked above.
# "NA" marks an unused metric slot; each slot is paired with the weight that follows it.
EVAL_METRIC_1 = "NA"  # @param ["NA", "bleu", "coherence", "comet", "exact_match", "fluency", "groundedness", "metricx", "text_quality", "verbosity", "rouge_1", "rouge_2", "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness", "question_answering_quality", "summarization_quality", "tool_name_match", "tool_parameter_key_match", "tool_parameter_kv_match", "tool_call_valid"] {type:"string"}
EVAL_METRIC_1_WEIGHT = 0.0  # @param {type:"number"}
EVAL_METRIC_2 = "NA"  # @param ["NA", "bleu", "coherence", "comet", "exact_match", "fluency", "groundedness", "metricx", "text_quality", "verbosity", "rouge_1", "rouge_2", "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness", "question_answering_quality", "summarization_quality", "tool_name_match", "tool_parameter_key_match", "tool_parameter_kv_match", "tool_call_valid"] {type:"string"}
EVAL_METRIC_2_WEIGHT = 0.0  # @param {type:"number"}
EVAL_METRIC_3 = "NA"  # @param ["NA", "bleu", "coherence", "comet", "exact_match", "fluency", "groundedness", "metricx", "text_quality", "verbosity", "rouge_1", "rouge_2", "rouge_l", "rouge_l_sum", "safety", "question_answering_correctness", "question_answering_quality", "summarization_quality", "tool_name_match", "tool_parameter_key_match", "tool_parameter_kv_match", "tool_call_valid"] {type:"string"}
EVAL_METRIC_3_WEIGHT = 0.0  # @param {type:"number"}
METRIC_AGGREGATION_TYPE = "weighted_sum"  # @param ["weighted_sum", "weighted_average"]

# @markdown **Misc Configs**: <br/>
# JSON object written as a string; it is parsed with `json.loads` when the job
# parameters are assembled.
PLACEHOLDER_TO_VALUE = "{}"  # @param
RESPONSE_MIME_TYPE = "text/plain"  # @param ["text/plain", "application/json", "text/x.enum"] {type:"string"}
# Exposed as a form field like the sibling TOOLS / TOOL_CONFIG settings, so a
# schema can be supplied from the form alongside RESPONSE_MIME_TYPE.
RESPONSE_SCHEMA = ""  # @param
TARGET_LANGUAGE = "English"  # @param ["English", "French", "German", "Hebrew", "Hindi", "Italian", "Japanese", "Korean", "Portuguese", "Simplified Chinese", "Spanish", "Traditional Chinese"] {type:"string"}
TOOLS = ""  # @param
TOOL_CONFIG = ""  # @param

# Step 5: Run Prompt Optimizer
A progress bar will appear to let you know how long the job takes.

In [None]:
import datetime
import json
import time

# Timestamped display name; it also names this run's output folder below.
timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
display_name = f"pt_{timestamp}"

# All four metric selectors are passed in, including unused "NA" slots.
# NOTE(review): the meaning of the result and of the empty-string second
# argument is defined in vapo_lib — confirm there.
label_enforced = vapo_lib.is_run_target_required(
    [
        EVAL_METRIC,
        EVAL_METRIC_1,
        EVAL_METRIC_2,
        EVAL_METRIC_3,
    ],
    "",
)
input_data_path = f"{INPUT_DATA_PATH}"
# Check the combined prompt (system instruction + template) against the input
# data before any job is launched.
vapo_lib.validate_prompt_and_data(
    "\n".join([SYSTEM_INSTRUCTION, PROMPT_TEMPLATE]),
    input_data_path,
    PLACEHOLDER_TO_VALUE,
    label_enforced,
)

output_path = f"{OUTPUT_PATH}/{display_name}"

# Job configuration assembled from the settings chosen in Steps 1-4.
params = {
    "project": PROJECT_ID,
    "num_steps": NUM_INST_OPTIMIZATION_STEPS,
    "system_instruction": SYSTEM_INSTRUCTION,
    "prompt_template": PROMPT_TEMPLATE,
    "target_model": TARGET_MODEL,
    "target_model_qps": TARGET_MODEL_QPS,
    "target_model_location": LOCATION,
    "eval_qps": EVAL_QPS,
    "optimization_mode": OPTIMIZATION_MODE,
    "num_demo_set_candidates": NUM_DEMO_OPTIMIZATION_STEPS,
    "demo_set_size": NUM_DEMO_PER_PROMPT,
    "aggregation_type": METRIC_AGGREGATION_TYPE,
    # NOTE(review): hard-coded; presumably caps how many input samples are
    # used — confirm against the prompt optimizer docs.
    "data_limit": 50,
    "input_data_path": input_data_path,
    "output_path": output_path,
    "response_mime_type": RESPONSE_MIME_TYPE,
    "response_schema": RESPONSE_SCHEMA,
    "language": TARGET_LANGUAGE,
    "placeholder_to_content": json.loads(PLACEHOLDER_TO_VALUE),
    "tools": TOOLS,
    "tool_config": TOOL_CONFIG,
    "translation_source_field_name": TRANSLATION_SOURCE_FIELD_NAME,
}

# Collect every configured multi-metric slot. Slots left at "NA" are skipped
# individually, so a gap (e.g. metric 1 and 3 set, metric 2 "NA") no longer
# discards the metrics that follow it.
multi_metric_config = [
    (metric, weight)
    for metric, weight in zip(
        [EVAL_METRIC_1, EVAL_METRIC_2, EVAL_METRIC_3],
        [EVAL_METRIC_1_WEIGHT, EVAL_METRIC_2_WEIGHT, EVAL_METRIC_3_WEIGHT],
    )
    if metric != "NA"
]

if multi_metric_config:
    # Multi-metric settings override the single EVAL_METRIC selection.
    params["eval_metrics_types"] = [metric for metric, _ in multi_metric_config]
    params["eval_metrics_weights"] = [weight for _, weight in multi_metric_config]
else:
    # No multi-metric slot is in use: optimize the single metric at full weight.
    params["eval_metrics_types"] = [EVAL_METRIC]
    params["eval_metrics_weights"] = [1.0]

job = vapo_lib.run_apd(params, OUTPUT_PATH, display_name)
print(f"Job ID: {job.name}")

# Poll every 5 seconds until the progress monitor returns a falsy value.
progress_form = vapo_lib.ProgressForm(params)
while progress_form.monitor_progress(job):
    time.sleep(5)

# Step 6: Inspect the results
For a clearer look at the specific responses generated by each prompt template during the optimization process, use the cell below.
This will allow you to inspect all the predictions made by all the
generated templates during one or multiple vertex prompt optimizer runs.


In [None]:
from IPython.display import HTML, display

RESULT_PATH = "[OUTPUT_PATH]"  # @param {type:"string"}

# Stylesheet emitted ahead of the viewer: a fixed-height scrollable class plus
# row and header colours taken from Colab's theme variables.
results_df_html = """
<style>
  .scrollable {
    width: 100%;
    height: 80px;
    overflow-y: auto;
    overflow-x: hidden;  /* Hide horizontal scrollbar */
  }
  tr:nth-child(odd) {
    background: var(--colab-highlighted-surface-color);
  }
  tr:nth-child(even) {
    background-color: var(--colab-primary-surface-color);
  }
  th {
    background-color: var(--colab-highlighted-surface-color);
  }
</style>
"""

# Viewer object for the results stored under RESULT_PATH.
results_ui = vapo_lib.ResultsUI(RESULT_PATH)

# Styles go out first, then the viewer's container.
display(HTML(results_df_html))
display(results_ui.get_container())