In [None]:
# Copyright 2025 DeepMind Technologies Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google-gemini/genai-processors/blob/main/notebooks/research_example.ipynb)

# 📖 Research Agent Example

This notebook demonstrates how to build a research agent using the modular components of the `genai-processors` library.

You will see how a complex task, like researching a topic, can be broken down into a series of smaller, reusable processors. We will explore each component individually and then combine them into a powerful, automated research pipeline.

In this notebook, we will cover:

*   **Setup**: Getting your environment ready and configuring an API key.
*   **Individual Processors**: Exploring the building blocks `TopicGenerator`, `TopicResearcher`, and `TopicVerbalizer`.
*   **Chaining**: Learning how to connect processors to create a seamless workflow.
*   **The Complete Agent**: Running the `ResearchAgent` to see how all the pieces come together for an end-to-end task.

Note: You will need to authorize Colab to access your GitHub account in order to import the research example.

## 🍳 Setup

In [None]:
# {display-mode: "form"}
# @markdown Run this cell to import libraries and perform initializations.

# NOTE: we need to import the code from GitHub since it is not included in the
# PyPI package.

import os

try:
  !pip install genai-processors --q
  !git clone --filter=blob:none --q --no-checkout --depth 1 https://github.com/google-gemini/genai-processors.git /content/temp_repo
  %cd /content/temp_repo
  !git sparse-checkout set --no-cone examples/research
  !git checkout
  %cd /content

  downloaded_subdir_path = "/content/temp_repo/examples/research"
  if not os.path.exists(downloaded_subdir_path):
    print("Download failed using 'git sparse-checkout")
  else:
    print("‚úÖ Successfully downloaded 'research' examples.")

  !mv /content/temp_repo/examples/research /content/research_latest
  !rm -rf /content/temp_repo

  import shutil
  import genai_processors

  package_path = genai_processors.__path__[0]
  examples_path = os.path.join(package_path, "examples")
  installed_research_path = os.path.join(examples_path, "research")

  if os.path.exists(installed_research_path):
    shutil.rmtree(installed_research_path)

  shutil.move("/content/research_latest", installed_research_path)
  print("‚úÖ Successfully replaced 'research' examples with the latest version.")

except Exception as e:
  print(f"An error occurred during import: {e}")

from genai_processors import content_api
from genai_processors import processor
from genai_processors import streams
from genai_processors.core import jinja_template
from genai_processors.examples import research
from google.colab import userdata
from IPython.display import Markdown, display

# Short alias used throughout the notebook.
ProcessorPart = processor.ProcessorPart


def render_part(part: ProcessorPart) -> None:
  """Displays the text of `part` as Markdown in the cell output.

  Parts on the "status" substream are shown below a horizontal rule with a
  "Status" label; all other parts are rendered as-is, with a space-padded
  retry if the first rendering attempt raises.

  Args:
    part: The part whose `text` should be displayed.
  """
  if part.substream_name != "status":
    try:
      display(Markdown(part.text))
    except Exception:
      display(Markdown(f" {part.text} "))
    return

  display(Markdown(f"--- \n *Status*: {part.text}"))

## 🔐 Auth

To use the research processors, you will need an API key. If you have not
done so already, obtain an API key from Google AI Studio, then either add it as a
Colab secret named `GOOGLE_API_KEY` (recommended) or set it directly below.

In [None]:
# Stays `None` unless the Colab secret lookup below succeeds.
GOOGLE_API_KEY = None

try:
  colab_secret = userdata.get('GOOGLE_API_KEY')
except Exception:
  print('Failed to obtain `GOOGLE_API_KEY`.')
else:
  GOOGLE_API_KEY = colab_secret

## 🏗 Processors

In [None]:
# Research request used as the single input part for the pipelines below.
USER_PROMPT = "Research the best things about owning dalmatians!"  # @param { "type": "string" }

### ✍ `TopicGenerator`

The `TopicGenerator` processor generates a list of research topics, given the user's content.

In [None]:
p_generator = research.TopicGenerator(api_key=GOOGLE_API_KEY)

topic_parts = []
input_stream = streams.stream_content([ProcessorPart(USER_PROMPT)])
async for content_part in p_generator(input_stream):
  if content_part.mimetype == 'application/json; type=Topic':
    topic_parts.append(content_part)
  else:
    render_part(content_part)

### 🔍 `TopicResearcher`


Next, we add `TopicResearcher` to `TopicGenerator` to generate `Topic` objects.

In [None]:
topics = []
p_researcher = research.TopicResearcher(api_key=GOOGLE_API_KEY)

pipeline = p_generator + p_researcher

input_stream = streams.stream_content([ProcessorPart(USER_PROMPT)])
async for content_part in pipeline(input_stream):
  if content_part.mimetype == 'application/json; type=Topic':
    topics.append(content_part.get_dataclass(research.interfaces.Topic))
  elif content_part.substream_name == 'status':
    render_part(content_part)

print(f'Pipeline produced {len(topics)} `Topic` `ProcessorParts`:\n\n')

for t in topics:
  print(t)

### 🗣 `TopicVerbalizer`

A Jinja2 processor is used to convert `Topic` parts into human-readable research text.

In [None]:
p_verbalizer = jinja_template.RenderDataClass(
    template_str=(
        "## {{ data.topic }}\n"
        "*{{ data.relationship_to_user_content }}*"
        "{% if data.research_text|trim != '' %}"
        "\n\n### Research\n\n{{ data.research_text }}"
        "{% endif %}"
    ),
    data_class=research.interfaces.Topic,
)

pipeline = p_generator + p_researcher + p_verbalizer

input_stream = streams.stream_content([ProcessorPart(USER_PROMPT)])
async for content_part in pipeline(input_stream):
  # We exclude printing status to demonstrate the verbalization.
  if content_part.substream_name != "status":
    render_part(content_part)

## 🤖 Agent

Now that we have all our building blocks, we can chain them together inside our agent, resulting in a seamless flow of content.

In [None]:
input_stream = streams.stream_content([ProcessorPart(USER_PROMPT)])

output_parts = content_api.ProcessorContent()
async for content_part in research.ResearchAgent(api_key=GOOGLE_API_KEY)(input_stream):
  if content_part.substream_name == 'status':
    render_part(content_part)
  output_parts += content_part

render_part(ProcessorPart(f"""# Final synthesized research

{content_api.as_text(output_parts, substream_name='')}"""))