### <font color='#4285f4'>Overview</font>

- Demonstrates how to use the Veo 2 REST API for creating videos
- Uses Gemini to rewrite your original text-to-video prompt, incorporating best practices for generating GenAI videos.

### <font color='#4285f4'>License</font>

```
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```

Author: Adam Paternostro

In [None]:
from PIL import Image
from IPython.display import HTML
from IPython.display import Audio
from functools import reduce
import IPython.display
import google.auth
import requests
import json
import uuid
import base64
import os
import cv2
import random
import time
import datetime
import base64
import random
import datetime

### <font color='#4285f4'>Initialize</font>

In [None]:
# Notebook-wide settings. Run this cell and check the printed values before continuing.
# The "${...}" values are placeholders -- presumably substituted by a deployment
# template (TODO confirm). When running the notebook standalone, use the
# commented-out overrides below instead.

bigquery_location = "${bigquery_location}"
region = "${region}"
location = "${location}"
storage_account = "${chocolate_ai_bucket}"

#bigquery_location = "us"
#region = "us-central1"
#location = "us-central1"
#storage_account = "your-bucket"

# Timestamp (minute resolution) used later to build a unique GCS output folder name
now = datetime.datetime.now()

# e.g. 2025-04-15-13-59
formatted_date = now.strftime("%Y-%m-%d-%H-%M")

# Ask gcloud for the active project and account via IPython shell capture ("!").
# The checks below expect each command to produce exactly one line of output.
project_id = !(gcloud config get-value project)
user = !(gcloud auth list --filter=status:ACTIVE --format="value(account)")

# Anything other than a single line means gcloud is not configured as expected
if len(project_id) != 1:
  raise RuntimeError(f"project_id is not set: {project_id}")
project_id = project_id[0]

if len(user) != 1:
  raise RuntimeError(f"user is not set: {user}")
user = user[0]

print(f"project_id = {project_id}")
print(f"user = {user}")

### <font color='#4285f4'>Pip installs</font>

In [None]:
# sys is only needed by the optional pip install below (it references sys.executable)
import sys

# Optional dependency, currently disabled. Uncomment the install line to enable it.
# https://pypi.org/project/moviepy/
# !{sys.executable} -m pip install moviepy

### <font color='#4285f4'>Helper Methods</font>

#### restAPIHelper
Calls the Google Cloud REST API using the current user's credentials.

In [None]:
def restAPIHelper(url: str, http_verb: str, request_body: dict) -> dict:
  """Calls a Google Cloud REST API using the current user's credentials.

  Args:
      url: Full URL of the REST endpoint.
      http_verb: One of "GET", "POST", "PUT", "PATCH" or "DELETE" (upper case).
      request_body: JSON-serializable body. Sent for POST/PUT/PATCH and
          ignored for GET/DELETE.

  Returns:
      The response body parsed from JSON (typically a dict).

  Raises:
      RuntimeError: If the verb is not supported or the response status is not 200.
  """

  # Reject unsupported verbs up front so no token refresh is spent on a call
  # that can never be made.
  verbs_with_body = ("POST", "PUT", "PATCH")
  verbs_without_body = ("GET", "DELETE")
  if http_verb not in verbs_with_body + verbs_without_body:
    raise RuntimeError(f"Unknown HTTP verb: {http_verb}")

  import json
  import requests
  import google.auth
  # The transport is a submodule; "import google.auth" alone does not guarantee
  # that google.auth.transport.requests has been loaded.
  import google.auth.transport.requests

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  headers = {
    "Content-Type" : "application/json",
    "Authorization" : "Bearer " + creds.token
  }

  if http_verb in verbs_with_body:
    response = requests.request(http_verb, url, json=request_body, headers=headers)
  else:
    response = requests.request(http_verb, url, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error restAPIHelper -> Status: '{response.status_code}' Text: '{response.text}'")

  return json.loads(response.content)

#### generateVideo
Generates the video and waits for it to complete.  Saves the prompt with the video and returns the file name.

In [None]:
def generateVideo(prompt, storage_account, output_gcs_path):
  """Generates a video with Veo 2 and waits for the long-running operation to finish.

  Submits the prompt to the text-to-video model, polls every 10 seconds until the
  operation reports "done" (which can take several minutes), then saves the request
  parameters as "text-to-video-prompt.json" next to the generated video in GCS.

  Args:
      prompt: The text-to-video prompt.
      storage_account: Name of the GCS bucket (without the "gs://" prefix).
      output_gcs_path: Path inside the bucket under which the model writes its output.

  Returns:
      The "gs://" URI of the first generated video.

  Raises:
      RuntimeError: If a REST call fails, the operation finishes with an error,
          or the operation finishes without returning any video.
  """

  full_output_gcs_path = f"gs://{storage_account}/{output_gcs_path}"
  model = "veo-2.0-generate-001"
  model_url = f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/publishers/google/models/{model}"

  # Kept under its own name (never mutated) because it is saved alongside the video later
  rest_api_parameters = {
      "instances": [
          {
              "prompt": prompt
          }
        ],
      "parameters": {
          "storageUri": full_output_gcs_path,
          "aspectRatio":"16:9"
          }
      }

  # Start the long-running generation
  url = f"{model_url}:predictLongRunning"
  print(f"url: {url}")
  print(f"request_body: {rest_api_parameters}")
  json_result = restAPIHelper(url, "POST", rest_api_parameters)
  print(f"json_result: {json_result}")
  operation_name = json_result["name"] # the operation id is returned in "name"

  # Poll the operation. A finished, successful operation looks like:
  # {
  #   "name": "projects/.../operations/<id>",
  #   "done": True,
  #   "response": {
  #     "raiMediaFilteredCount": 0,
  #     "videos": [ { "gcsUri": "gs://<bucket>/<path>/<random-id>/sample_0.mp4", "mimeType": "video/mp4" } ]
  #   }
  # }
  url = f"{model_url}:fetchPredictOperation"
  poll_request = {
      "operationName": operation_name
      }

  done = False
  while not done:
    time.sleep(10)
    print(f"url: {url}")
    print(f"request_body: {poll_request}")
    json_result = restAPIHelper(url, "POST", poll_request)
    print(f"json_result: {json_result}")
    if "done" in json_result:
      done = bool(json_result["done"]) # in the future might be a status of running
    else:
      print("Status 'done' JSON attribute not present.  Assuming not done...")

  # A finished operation can carry an error instead of a response; report it
  # clearly rather than failing with a KeyError below.
  if "error" in json_result:
    raise RuntimeError(f"Video generation failed: {json_result['error']}")

  videos = json_result.get("response", {}).get("videos", [])
  if not videos:
    raise RuntimeError(f"Video generation finished but returned no videos: {json_result}")

  # Get the filename of our video
  filename = videos[0]["gcsUri"]

  # The model writes into a generated sub-directory of the requested path; extract its name
  text_to_video_output_directory = filename.replace(full_output_gcs_path,"").split("/")[1]

  # Write the prompt to the same path as our outputted video.  Saving the prompt allows us to
  # know how to regenerate it (you should also save the seed and any other settings).
  json_filename = "text-to-video-prompt.json"
  try:
    with open(json_filename, "w") as f:
      json.dump(rest_api_parameters, f)
    copy_file_to_gcs(json_filename, storage_account, f"{output_gcs_path}/{text_to_video_output_directory}/{json_filename}")
  finally:
    # Remove the local temp file even when the upload fails
    delete_file(json_filename)

  return filename

#### Gemini

In [None]:
def GeminiLLM(prompt, model = "gemini-2.0-flash", response_schema = None,
                 temperature = 1, topP = 1, topK = 32):
  """Sends a prompt to Gemini (Vertex AI generateContent) and returns the text reply.

  The request always asks for a JSON response ("responseMimeType"); pass
  response_schema to constrain its shape.

  Args:
      prompt: The text prompt.
      model: Model id. See
          https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#supported_models
      response_schema: Optional schema object added to the generation config.
      temperature: Sampling temperature; negative values are clamped to 0.
      topP: Nucleus sampling parameter.
      topK: Only sent for the gemini-pro / gemini-1.0 model ids listed below.

  Returns:
      The text of the first part of the first candidate, with markdown code
      fences and newlines removed.

  Raises:
      RuntimeError: If the HTTP status is not 200, the body is not valid JSON,
          or the response does not contain a text part.
  """

  import google.auth
  # The transport is a submodule; "import google.auth" alone does not guarantee
  # that google.auth.transport.requests has been loaded.
  import google.auth.transport.requests

  if temperature < 0:
    temperature = 0

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + creds.token
  }

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers/google/models/{model}:generateContent"

  generation_config = {
    "temperature": temperature,
    "topP": topP,
    "maxOutputTokens": 8192,
    "candidateCount": 1,
    "responseMimeType": "application/json",
  }

  # Add in the response schema when it is provided
  if response_schema is not None:
    generation_config["responseSchema"] = response_schema

  if model in ("gemini-pro", "gemini-1.0-pro", "gemini-1.0-pro-vision-001"):
    generation_config["topK"] = topK

  payload = {
    "contents": {
      "role": "user",
      "parts": {
          "text": prompt
      },
    },
    "generation_config": generation_config,
    "safety_settings": {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "threshold": "BLOCK_LOW_AND_ABOVE"
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'")

  try:
    json_response = json.loads(response.content)
  except ValueError as error:
    raise RuntimeError(f"An error occurred parsing the JSON: {error}") from error

  # Walk candidates[0].content.parts[0].text, failing with the raw response
  # body in the message as soon as a level is missing.
  candidates = json_response.get("candidates")
  if not candidates:
    raise RuntimeError(f"No candidates: {response.content}")

  candidate = candidates[0]
  if "content" not in candidate:
    raise RuntimeError(f"No content in candidate: {response.content}")

  content = candidate["content"]
  if "parts" not in content or not content["parts"]:
    raise RuntimeError(f"No parts in content: {response.content}")

  part = content["parts"][0]
  if "text" not in part:
    raise RuntimeError(f"No text in part: {response.content}")

  # Remove some typical response characters (if asking for a JSON reply)
  llm_response = part["text"]
  llm_response = llm_response.replace("```json","")
  llm_response = llm_response.replace("```","")
  llm_response = llm_response.replace("\n","")

  return llm_response

#### GCS file helpers

In [None]:
# This was generated by GenAI

def copy_file_to_gcs(local_file_path, bucket_name, destination_blob_name):
  """Copies a file from a local drive to a GCS bucket.

  Files ending in ".html" or ".json" are uploaded with an explicit UTF-8
  content type; for any other file no content type is passed and the storage
  library chooses one.

  Args:
      local_file_path: The full path to the local file.
      bucket_name: The name of the GCS bucket to upload to.
      destination_blob_name: The desired name of the uploaded file in the bucket.

  Returns:
      None

  Raises:
      FileNotFoundError: If local_file_path does not exist.
  """

  import os

  # Check the local file first so a bad path fails before any client is created
  if not os.path.exists(local_file_path):
      raise FileNotFoundError(f"Local file '{local_file_path}' not found.")

  from google.cloud import storage

  # Build a reference to the destination object
  storage_client = storage.Client()
  blob = storage_client.bucket(bucket_name).blob(destination_blob_name)

  # None means "do not override": it is upload_from_filename's own default
  if local_file_path.endswith(".html"):
    content_type = "text/html; charset=utf-8"
  elif local_file_path.endswith(".json"):
    content_type = "application/json; charset=utf-8"
  else:
    content_type = None

  blob.upload_from_filename(local_file_path, content_type = content_type)

  shown_content_type = content_type if content_type is not None else "(library default)"
  print(f"File '{local_file_path}' uploaded to GCS bucket '{bucket_name}' as '{destination_blob_name}'.  Content-Type: '{shown_content_type}'.")

In [None]:
def download_from_gcs(destination_file_name, gcs_storage_bucket, object_name):
  """Downloads one object from a GCS bucket to a local file.

  Args:
      destination_file_name: Local path the object is written to.
      gcs_storage_bucket: Name of the bucket that holds the object.
      object_name: Name (path) of the object inside the bucket.

  Returns:
      None
  """

  from google.cloud import storage

  # bucket() / blob() only build client-side references. Unlike Bucket.get_blob,
  # Bucket.blob does not retrieve anything from Google Cloud Storage, which is
  # all that is needed before the download itself.
  source_blob = storage.Client().bucket(gcs_storage_bucket).blob(object_name)
  source_blob.download_to_filename(destination_file_name)

  print(f"Downloaded storage object {object_name} from bucket {gcs_storage_bucket} to local file {destination_file_name}.")

In [None]:
# Deletes a local file; a file that is already gone is reported, not treated as an error.

def delete_file(filename):
  """Removes filename from the local disk and prints what happened.

  Args:
      filename: Path of the file to delete.

  Returns:
      None. A missing file only produces a "not found" message.
  """
  try:
    os.remove(filename)
  except FileNotFoundError:
    print(f"File '{filename}' not found.")
    return
  print(f"File '{filename}' deleted successfully.")

### <font color='#4285f4'>Teach the LLM how to write text-to-video prompts</font>

In [None]:
# Prompt-writing guide that is appended to the Gemini request so the LLM knows how to
# write text-to-video prompts. The text is sent to the model verbatim (inside the
# <text-to-video-prompt-guide> tags), so any edit here changes what the LLM is told.

text_to_video_prompt_guide = """
Text-to-Video Prompt Writing Help:
<text-to-video-prompt-guide>
Here are some our best practices for text-to-video prompts:

Detailed prompts = better videos:
  - More details you add, the more control you’ll have over the video.
  - A prompt should look like this: "Camera dollies to show a close up of a desperate man in a green trench coat is making a call on a rotary style wall-phone, green neon light, movie scene."
    - Here is a break down of elements need to create a text-to-video prompt using the above prompt as an example:
      - "Camera dollies to show" = "Camera Motion"
      - "A close up of" = "Composition"
      - "A desperate man in a green trench coat" = "Subject"
      - "Is making a call" = "Action"
      - "On a roary style wall-phone" = "Scene"
      - "Green Neon light" = "Ambiance"
      - "Movie Scene" = "Style"

Use the right keywords for better control:
  - Here is a list of some keywords that work well with text-to-video, use these in your prompts to get the desired camera motion or style.
  - Subject: Who or what is the main focus of the shot.  Example: "happy woman in her 30s".
  - Scene: Where is the location of the shot. Example "on a busy street, in space".
  - Action: What is the subject doing Examples: "walking", "running", "turning head".
  - Camera Motion: What the camera is doing. Example: "POV shot", "Aerial View", "Tracking Drone view", "Tracking Shot".

Example text-to-video prompt using the above keywords:
  - Example text-to-video prompt: "Tracking drone view of a man driving a red convertible car in Palm Springs, 1970s, warm sunlight, long shadows"
  - Example text-to-video prompt: "A POV shot from a vintage car driving in the rain, Canada at night, cinematic"

Styles:
   - Overall aesthetic. Consider using specific film style keywords.  Examples: "horror film", "film noir, "animated styles", "3D cartoon style render".
  - Example text-to-video prompt: "Over the shoulder of a young woman in a car, 1970s, film grain, horror film, cinematic he Film noir style, man and woman walk on the street, mystery, cinematic, black and white"
  - Example text-to-video prompt: "A cute creatures with snow leopard-like fur is walking in winter forest, 3D cartoon style render. An architectural rendering of a white concrete apartment building with flowing organic shapes, seamlessly blending with lush greenery and futuristic elements."

Composition:
  - How the shot is framed. This is often relative to the subject e.g. wide shot, close-up, low angle
  - Example text-to-video prompt: "Extreme close-up of a an eye with city reflected in it. A wide shot of surfer walking on a beach with a surfboard, beautiful sunset, cinematic"

Ambiance & Emotions:
  - How the color and light contribute to the scene (blue tones, night)
  - Example text-to-video prompt: "A close-up of a girl holding adorable golden retriever puppy in the park, sunlight Cinematic close-up shot of a sad woman riding a bus in the rain, cool blue tones, sad mood"

Cinematic effects:
  - e.g. double exposure, projected, glitch camera effect.
  - Example text-to-video prompt: "A double exposure of silhouetted profile of a woman walking and lake, walking in a forest Close-up shot of a model with blue light with geometric shapes projected on her face"
  - Example text-to-video prompt: "Silhouette of a man walking in collage of cityscapes Glitch camera effect, close up of woman’s face speaking, neon colors"
</text-to-video-prompt-guide>
"""

### <font color='#4285f4'>Text-to-Video Example</font>
- Provide a basic text-to-video prompt
- Use Gemini to rewrite the text-to-video prompt incorporating best practices
- Provide Gemini with instructions for text-to-video
- Call Gemini
- Run the enhanced prompt

In [None]:
# Response schema passed to Gemini so the reply is a JSON object with a single
# required string field, "text-to-video-prompt".
# The schema was produced with this request to an LLM:
#   Write me the json in OpenAPI 3.0 schema object for the below object.
#   Make all fields required.
#    {
#      "text-to-video-prompt" : "text"
#    }
response_schema = {
  "type": "object",
  "required": [
    "text-to-video-prompt"
  ],
  "properties": {
    "text-to-video-prompt": {
      "type": "string"
    }
  }
}

# ---> Enter your original prompt here <---
original_prompt = "a cat watching tv"

# Ask Gemini to rewrite the original prompt; the prompt-writing guide is appended at the end
gemini_prompt = f"""Rewrite the following "original prompt" using the text-to-video instructions below.
You want the video to be creative and artistic..

Original Prompt:
"{original_prompt}"

Output Fields:
- "text-to-video-prompt":

Instructions:
 - Read the  "Text-to-Video Prompt Writing Help" to learn more about how to create good text-to-video prompts.
 - Make sure you include all the relevant best practices when creating the text-to-video prompt.
 - Do not include "text overlays" in the text-to-video prompt.
 - Do not include children in the text-to-video prompt.

{text_to_video_prompt_guide}
"""

print(gemini_prompt)
# GeminiLLM returns the reply as a JSON string; parse it into a dict for the next cell
llm_result = GeminiLLM(gemini_prompt, response_schema=response_schema)
gemini_results_dict = json.loads(llm_result)

print()
print(json.dumps(gemini_results_dict))

In [None]:
# Text-to-Video Parameters
# To bypass Gemini, replace the prompt below with a hardcoded one, for example:
# prompt = "A customer smiles while taking a bite of an exquisite dessert, eyes closed in pure enjoyment, soft focus background, warm lighting" # hardcoded example
prompt = gemini_results_dict["text-to-video-prompt"]
# The timestamp from the Initialize cell goes into the output folder name
output_gcs_path = f"text-to-video/text-to-video-{formatted_date}"

# Generate the video and wait for it to complete.  The prompt / text-to-video parameters will be saved in the same storage location as the video.
# filename is the gs:// URI returned by generateVideo.
filename = generateVideo(prompt, storage_account, output_gcs_path)
print(f"filename: {filename}")

In [None]:
# Download the generated video. download_from_gcs expects the object name, so the
# "gs://<bucket>/" prefix is stripped from the URI first.
download_from_gcs("text-to-video.mp4", storage_account, filename.replace(f"gs://{storage_account}/",""))

# Read the file inside a context manager so the handle is closed, then embed the
# bytes as a base64 data URL that the HTML <video> tag in the next cell can play.
with open("text-to-video.mp4", "rb") as video_file:
  video_mp4 = video_file.read()
video_url = "data:video/mp4;base64," + base64.b64encode(video_mp4).decode()

In [None]:
# Inline player for the generated video, fed by the base64 data URL in video_url.
# 600x337 is approximately the 16:9 aspect ratio requested in generateVideo.
# NOTE: the HTML(...) call is a bare expression, so it should stay the last statement in the cell to be displayed.
HTML(f"""
<p>Text-to-Video (no audio)</p>
<video width=600 height=337 controls>
      <source src="{video_url}" type="video/mp4">
</video>
""")

### <font color='#4285f4'>Clean Up</font>

In [None]:
# Placeholder

### <font color='#4285f4'>Reference Links</font>


- [Google.com](https://www.google.com)