build.py (294 lines of code) (raw):

# Copyright 2025 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=C0301, R1714, R0917, W0719, W0718, W0621, W1510 """Builds and deploys a custom agent based on the user configuration The account running this script should have the following recommended roles: - roles/serviceusage.serviceUsageAdmin - roles/resourcemanager.projectIamAdmin - roles/iam.serviceAccountAdmin - roles/iam.serviceAccountUser - roles/storage.admin - roles/artifactregistry.admin - roles/run.admin - roles/cloudbuild.builds.editor """ import os import re import subprocess import sys from uuid import uuid4 # TODO: Install pip install poetry and run poetry install from google.cloud import discoveryengine from google.api_core.client_options import ClientOptions import vertexai import yaml def read_yaml_file(filepath: str) -> dict: """Reads a yaml and returns file contents as a dict. Defaults to utf-8 encoding. Args: filepath (str): Path to the yaml. Returns: dict: Contents of the yaml. Raises: Exception: If an error is encountered reading the file. """ try: with open(filepath, "r", encoding="utf-8") as file: file_dict = yaml.safe_load(file) file.close() except yaml.YAMLError as err: raise yaml.YAMLError(f"Error reading file. {err}") from err return file_dict ENV_TAG = "dev" DEPLOY_TO_AGENT_ENGINE = False # Cloud Run services config. BACKEND_PATH = "Runtime_env" BACKEND_CONFIG_FILE = f"{BACKEND_PATH}/deployment/config/{ENV_TAG}.yaml" BACKEND_BUILD_FILE = f"{BACKEND_PATH}/deployment/cd/{ENV_TAG}.yaml" FRONTEND_PATH = "ChatbotUI" FRONTEND_CONFIG_FILE = f"{FRONTEND_PATH}/src/environments/environment.ts" FRONTEND_BUILD_FILE = f"{FRONTEND_PATH}/deployment/cd/{ENV_TAG}.yaml" backend_yaml_config = read_yaml_file(BACKEND_CONFIG_FILE) # Check that keys exist required_keys = [ "PROJECT_ID", "VERTEX_AI_LOCATION", "AGENT_FOUNDATION_MODEL", "AGENT_INDUSTRY_TYPE", "AGENT_ORCHESTRATION_FRAMEWORK", "USER_AGENT", "AGENT_DESCRIPTION", "AGENT_BUILDER_LOCATION" ] if not all(key in backend_yaml_config for key in required_keys): raise KeyError(f"Missing Required keys in {BACKEND_CONFIG_FILE}. Required Keys: {required_keys}") # Grab env vars previously set by user PROJECT_ID = backend_yaml_config["PROJECT_ID"] REGION = backend_yaml_config["VERTEX_AI_LOCATION"] AGENT_FOUNDATION_MODEL = backend_yaml_config["AGENT_FOUNDATION_MODEL"] AGENT_INDUSTRY_TYPE = backend_yaml_config["AGENT_INDUSTRY_TYPE"] AGENT_ORCHESTRATION_FRAMEWORK = backend_yaml_config["AGENT_ORCHESTRATION_FRAMEWORK"] AGENT_NAME = backend_yaml_config["USER_AGENT"] AGENT_DESCRIPTION = backend_yaml_config["AGENT_DESCRIPTION"] DATA_STORE_LOCATION = backend_yaml_config["AGENT_BUILDER_LOCATION"] # Terraform Constants. TERRAFORM_DIRECTORY = f"{BACKEND_PATH}/deployment/terraform" TERRAFORM_VAR_FILE = "vars/env.tfvars" # GCP resources constants. ARTIFACT_REGISTRY_REPOSITORY = f"{PROJECT_ID.lower().replace(' ', '-')}-{AGENT_NAME.lower().replace(' ', '-')}-repository" CLOUD_RUN_BACKEND_SERVICE_NAME = AGENT_NAME.lower().replace(" ", "-") + "-backend" CLOUD_RUN_FRONTEND_SERVICE_NAME = AGENT_NAME.lower().replace(" ", "-") + "-frontend" DATASTORE_INDUSTRY_SOURCES_MAP = { "finance": "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/*.pdf", "healthcare": "gs://cloud-samples-data/vertex-ai/medlm/primock57/transcripts/*.txt", "retail": "gs://cloud-samples-data/dialogflow-cx/google-store/*.html", } DATA_STORE_ID = f"agent_smithy_data_store_{uuid4()}" DATA_STORE_NAME = f"{PROJECT_ID.lower().replace(' ', '-')}-{AGENT_NAME.lower().replace(' ', '-')}-datastore" SEARCH_APP_ENGINE_ID = f"agent_smithy_search_engine_{uuid4()}" GCS_STAGING_BUCKET = f"gs://{PROJECT_ID.lower().replace(' ', '-')}-agents-staging" vertexai.init( project=PROJECT_ID, location=REGION, staging_bucket=GCS_STAGING_BUCKET ) def deploy_terraform_infrastructure(directory: str, variables_file: str): init_terraform_command = ["terraform", f"-chdir={directory}", "init"] apply_terraform_command = ["terraform", f"-chdir={directory}", "apply", "--var-file", variables_file] search_and_replace_file(f"{directory}/{variables_file}", r"project_id = \"(.*?)\"", f'project_id = "{PROJECT_ID}"') search_and_replace_file(f"{directory}/{variables_file}", r"region = \"(.*?)\"", f'region = "{REGION}"') search_and_replace_file(f"{directory}/{variables_file}", r"agent_name = \"(.*?)\"", f'agent_name = "{AGENT_NAME}"') search_and_replace_file(f"{directory}/{variables_file}", r"vertex_ai_staging_bucket = \"(.*?)\"", f'vertex_ai_staging_bucket = "{GCS_STAGING_BUCKET.split("/")[2]}"') search_and_replace_file(f"{directory}/{variables_file}", r"artifact_registry_repo_name = \"(.*?)\"", f'artifact_registry_repo_name = "{ARTIFACT_REGISTRY_REPOSITORY}"') search_and_replace_file(f"{directory}/{variables_file}", r"backend_cloud_run_service_name = \"(.*?)\"", f'backend_cloud_run_service_name = "{CLOUD_RUN_BACKEND_SERVICE_NAME}"') search_and_replace_file(f"{directory}/{variables_file}", r"frontend_cloud_run_service_name = \"(.*?)\"", f'frontend_cloud_run_service_name = "{CLOUD_RUN_FRONTEND_SERVICE_NAME}"') subprocess.run(init_terraform_command, check=True) subprocess.run(apply_terraform_command, check=True) def create_data_store() -> str: client_options = ( ClientOptions(api_endpoint=f"{DATA_STORE_LOCATION}-discoveryengine.googleapis.com") if DATA_STORE_LOCATION != "global" else None ) # Create a client client = discoveryengine.DataStoreServiceClient(client_options=client_options) parent = client.collection_path( project=PROJECT_ID, location=DATA_STORE_LOCATION, collection="default_collection", ) data_store = discoveryengine.DataStore( display_name=DATA_STORE_NAME, industry_vertical=discoveryengine.IndustryVertical.GENERIC, solution_types=[discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH], content_config=discoveryengine.DataStore.ContentConfig.CONTENT_REQUIRED, ) request = discoveryengine.CreateDataStoreRequest( parent=parent, data_store_id=DATA_STORE_ID, data_store=data_store, ) operation = client.create_data_store(request=request) print(f"Waiting for operation to complete: {operation.operation.name}") operation.result() def populate_data_store(industry: str): client_options = ( ClientOptions(api_endpoint=f"{DATA_STORE_LOCATION}-discoveryengine.googleapis.com") if DATA_STORE_LOCATION != "global" else None ) # Create a client client = discoveryengine.DocumentServiceClient(client_options=client_options) parent = client.branch_path( project=PROJECT_ID, location=DATA_STORE_LOCATION, data_store=DATA_STORE_ID, branch="default_branch", ) request = discoveryengine.ImportDocumentsRequest( parent=parent, gcs_source=discoveryengine.GcsSource( input_uris=[DATASTORE_INDUSTRY_SOURCES_MAP[industry]], data_schema="content", ), reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL, ) operation = client.import_documents(request=request) print(f"Import operation will keep on running on the background: {operation.operation.name}") def create_search_app() -> str: client_options = ( ClientOptions(api_endpoint=f"{DATA_STORE_LOCATION}-discoveryengine.googleapis.com") if DATA_STORE_LOCATION != "global" else None ) # Create a client client = discoveryengine.EngineServiceClient(client_options=client_options) # The full resource name of the collection # e.g. projects/{project}/locations/{location}/collections/default_collection parent = client.collection_path( project=PROJECT_ID, location=DATA_STORE_LOCATION, collection="default_collection", ) engine = discoveryengine.Engine( display_name=SEARCH_APP_ENGINE_ID, industry_vertical=discoveryengine.IndustryVertical.GENERIC, solution_type=discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH, search_engine_config=discoveryengine.Engine.SearchEngineConfig( search_tier=discoveryengine.SearchTier.SEARCH_TIER_ENTERPRISE, search_add_ons=[discoveryengine.SearchAddOn.SEARCH_ADD_ON_LLM], ), data_store_ids=[DATA_STORE_ID], ) request = discoveryengine.CreateEngineRequest( parent=parent, engine=engine, engine_id=SEARCH_APP_ENGINE_ID, ) operation = client.create_engine(request=request) print(f"Waiting for operation to complete: {operation.operation.name}") operation.result() def run_agent_engine_deployment() -> str: # TODO figure out a better way to dynamically get these env after they are written navigate_to_directory(BACKEND_PATH) sys.path.insert(0, os.getcwd()) from app.orchestration.server_utils import get_agent_from_config from app.utils.utils import deploy_agent_to_agent_engine agent_manager = get_agent_from_config( agent_orchestration_framework=AGENT_ORCHESTRATION_FRAMEWORK, agent_foundation_model=AGENT_FOUNDATION_MODEL, industry_type=AGENT_INDUSTRY_TYPE ) remote_agent = None if AGENT_ORCHESTRATION_FRAMEWORK == "llamaindex_agent": remote_agent = deploy_agent_to_agent_engine( agent_manager, AGENT_NAME, AGENT_DESCRIPTION ) elif AGENT_ORCHESTRATION_FRAMEWORK == "langgraph_vertex_ai_agent_engine_agent" or AGENT_ORCHESTRATION_FRAMEWORK == "langchain_vertex_ai_agent_engine_agent": remote_agent = deploy_agent_to_agent_engine( agent_manager.agent_executor, AGENT_NAME, AGENT_DESCRIPTION ) if not remote_agent.resource_name: raise Exception("Error deploying Agent to Agent Engine.") try: # If AGENT_ENGINE_RESOURCE_ID is set, then the agent will query the remote agent with open(BACKEND_CONFIG_FILE.replace(f"{BACKEND_PATH}/", ""), "a", encoding="utf-8") as f: f.write(f"\nAGENT_ENGINE_RESOURCE_ID: {remote_agent.resource_name}\n") f.close() except FileNotFoundError: print(f"`{BACKEND_CONFIG_FILE.replace(f'{BACKEND_PATH}/', '')}` file not found.") navigate_to_directory(".") # Retrieve the project number associated with your project ID project_number = subprocess.run( ["gcloud", "projects", "describe", PROJECT_ID, "--format=value(projectNumber)"], check=True, capture_output=True, text=True ).stdout.strip() # Add Discovery Engine Editor to the Agent Engine Service account iam_command = [ "gcloud", "projects", "add-iam-policy-binding", PROJECT_ID, f"--member=serviceAccount:service-{project_number}@gcp-sa-aiplatform-re.iam.gserviceaccount.com", "--role=roles/discoveryengine.editor", "--no-user-output-enabled" ] subprocess.run(iam_command, check=True) return remote_agent.resource_name def get_cloud_run_url(region: str, service_name: str) -> str: try: describe = subprocess.run(["gcloud", "run", "services", "describe", service_name, "--region", region], capture_output=True, text=True) if describe.returncode == 0: url_match = re.search(r"\s+URL:\s+(.*?)\n", describe.stdout) if url_match: url = url_match.group(1) return url else: print("URL not found in the output.") return "" else: print("Cloud run service does not exist.") print("Error describing service (non-zero exit code):") print(f"Stdout: {describe.stdout}") print(f"Stderr: {describe.stderr}") return "" except Exception as e: # Catch any other potential errors print(f"An unexpected error occurred: {e}") return "" def configure_backend(gcs_bucket: str, datastore_id: str, frontend_url: str, config_file: str): search_and_replace_file(config_file, r"GCS_STAGING_BUCKET:\s(.*?)*\n", f"GCS_STAGING_BUCKET: {gcs_bucket}\n") search_and_replace_file(config_file, r"DATA_STORE_ID:\s(.*?)*\n", f"DATA_STORE_ID: {datastore_id}\n") search_and_replace_file(config_file, r"FRONTEND_URL:\s(.*?)*\n", f"FRONTEND_URL: {frontend_url}\n") def configure_frontend(agent_name: str, backend_url: str, env_tag: str, config_file: str): search_and_replace_file(config_file, r"const env: string = \"(.*?)\"", f'const env: string = "{env_tag}"') search_and_replace_file(config_file, r"backendURL = \"(.*?)\"", f'backendURL = "{backend_url}/"') search_and_replace_file(config_file, r"chatbotName = \"(.*?)\"", f'chatbotName = "{agent_name}"') def build_and_deploy_cloud_run( project_id: str, region: str, container_name: str, artifact_registry_name: str, service_name: str, build_file_location: str, is_backend: bool, ): push_command = [ "gcloud", "builds", "submit", "--config", build_file_location, "--substitutions", f"_PROJECT_ID={project_id},_REGION={region},_CONTAINER_NAME={container_name},_ARTIFACT_REGISTRY_REPO_NAME={artifact_registry_name},_SERVICE_NAME={service_name}", BACKEND_PATH if is_backend else FRONTEND_PATH ] subprocess.run(push_command, check=True) def navigate_to_directory(directory: str): os.chdir(os.path.dirname(os.path.abspath(__file__)) + f"/{directory}") def search_and_replace_file(file_path: str, search_pattern: str, new_line: str): try: with open(file_path, "r", encoding="utf-8") as f: file_content = f.read() updated_content = re.sub(search_pattern, new_line, file_content) with open(file_path, "w", encoding="utf-8") as f: f.write(updated_content) f.close() except FileNotFoundError: print(f"`{file_path}` file not found.") if __name__ == "__main__": # TODO: Set arguments for calling the script #if len(sys.argv) < 2: # print("Usage: python3 local_deploy.py action (e.g action = (clone, run, redeploy))") # exit(1) deploy_terraform_infrastructure(TERRAFORM_DIRECTORY, TERRAFORM_VAR_FILE) create_data_store() populate_data_store(AGENT_INDUSTRY_TYPE) create_search_app() # Build and deploy BE Service. frontend_url = get_cloud_run_url(REGION, CLOUD_RUN_FRONTEND_SERVICE_NAME) configure_backend( GCS_STAGING_BUCKET, DATA_STORE_ID, frontend_url, BACKEND_CONFIG_FILE ) if DEPLOY_TO_AGENT_ENGINE: run_agent_engine_deployment() build_and_deploy_cloud_run( PROJECT_ID, REGION, "agent_runtime", ARTIFACT_REGISTRY_REPOSITORY, CLOUD_RUN_BACKEND_SERVICE_NAME, BACKEND_BUILD_FILE, True ) # Build and deploy FE Service. backend_url = get_cloud_run_url(REGION, CLOUD_RUN_BACKEND_SERVICE_NAME) configure_frontend(AGENT_NAME, backend_url, ENV_TAG, FRONTEND_CONFIG_FILE) build_and_deploy_cloud_run( PROJECT_ID, REGION, "chatbot_ui", ARTIFACT_REGISTRY_REPOSITORY, CLOUD_RUN_FRONTEND_SERVICE_NAME, FRONTEND_BUILD_FILE, False )