backend-apis/deployment_scripts/vertex_search_operations.py (127 lines of code) (raw):
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
""" Vertex Search datastores creation """
import argparse
from typing import Optional
from google.api_core.client_options import ClientOptions
from google.cloud import discoveryengine_v1alpha as discoveryengine
def import_documents(
    project_id: str,
    location: str,
    data_store_id: str,
    gcs_uri: Optional[str] = None,
) -> str:
    """
    Start an import of documents from Cloud Storage into the datastore

    The import request is submitted to the "default_branch" of the
    datastore with data_schema "document". The long-running operation is
    not awaited; only its name is returned.

    Args:
        project_id:
            Id of the Google Cloud project
        location:
            Datastore location
        data_store_id:
            Datastore id
        gcs_uri:
            Google Cloud Storage URI of the documents to import

    Returns:
        Google Cloud API Operation name

    Raises:
        ValueError: if gcs_uri is None or empty
    """
    # The parameter defaults to None for signature compatibility, but the
    # request needs a real URI; fail early before creating any client.
    if not gcs_uri:
        raise ValueError("gcs_uri is required to import documents")

    # Non-global locations must use a location-prefixed API endpoint.
    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(
            api_endpoint=f"{location}-discoveryengine.googleapis.com"
        )
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(
        client_options=client_options
    )

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        gcs_source=discoveryengine.GcsSource(
            input_uris=[gcs_uri], data_schema="document"
        ),
    )

    operation = client.import_documents(request=request)

    # Return the name of the underlying long-running operation without
    # waiting for the import to finish.
    return operation.operation.name
def create_datastore(
    project_id: str,
    location: str,
    data_store_id: str,
):
    """
    Create a datastore and wait for the creation to complete

    The datastore is created in the "default_collection" of the given
    project and location, using data_store_id as both its id and its
    display name. The resulting datastore is printed.

    Args:
        project_id:
            Id of the Google Cloud project
        location:
            Datastore location
        data_store_id:
            Datastore id
    """
    # Non-global locations must use a location-prefixed API endpoint,
    # matching the endpoint selection done in import_documents.
    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(
            api_endpoint=f"{location}-discoveryengine.googleapis.com"
        )
        if location != "global"
        else None
    )

    client = discoveryengine.DataStoreServiceClient(
        client_options=client_options
    )

    # Initialize request argument(s)
    data_store = discoveryengine.DataStore()
    data_store.display_name = data_store_id
    data_store.industry_vertical = discoveryengine.IndustryVertical.GENERIC
    data_store.content_config = (
        discoveryengine.DataStore.ContentConfig.CONTENT_REQUIRED
    )
    data_store.solution_types = [
        discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH
    ]

    collection = client.collection_path(
        project=project_id, location=location, collection="default_collection"
    )

    request = discoveryengine.CreateDataStoreRequest(
        parent=collection, data_store=data_store, data_store_id=data_store_id
    )

    # Make the request
    operation = client.create_data_store(request=request)

    print("Waiting for operation to complete...")

    # Block until the long-running operation finishes
    response = operation.result()

    # Handle the response
    print(response)
def create_engine(
    project_id: str,
    location: str,
    engine_id: str,
    data_store_id: str,
    company_name: str,
):
    """
    Create a search engine and wait for the creation to complete

    The engine is created in the "default_collection" of the given project
    and location, attached to a single datastore, with the enterprise
    search tier and the LLM add-on. The resulting engine is printed.

    Args:
        project_id:
            Id of the Google Cloud project
        location:
            Location of the engine and its datastore
        engine_id:
            Id of the search engine
        data_store_id:
            Id of the datastore
        company_name:
            Company name
    """
    # Non-global locations must use a location-prefixed API endpoint,
    # matching the endpoint selection done in import_documents.
    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(
            api_endpoint=f"{location}-discoveryengine.googleapis.com"
        )
        if location != "global"
        else None
    )

    client = discoveryengine.EngineServiceClient(
        client_options=client_options
    )

    engine = discoveryengine.Engine()
    engine.search_engine_config = discoveryengine.Engine.SearchEngineConfig(
        search_tier=discoveryengine.SearchTier.SEARCH_TIER_ENTERPRISE,
        search_add_ons=[discoveryengine.SearchAddOn.SEARCH_ADD_ON_LLM],
    )
    engine.display_name = engine_id
    engine.data_store_ids = [data_store_id]
    engine.solution_type = discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH
    engine.industry_vertical = discoveryengine.IndustryVertical.GENERIC
    engine.common_config = discoveryengine.Engine.CommonConfig(
        company_name=company_name
    )

    collection = client.collection_path(
        project=project_id, location=location, collection="default_collection"
    )

    request = discoveryengine.CreateEngineRequest(
        parent=collection, engine=engine, engine_id=engine_id
    )

    operation = client.create_engine(request=request)

    # Block until the long-running operation finishes
    response = operation.result()
    print(response)
# Script entry point: create the datastore, start the document import, then
# create the search engine (app) on top of the datastore.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Vertex AI search datastores creation"
    )
    parser.add_argument("--project_id", required=True)
    parser.add_argument("--location", required=True)
    parser.add_argument(
        "--data_store_id", default="csm-search-datastore", required=False
    )
    parser.add_argument(
        "--engine_id", default="csm-search-engine", required=False
    )
    parser.add_argument(
        "--gcs_uri",
        default="gs://csm-solution-dataset/metadata/search_products.jsonl",
        required=False,
    )
    parser.add_argument("--company_name", default="CSM", required=False)

    args = parser.parse_args()

    print("Creating Datastore")
    create_datastore(
        project_id=args.project_id,
        location=args.location,
        data_store_id=args.data_store_id,
    )

    # The import is only started here, not awaited; print the operation name
    # so its progress can be tracked separately.
    print("Importing documents")
    import_operation_name = import_documents(
        project_id=args.project_id,
        location=args.location,
        data_store_id=args.data_store_id,
        gcs_uri=args.gcs_uri,
    )
    print(f"Import operation: {import_operation_name}")

    print("Creating App")
    create_engine(
        project_id=args.project_id,
        location=args.location,
        engine_id=args.engine_id,
        data_store_id=args.data_store_id,
        company_name=args.company_name,
    )