discoveryengine/import_documents_sample.py

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# [START genappbuilder_import_documents]


def import_documents_bigquery_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    bigquery_dataset: str,
    bigquery_table: str,
) -> str:
    # [START genappbuilder_import_documents_bigquery]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # bigquery_dataset = "YOUR_BIGQUERY_DATASET"
    # bigquery_table = "YOUR_BIGQUERY_TABLE"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        bigquery_source=discoveryengine.BigQuerySource(
            project_id=project_id,
            dataset_id=bigquery_dataset,
            table_id=bigquery_table,
            data_schema="custom",
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_bigquery]
    return operation.operation.name
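
# A minimal example of calling the BigQuery sample above. The project, data store,
# dataset, and table names here are hypothetical placeholders, not values from the
# original sample:
#
#   operation_name = import_documents_bigquery_sample(
#       project_id="my-project",
#       location="global",
#       data_store_id="my-data-store",
#       bigquery_dataset="my_dataset",
#       bigquery_table="my_table",
#   )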
# project_id = "YOUR_PROJECT_ID" # location = "YOUR_LOCATION" # Values: "global" # data_store_id = "YOUR_DATA_STORE_ID" # Examples: # - Unstructured documents # - `gs://bucket/directory/file.pdf` # - `gs://bucket/directory/*.pdf` # - Unstructured documents with JSONL Metadata # - `gs://bucket/directory/file.json` # - Unstructured documents with CSV Metadata # - `gs://bucket/directory/file.csv` # gcs_uri = "YOUR_GCS_PATH" # For more information, refer to: # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store client_options = ( ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com") if location != "global" else None ) # Create a client client = discoveryengine.DocumentServiceClient(client_options=client_options) # The full resource name of the search engine branch. # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch} parent = client.branch_path( project=project_id, location=location, data_store=data_store_id, branch="default_branch", ) request = discoveryengine.ImportDocumentsRequest( parent=parent, gcs_source=discoveryengine.GcsSource( # Multiple URIs are supported input_uris=[gcs_uri], # Options: # - `content` - Unstructured documents (PDF, HTML, DOC, TXT, PPTX) # - `custom` - Unstructured documents with custom JSONL metadata # - `document` - Structured documents in the discoveryengine.Document format. # - `csv` - Unstructured documents with CSV metadata data_schema="content", ), # Options: `FULL`, `INCREMENTAL` reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL, ) # Make the request operation = client.import_documents(request=request) print(f"Waiting for operation to complete: {operation.operation.name}") response = operation.result() # After the operation is complete, # get information from operation metadata metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata) # Handle the response print(response) print(metadata) # [END genappbuilder_import_documents_gcs] return operation.operation.name # [END genappbuilder_import_documents] def import_documents_cloud_sql_sample( project_id: str, location: str, data_store_id: str, sql_project_id: str, sql_instance_id: str, sql_database_id: str, sql_table_id: str, ) -> str: # [START genappbuilder_import_documents_cloud_sql] from google.api_core.client_options import ClientOptions from google.cloud import discoveryengine # TODO(developer): Uncomment these variables before running the sample. # project_id = "YOUR_PROJECT_ID" # location = "YOUR_LOCATION" # Values: "global" # data_store_id = "YOUR_DATA_STORE_ID" # sql_project_id = "YOUR_SQL_PROJECT_ID" # sql_instance_id = "YOUR_SQL_INSTANCE_ID" # sql_database_id = "YOUR_SQL_DATABASE_ID" # sql_table_id = "YOUR_SQL_TABLE_ID" # For more information, refer to: # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store client_options = ( ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com") if location != "global" else None ) # Create a client client = discoveryengine.DocumentServiceClient(client_options=client_options) # The full resource name of the search engine branch. # e.g. 


def import_documents_cloud_sql_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    sql_project_id: str,
    sql_instance_id: str,
    sql_database_id: str,
    sql_table_id: str,
) -> str:
    # [START genappbuilder_import_documents_cloud_sql]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # sql_project_id = "YOUR_SQL_PROJECT_ID"
    # sql_instance_id = "YOUR_SQL_INSTANCE_ID"
    # sql_database_id = "YOUR_SQL_DATABASE_ID"
    # sql_table_id = "YOUR_SQL_TABLE_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        cloud_sql_source=discoveryengine.CloudSqlSource(
            project_id=sql_project_id,
            instance_id=sql_instance_id,
            database_id=sql_database_id,
            table_id=sql_table_id,
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_cloud_sql]
    return operation.operation.name
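

# Hypothetical helper, not part of the original sample file: each sample above
# blocks on operation.result() with no bound. This sketch waits with a timeout
# instead and returns None if the import is still running, assuming the
# google.api_core long-running operation object used by the samples. The
# 1800-second default is an arbitrary example value.
def wait_for_import_with_timeout(operation, timeout_seconds: int = 1800):
    import concurrent.futures

    try:
        return operation.result(timeout=timeout_seconds)
    except concurrent.futures.TimeoutError:
        # The import is still running; it can be polled later by operation name.
        print(f"Import still in progress: {operation.operation.name}")
        return None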


def import_documents_spanner_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    spanner_project_id: str,
    spanner_instance_id: str,
    spanner_database_id: str,
    spanner_table_id: str,
) -> str:
    # [START genappbuilder_import_documents_spanner]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # spanner_project_id = "YOUR_SPANNER_PROJECT_ID"
    # spanner_instance_id = "YOUR_SPANNER_INSTANCE_ID"
    # spanner_database_id = "YOUR_SPANNER_DATABASE_ID"
    # spanner_table_id = "YOUR_SPANNER_TABLE_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        spanner_source=discoveryengine.SpannerSource(
            project_id=spanner_project_id,
            instance_id=spanner_instance_id,
            database_id=spanner_database_id,
            table_id=spanner_table_id,
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_spanner]
    return operation.operation.name
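

# Hypothetical helper, not part of the original sample file: the samples hard-code
# ReconciliationMode.INCREMENTAL. This sketch picks the mode from a boolean so a
# caller can request a full re-sync without editing each sample; only the enum
# values come from the requests above, the helper name and flag are assumptions.
def pick_reconciliation_mode(full_sync: bool):
    from google.cloud import discoveryengine

    modes = discoveryengine.ImportDocumentsRequest.ReconciliationMode
    # FULL re-syncs the data store against the source (documents missing from the
    # source may be removed); INCREMENTAL only inserts or updates documents.
    return modes.FULL if full_sync else modes.INCREMENTAL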


def import_documents_firestore_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    firestore_project_id: str,
    firestore_database_id: str,
    firestore_collection_id: str,
) -> str:
    # [START genappbuilder_import_documents_firestore]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # firestore_project_id = "YOUR_FIRESTORE_PROJECT_ID"
    # firestore_database_id = "YOUR_FIRESTORE_DATABASE_ID"
    # firestore_collection_id = "YOUR_FIRESTORE_COLLECTION_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        firestore_source=discoveryengine.FirestoreSource(
            project_id=firestore_project_id,
            database_id=firestore_database_id,
            collection_id=firestore_collection_id,
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_firestore]
    return operation.operation.name
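

# Hypothetical helper, not part of the original sample file: summarize an import
# instead of printing the raw response and metadata as the samples do. This assumes
# the success_count / failure_count fields on ImportDocumentsMetadata and the
# error_samples field on ImportDocumentsResponse; verify these names against the
# installed client library version.
def summarize_import(response, metadata) -> None:
    print(f"Imported: {metadata.success_count}, failed: {metadata.failure_count}")
    for error in response.error_samples:
        print(f"Sample error: {error.message}")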


def import_documents_bigtable_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    bigtable_project_id: str,
    bigtable_instance_id: str,
    bigtable_table_id: str,
) -> str:
    # [START genappbuilder_import_documents_bigtable]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # bigtable_project_id = "YOUR_BIGTABLE_PROJECT_ID"
    # bigtable_instance_id = "YOUR_BIGTABLE_INSTANCE_ID"
    # bigtable_table_id = "YOUR_BIGTABLE_TABLE_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    bigtable_options = discoveryengine.BigtableOptions(
        families={
            "family_name_1": discoveryengine.BigtableOptions.BigtableColumnFamily(
                type_=discoveryengine.BigtableOptions.Type.STRING,
                encoding=discoveryengine.BigtableOptions.Encoding.TEXT,
                columns=[
                    discoveryengine.BigtableOptions.BigtableColumn(
                        qualifier="qualifier_1".encode("utf-8"),
                        field_name="field_name_1",
                    ),
                ],
            ),
            "family_name_2": discoveryengine.BigtableOptions.BigtableColumnFamily(
                type_=discoveryengine.BigtableOptions.Type.INTEGER,
                encoding=discoveryengine.BigtableOptions.Encoding.BINARY,
            ),
        }
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        bigtable_source=discoveryengine.BigtableSource(
            project_id=bigtable_project_id,
            instance_id=bigtable_instance_id,
            table_id=bigtable_table_id,
            bigtable_options=bigtable_options,
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_bigtable]
    return operation.operation.name
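
# In the Bigtable sample above, BigtableOptions controls how Bigtable cells become
# document fields: the "qualifier_1" column in "family_name_1" is imported as a
# UTF-8 text string under the field name "field_name_1", while columns in
# "family_name_2" are imported as binary-encoded integers. The family, qualifier,
# and field names are placeholders to be replaced with the source table's schema.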


def import_documents_alloy_db_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    alloy_db_project_id: str,
    alloy_db_location_id: str,
    alloy_db_cluster_id: str,
    alloy_db_database_id: str,
    alloy_db_table_id: str,
) -> str:
    # [START genappbuilder_import_documents_alloy_db]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine_v1 as discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "global"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # alloy_db_project_id = "YOUR_ALLOY_DB_PROJECT_ID"
    # alloy_db_location_id = "YOUR_ALLOY_DB_LOCATION_ID"
    # alloy_db_cluster_id = "YOUR_ALLOY_DB_CLUSTER_ID"
    # alloy_db_database_id = "YOUR_ALLOY_DB_DATABASE_ID"
    # alloy_db_table_id = "YOUR_ALLOY_DB_TABLE_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        alloy_db_source=discoveryengine.AlloyDbSource(
            project_id=alloy_db_project_id,
            location_id=alloy_db_location_id,
            cluster_id=alloy_db_cluster_id,
            database_id=alloy_db_database_id,
            table_id=alloy_db_table_id,
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_alloy_db]
    return operation.operation.name


def import_documents_healthcare_fhir_sample(
    project_id: str,
    location: str,
    data_store_id: str,
    healthcare_project_id: str,
    healthcare_location: str,
    healthcare_dataset_id: str,
    healthcare_fhir_store_id: str,
) -> str:
    # [START genappbuilder_import_documents_healthcare_fhir]
    from google.api_core.client_options import ClientOptions
    from google.cloud import discoveryengine

    # TODO(developer): Uncomment these variables before running the sample.
    # project_id = "YOUR_PROJECT_ID"
    # location = "YOUR_LOCATION"  # Values: "us"
    # data_store_id = "YOUR_DATA_STORE_ID"
    # healthcare_project_id = "YOUR_HEALTHCARE_PROJECT_ID"
    # healthcare_location = "YOUR_HEALTHCARE_LOCATION"
    # healthcare_dataset_id = "YOUR_HEALTHCARE_DATASET_ID"
    # healthcare_fhir_store_id = "YOUR_HEALTHCARE_FHIR_STORE_ID"

    # For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.DocumentServiceClient(client_options=client_options)

    # The full resource name of the search engine branch.
    # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
    parent = client.branch_path(
        project=project_id,
        location=location,
        data_store=data_store_id,
        branch="default_branch",
    )

    request = discoveryengine.ImportDocumentsRequest(
        parent=parent,
        fhir_store_source=discoveryengine.FhirStoreSource(
            fhir_store=client.fhir_store_path(
                healthcare_project_id,
                healthcare_location,
                healthcare_dataset_id,
                healthcare_fhir_store_id,
            ),
        ),
        # Options: `FULL`, `INCREMENTAL`
        reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,
    )

    # Make the request
    operation = client.import_documents(request=request)

    print(f"Waiting for operation to complete: {operation.operation.name}")
    response = operation.result()

    # After the operation is complete,
    # get information from operation metadata
    metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

    # Handle the response
    print(response)
    print(metadata)
    # [END genappbuilder_import_documents_healthcare_fhir]
    return operation.operation.name
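

# Hypothetical entry point, not part of the original sample file: a minimal
# command-line wrapper for the Cloud Storage sample above. The flag names and the
# choice of sample are assumptions made for illustration.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Import documents from Cloud Storage.")
    parser.add_argument("--project-id", required=True)
    parser.add_argument("--location", default="global")
    parser.add_argument("--data-store-id", required=True)
    parser.add_argument("--gcs-uri", required=True)
    args = parser.parse_args()

    import_documents_gcs_sample(
        project_id=args.project_id,
        location=args.location,
        data_store_id=args.data_store_id,
        gcs_uri=args.gcs_uri,
    )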