utils/topic-model-training-evaluation/topic_model_training_eval.py (46 lines of code) (raw):

import json

# NOTE(review): `auth` is not referenced anywhere in this file; presumably
# `auth.authenticate_user()` is run in another Colab cell -- confirm before
# removing the import.
from google.colab import auth
from google.cloud import storage
from google.cloud import bigquery

_GCS_SCHEME = 'gs://'


def _parse_gcs_uri(gcs_uri):
    """Split ``gs://bucket/path/to/object`` into ``(bucket_name, blob_path)``.

    Args:
        gcs_uri (str): GCS URI of the form ``gs://bucket/path``.

    Returns:
        tuple: ``(bucket_name, blob_path)``; the blob path keeps any inner
        ``/`` separators.

    Raises:
        ValueError: If the URI does not start with ``gs://`` or is missing
            the bucket name or the object path.
    """
    if not isinstance(gcs_uri, str) or not gcs_uri.startswith(_GCS_SCHEME):
        raise ValueError(
            f"Expected a GCS URI of the form gs://bucket/path, got {gcs_uri!r}"
        )
    bucket_name, _, blob_path = gcs_uri[len(_GCS_SCHEME):].partition('/')
    if not bucket_name or not blob_path:
        raise ValueError(
            f"GCS URI must contain a bucket and an object path, got {gcs_uri!r}"
        )
    return bucket_name, blob_path


def _issue_rows(content):
    """Build one BigQuery row dict per entry under the ``issues`` key."""
    return [
        {
            'name': issue.get('name'),
            'displayName': issue.get('displayName'),
            'displayDescription': issue.get('displayDescription'),
            # `or []` also covers an explicit JSON null, so the REPEATED
            # column always receives a list.
            'sampleUtterances': issue.get('sampleUtterances') or [],
        }
        for issue in content.get('issues') or []
    ]


def import_issue_model_to_bq(gcs_uri, project_id, dataset_id, table_id):
    """
    Imports issue model JSON from GCS to BigQuery in Colab environment

    The JSON object is downloaded, every entry under its ``issues`` key is
    reduced to the four columns of the schema below, and the rows are loaded
    into ``project_id.dataset_id.table_id``. The table is created when it
    does not exist, and its previous contents are replaced (WRITE_TRUNCATE).

    Args:
        gcs_uri (str): GCS URI of the JSON file (gs://bucket/path)
        project_id (str): Target BigQuery project ID
        dataset_id (str): Target BigQuery dataset ID
        table_id (str): Target BigQuery table ID

    Raises:
        ValueError: If ``gcs_uri`` is not a well-formed ``gs://bucket/path``
            URI.

    Errors raised by the Storage and BigQuery clients, or by ``json.loads``
    on invalid JSON, are not caught here and propagate to the caller.
    """
    # Parse bucket and blob path from GCS URI
    bucket_name, blob_path = _parse_gcs_uri(gcs_uri)

    # Read JSON from GCS
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_path)
    # download_as_bytes() replaces the deprecated download_as_string();
    # json.loads accepts the bytes directly.
    content = json.loads(blob.download_as_bytes())

    # Extract issues data
    rows = _issue_rows(content)

    # Define BigQuery schema
    schema = [
        bigquery.SchemaField('name', 'STRING'),
        bigquery.SchemaField('displayName', 'STRING'),
        bigquery.SchemaField('displayDescription', 'STRING'),
        bigquery.SchemaField('sampleUtterances', 'STRING', mode='REPEATED'),
    ]

    # Initialize BigQuery client
    bq_client = bigquery.Client(project=project_id)
    table_ref = f"{project_id}.{dataset_id}.{table_id}"

    # Create table if it doesn't exist
    table = bigquery.Table(table_ref, schema=schema)
    bq_client.create_table(table, exists_ok=True)

    # Load data to BigQuery, replacing whatever the table held before
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
    job_config.schema = schema
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    job = bq_client.load_table_from_json(
        rows,
        table_ref,
        job_config=job_config,
    )
    job.result()  # Wait for job to complete


# Runs the import with hard-coded source and destination whenever this file
# is executed.
# NOTE(review): this also fires on a plain `import` of the module; consider
# an `if __name__ == "__main__":` guard if the function is reused elsewhere.
import_issue_model_to_bq(
    'gs://cf-bucket-24812/model_export.json',
    'gsd-ccai-insights-offering',
    'insights_analytics',
    'issue_model_table',
)