colab-enterprise/rideshare_llm_step_04_driver_summary.ipynb (666 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "HhgOK3uTr6M-" }, "source": [ "# Create Driver Summary (Preferences)\n", "- This notebook take about 5 to 10 minutes to execute\n", "- We will create 2 summaries\n", " - The driver preferences based upon what themes they mention in their reviews\n", " - A summary of all their reviews for a consolidated overall review" ] }, { "cell_type": "markdown", "metadata": { "id": "4xV3PMBh8Jjd" }, "source": [ "## Create Summary Prompt and run through LLM" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "eGtjlAFZr3pO" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- The table is populated by: CALL ${project_id}.${bigquery_rideshare_llm_enriched_dataset}.sp_step_02_extract_driver_attributes()\n", "SELECT *\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute`\n", "ORDER BY driver_id,\n", " driver_attribute_grouping,\n", " extracted_driver_attribute,\n", " rank_order\n", "LIMIT 100;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "aL5QLba1M9Ia" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Distinct list of driver attributes\n", "\n", "SELECT DISTINCT extracted_driver_attribute\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YwWMtN19SAcF" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Insert safe driver if they do not have any \"unsafe attributed\"\n", "INSERT INTO `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute`\n", "(driver_id, extracted_driver_attribute, driver_attribute_grouping, cnt, rank_order)\n", "SELECT driver_id, 'safe driver', 'safe-driver', 1, 1\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " WHERE driver.include_in_llm_processing = TRUE\n", " AND EXISTS (SELECT *\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute` AS driver_attribute\n", " WHERE driver.driver_id = driver_attribute.driver_id)\n", " AND NOT EXISTS (SELECT *\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute` AS driver_attribute\n", " WHERE driver.driver_id = driver_attribute.driver_id\n", " AND TRIM(extracted_driver_attribute) = 'distracted driver')\n", " AND NOT EXISTS (SELECT *\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute` AS driver_attribute\n", " WHERE driver.driver_id = driver_attribute.driver_id\n", " AND TRIM(extracted_driver_attribute) = 'driving too fast')\n", " AND NOT EXISTS (SELECT *\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute` AS child\n", " WHERE driver.driver_id = child.driver_id\n", " AND child.extracted_driver_attribute = 'safe driver')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xlwTohJKyGsS" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Sample prompt aggregation of attributes\n", "SELECT driver_id,\n", " STRING_AGG(\n", " CASE WHEN TRIM(extracted_driver_attribute) = 'driver likes music' THEN 'Attribute: the driver likes the radio on\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes no music' THEN 'Attribute: the driver likes the radio off\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'trunk space large' THEN 'Attribute: the driver has a large amount of trunk space\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'trunk space small' THEN 'Attribute: the driver has a small trunk\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'Category: trunk space small' THEN 'Attribute: the driver has a small trunk\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes conversation' THEN 'Attribute: the driver likes to have a conversation with the passengers\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes no conversation' THEN 'Attribute: the driver does not like to talk to the passengers\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driving too fast' THEN 'Attribute: the driver tends to drive too fast\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driving too slow' THEN 'Attribute: the driver drives too slow\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'clean car' THEN 'Attribute: the driver keeps their car clean on the inside and out\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'dirty car' THEN 'Attribute: the driver has a dirty car\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'car too hot' THEN 'Attribute: the driver has a keeps their car too warm inside\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'car too cold' THEN 'Attribute: the driver keeps their car too cold\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver speaks spanish' THEN 'Attribute: the driver is bilingual\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'distracted driver' THEN 'Attribute: the driver is a distracted driver\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'safe driver' THEN 'Attribute: the driver is a safe driver\\n'\n", " ELSE ''\n", " END,'') AS driver_attribute_agg\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute`\n", "GROUP BY driver_id\n", "LIMIT 100;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jNzpvp1qWk6F" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- OPTIONAL: Reset all the fields to null\n", "-- If you need to reset you data back to fresh data run the stored procedure: CALL `${project_id}.${bigquery_rideshare_llm_curated_dataset}.sp_reset_demo`();\n", "\n", "/*\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET driver_attribute_llm_summary_prompt = NULL,\n", " llm_summary_driver_attribute_json = NULL,\n", " llm_summary_driver_attribute = NULL\n", " WHERE TRUE;\n", "*/" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "sGmYo5mO1FIA" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Create the LLM prompt\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET driver_attribute_llm_summary_prompt =\n", " CONCAT('Write a 100 to 500 word summary for the following attributes.\\n',\n", " '1. Randomly sort the attributes\\n',\n", " '2. Select the first 3 to 5 attributes\\n',\n", " '3. Write the summary in present tense for only the first 3 to 5 attributes\\n',\n", " '4. The driver\\'s name is ', driver.driver_name, '\\n',\n", " '5. Use sentences with varying lengths\\n',\n", " '6. Write 2 to 5 sentences',\n", " driver_attribute_agg),\n", " llm_summary_driver_attribute_json = NULL,\n", " llm_summary_driver_attribute = NULL\n", " FROM\n", " (SELECT driver_id,\n", " STRING_AGG(\n", " CASE WHEN TRIM(extracted_driver_attribute) = 'driver likes music' THEN 'Attribute: the driver likes the radio on\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes no music' THEN 'Attribute: the driver likes the radio off\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'trunk space large' THEN 'Attribute: the driver has a large amount of trunk space\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'trunk space small' THEN 'Attribute: the driver has a small trunk\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'Category: trunk space small' THEN 'Attribute: the driver has a small trunk\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes conversation' THEN 'Attribute: the driver likes to have a conversation with the passengers\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver likes no conversation' THEN 'Attribute: the driver does not like to talk to the passengers\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driving too fast' THEN 'Attribute: the driver tends to drive too fast\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driving too slow' THEN 'Attribute: the driver drives too slow\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'clean car' THEN 'Attribute: the driver keeps their car clean on the inside and out\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'dirty car' THEN 'Attribute: the driver has a dirty car\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'car too hot' THEN 'Attribute: the driver has a keeps their car too warm inside\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'car too cold' THEN 'Attribute: the driver keeps their car too cold\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'driver speaks spanish' THEN 'Attribute: the driver is bilingual\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'distracted driver' THEN 'Attribute: the driver is a distracted driver\\n'\n", " WHEN TRIM(extracted_driver_attribute) = 'safe driver' THEN 'Attribute: the driver is a safe driver\\n'\n", " ELSE ''\n", " END,'') AS driver_attribute_agg\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver_attribute`\n", " GROUP BY driver_id\n", " ) AS driver_attribute\n", "WHERE driver.driver_id = driver_attribute.driver_id;\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "rfyk4FRh12do" }, "outputs": [], "source": [ "%%bigquery\n", "SELECT driver_attribute_llm_summary_prompt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " WHERE driver_attribute_llm_summary_prompt IS NOT NULL\n", " LIMIT 10;" ] }, { "cell_type": "markdown", "metadata": { "id": "UrQsqrtj2x69" }, "source": [ "## Run the LLM to generate a Driver Summary on Discovered Attributes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xEnr3nBp3y0Z" }, "outputs": [], "source": [ "from google.cloud import bigquery\n", "import pandas as pd\n", "\n", "client = bigquery.Client()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "B8uFxM_220li" }, "outputs": [], "source": [ "# Process in batches\n", "batch_size = 100\n", "\n", "# Set the parameters so we are more deterministic and less creative/random responses\n", "llm_temperature = 1\n", "llm_max_output_tokens = 1024\n", "llm_top_p = 1\n", "llm_top_k = 40\n", "\n", "update_sql=\"\"\"\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET llm_summary_driver_attribute_json = child.ml_generate_text_result\n", " FROM (SELECT *\n", " FROM ML.GENERATE_TEXT(MODEL`${project_id}.${bigquery_rideshare_llm_enriched_dataset}.gemini_model`,\n", " (SELECT driver_id,\n", " driver_attribute_llm_summary_prompt AS prompt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE (llm_summary_driver_attribute_json IS NULL\n", " OR\n", " JSON_VALUE(llm_summary_driver_attribute_json, '$.candidates[0].content.parts[0].text') IS NULL\n", " )\n", " AND include_in_llm_processing = TRUE\n", " AND driver_attribute_llm_summary_prompt IS NOT NULL\n", " LIMIT {batch_size}),\n", " STRUCT(\n", " {llm_temperature} AS temperature,\n", " {llm_max_output_tokens} AS max_output_tokens,\n", " {llm_top_p} AS top_p,\n", " {llm_top_k} AS top_k\n", " ))\n", " ) AS child\n", "WHERE driver.driver_id = child.driver_id\n", " \"\"\".format(batch_size = batch_size,\n", " llm_temperature = llm_temperature,\n", " llm_max_output_tokens = llm_max_output_tokens,\n", " llm_top_p = llm_top_p,\n", " llm_top_k = llm_top_k)\n", "\n", "print(\"SQL: {update_sql}\".format(update_sql=update_sql))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1gFA-Koc3nLx" }, "outputs": [], "source": [ "# Score while records remain\n", "# score in groups of batch_size records (we can do up to 10,000 at a time)\n", "import time\n", "\n", "done = False\n", "displayed_first_sql = False\n", "original_record_count = 0\n", "\n", "while done == False:\n", " # Get the count of records to score\n", " sql = \"\"\"\n", " SELECT COUNT(*) AS cnt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE (llm_summary_driver_attribute_json IS NULL\n", " OR\n", " JSON_VALUE(llm_summary_driver_attribute_json, '$.candidates[0].content.parts[0].text') IS NULL\n", " )\n", " AND include_in_llm_processing = TRUE\n", " AND driver_attribute_llm_summary_prompt IS NOT NULL;\n", " \"\"\"\n", "\n", " df_record_count = client.query(sql).to_dataframe()\n", " cnt = df_record_count['cnt'].head(1).item()\n", " if displayed_first_sql == False:\n", " original_record_count = cnt\n", " displayed_first_sql = True\n", "\n", " print(\"Remaining records to process: \", cnt, \" out of\", original_record_count, \" batch_size: \", batch_size)\n", "\n", " if cnt == 0:\n", " done = True\n", " else:\n", " # https://github.com/googleapis/python-bigquery/tree/master/samples\n", " job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)\n", " query_job = client.query(update_sql, job_config=job_config)\n", "\n", " # Check on the progress by getting the job's updated state.\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {}\".format(query_job.job_id, query_job.state))\n", "\n", " while query_job.state != \"DONE\":\n", " time.sleep(5)\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {}\".format(query_job.job_id, query_job.state))\n" ] }, { "cell_type": "markdown", "metadata": { "id": "Lw4oA-qy8gzQ" }, "source": [ "## Parse the LLM JSON results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "nFM-JNqg39up" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET llm_summary_driver_attribute = JSON_VALUE(llm_summary_driver_attribute_json, '$.candidates[0].content.parts[0].text')\n", " WHERE llm_summary_driver_attribute_json IS NOT NULL\n", " AND llm_summary_driver_attribute IS NULL;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_-YX3rPU39g5" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "SELECT driver_id, driver_attribute_llm_summary_prompt, llm_summary_driver_attribute\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE llm_summary_driver_attribute_json IS NOT NULL\n", " AND llm_summary_driver_attribute IS NOT NULL\n", "LIMIT 20;" ] }, { "cell_type": "markdown", "metadata": { "id": "H98uiO97jhPP" }, "source": [ "# Create Driver Summary baesd upon Customer Reviews" ] }, { "cell_type": "markdown", "metadata": { "id": "Cgu2Gx-vlJJv" }, "source": [ "## Create Summary Prompt and run through LLM" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "biJGzUwwVg9R" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- OPTIONAL: Reset all the fields to null\n", "-- If you need to reset you data back to fresh data run the stored procedure: CALL `${project_id}.${bigquery_rideshare_llm_curated_dataset}.sp_reset_demo`();\n", "\n", "/*\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET driver_review_summary_llm_summary_prompt = NULL,\n", " llm_summary_driver_review_summary_json = NULL,\n", " llm_summary_driver_review_summary = NULL\n", " WHERE TRUE;\n", "*/" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3YvdDQ2ojuRn" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Create the LLM prompt (latest 50 customer reviews per driver)\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET driver_review_summary_llm_summary_prompt =\n", " CONCAT('Write a 100 to 600 word summary for the following customer reviews.\\n',\n", " '1. Write the summary in present tense.\\n',\n", " '2. Write the summary from the customers prespective.\\n',\n", " '3. Do not repeat the same subject in the summary.\\n',\n", " '4. The reviews are for the driver ', driver_name, ' at a rideshare company.\\n',\n", " '5. The reviews are written by different customers.\\n',\n", " '6. Write 3 to 6 sentences.\\n',\n", " customer_review_agg)\n", " FROM (SELECT customer_review.driver_id,\n", " STRING_AGG(CONCAT(\"Review: \",customer_review_text),'\\n') AS customer_review_agg\n", " FROM (SELECT driver_id,\n", " customer_review_text\n", " FROM (SELECT trip.driver_id,\n", " trip.pickup_time,\n", " customer_review.customer_review_text,\n", " ROW_NUMBER() OVER (PARTITION BY trip.driver_id ORDER BY trip.pickup_time) AS row_nbr\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer_review` AS customer_review\n", " INNER JOIN `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.trip` AS trip\n", " ON customer_review.trip_id = trip.trip_id\n", " ) AS reviews\n", " WHERE row_nbr < 50 -- top 50 most recent\n", " ) AS customer_review\n", " GROUP BY driver_id) AS top_reviews\n", "WHERE driver.driver_id = top_reviews.driver_id;\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "iTKx8_VFjuGv" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "SELECT driver_review_summary_llm_summary_prompt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", "WHERE driver_review_summary_llm_summary_prompt IS NOT NULL\n", "LIMIT 5;\n" ] }, { "cell_type": "markdown", "metadata": { "id": "lMas79holRYH" }, "source": [ "## Run the LLM to generate a driver summary of all customer reviews" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QtBFZxaDlWmp" }, "outputs": [], "source": [ "# Process in batches\n", "batch_size = 10\n", "\n", "# Set the parameters for more creative\n", "llm_temperature = 1\n", "llm_max_output_tokens = 1024\n", "llm_top_p = 1\n", "llm_top_k = 40\n", "\n", "update_sql=\"\"\"\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` AS driver\n", " SET llm_summary_driver_review_summary_json = child.ml_generate_text_result\n", " FROM (SELECT *\n", " FROM ML.GENERATE_TEXT(MODEL`${project_id}.${bigquery_rideshare_llm_enriched_dataset}.gemini_model`,\n", " (SELECT driver_id,\n", " driver_review_summary_llm_summary_prompt AS prompt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE (llm_summary_driver_review_summary_json IS NULL\n", " OR\n", " JSON_VALUE(llm_summary_driver_review_summary_json, '$.candidates[0].content.parts[0].text') IS NULL\n", " )\n", " AND include_in_llm_processing = TRUE\n", " AND driver_review_summary_llm_summary_prompt IS NOT NULL\n", " LIMIT {batch_size}),\n", " STRUCT(\n", " {llm_temperature} AS temperature,\n", " {llm_max_output_tokens} AS max_output_tokens,\n", " {llm_top_p} AS top_p,\n", " {llm_top_k} AS top_k\n", " ))\n", " ) AS child\n", "WHERE driver.driver_id = child.driver_id\n", " \"\"\".format(batch_size = batch_size,\n", " llm_temperature = llm_temperature,\n", " llm_max_output_tokens = llm_max_output_tokens,\n", " llm_top_p = llm_top_p,\n", " llm_top_k = llm_top_k)\n", "\n", "print(\"SQL: {update_sql}\".format(update_sql=update_sql))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "B7ajlaR1lXCw" }, "outputs": [], "source": [ "# Score while records remain\n", "# score in groups of batch_size records (we can do up to 10,000 at a time)\n", "import time\n", "\n", "done = False\n", "displayed_first_sql = False\n", "original_record_count = 0\n", "\n", "while done == False:\n", " # Get the count of records to score\n", " sql = \"\"\"\n", " SELECT COUNT(*) AS cnt\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE (llm_summary_driver_review_summary_json IS NULL\n", " OR\n", " JSON_VALUE(llm_summary_driver_review_summary_json, '$.candidates[0].content.parts[0].text') IS NULL\n", " )\n", " AND include_in_llm_processing = TRUE\n", " AND driver_review_summary_llm_summary_prompt IS NOT NULL;\n", " \"\"\"\n", "\n", " df_record_count = client.query(sql).to_dataframe()\n", " cnt = df_record_count['cnt'].head(1).item()\n", " if displayed_first_sql == False:\n", " original_record_count = cnt\n", " displayed_first_sql = True\n", "\n", " print(\"Remaining records to process: \", cnt, \" out of\", original_record_count, \" batch_size: \", batch_size)\n", "\n", "\n", " if cnt == 0:\n", " done = True\n", " else:\n", " # https://github.com/googleapis/python-bigquery/tree/master/samples\n", " job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)\n", " query_job = client.query(update_sql, job_config=job_config)\n", "\n", " # Check on the progress by getting the job's updated state.\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {}\".format(query_job.job_id, query_job.state))\n", "\n", " while query_job.state != \"DONE\":\n", " time.sleep(5)\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {}\".format(query_job.job_id, query_job.state))\n" ] }, { "cell_type": "markdown", "metadata": { "id": "VI9lWCDklf3_" }, "source": [ "## Parse the LLM JSON results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "6N_CCkPNlduv" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver` driver\n", " SET llm_summary_driver_review_summary = JSON_VALUE(llm_summary_driver_review_summary_json, '$.candidates[0].content.parts[0].text')\n", " WHERE llm_summary_driver_review_summary_json IS NOT NULL\n", " AND llm_summary_driver_review_summary IS NULL;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2ULcMnEelW4f" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "SELECT driver_id, driver_review_summary_llm_summary_prompt, llm_summary_driver_review_summary_json, llm_summary_driver_review_summary\n", " FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.driver`\n", " WHERE llm_summary_driver_review_summary_json IS NOT NULL\n", " AND llm_summary_driver_review_summary IS NOT NULL\n", "LIMIT 20;\n" ] } ], "metadata": { "colab": { "collapsed_sections": [ "4xV3PMBh8Jjd", "UrQsqrtj2x69", "Lw4oA-qy8gzQ", "Cgu2Gx-vlJJv" ], "name": "BigQuery table", "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }