colab-enterprise/gen-ai-demo/Customer-Reviews-Word-Cloud.ipynb (231 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "k6eIqerFOzyj" }, "source": [ "## <img src=\"https://lh3.googleusercontent.com/mUTbNK32c_DTSNrhqETT5aQJYFKok2HB1G2nk2MZHvG5bSs0v_lmDm_ArW7rgd6SDGHXo0Ak2uFFU96X6Xd0GQ=w160-h128\" width=\"45\" valign=\"top\" alt=\"BigQuery\"> Draw a Word Cloud Visualization based upon our Customer Review Sentiment\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### License" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##################################################################################\n", "# Copyright 2024 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "# \n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "# \n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License.\n", "###################################################################################" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Notebook Overview" ] }, { "cell_type": "markdown", "metadata": { "id": "0zKI-ZR1Spog" }, "source": [ "- Create a word cloud based upon the themes detected in the customer reviews.\n", "- Create a visualization for our postive and negative reviews:\n", " 1. Execute a BigQuery SQL to gather our postive review data\n", " 2. Execute a BigQuery SQL to gather our negative review data\n", " 3. Create a postive word cloud image\n", " 4. Create a negative word cloud image" ] }, { "cell_type": "markdown", "metadata": { "id": "8zy0eEJmHxRZ" }, "source": [ "## Word Cloud of Customer Review Themes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "z4NpP0pCH0pj" }, "outputs": [], "source": [ "import sys\n", "!{sys.executable} -m pip install wordcloud" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "daMwYTnxIOiN" }, "outputs": [], "source": [ "from wordcloud import WordCloud\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "v1urBEM7KAC6" }, "outputs": [], "source": [ "%%bigquery wordcloud_Positive_df\n", "\n", "WITH max_data AS\n", "(\n", " SELECT MAX(review_datetime) AS max_review_datetime\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.customer_review` AS customer_review\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.city` AS city\n", " ON city.city_id = 1\n", ")\n", ", theme_data AS\n", "(\n", " SELECT CAST(JSON_VALUE(themes,'$.theme') AS STRING) AS review_theme\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.customer_review` AS customer_review\n", " CROSS JOIN UNNEST(JSON_QUERY_ARRAY(llm_detected_theme,'$')) AS themes\n", " CROSS JOIN max_data\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.city` AS city\n", " ON city.city_id = 1\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.location_history` AS location_history\n", " ON customer_review.location_id = location_history.location_id\n", " AND customer_review.review_datetime BETWEEN location_history.start_datetime AND location_history.stop_datetime\n", " WHERE customer_review.review_datetime BETWEEN TIMESTAMP_SUB(max_data.max_review_datetime, INTERVAL 7 DAY)\n", " AND max_data.max_review_datetime\n", " AND customer_review.review_sentiment = 'Positive'\n", ")\n", "SELECT STRING_AGG(REPLACE(review_theme,' ','-'), ' ') AS words\n", " FROM theme_data;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "L2_UqEJcLU6Y" }, "outputs": [], "source": [ "%%bigquery wordcloud_Negative_df\n", "\n", "WITH max_data AS\n", "(\n", " SELECT MAX(review_datetime) AS max_review_datetime\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.customer_review` AS customer_review\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.city` AS city\n", " ON city.city_id = 1\n", ")\n", ", theme_data AS\n", "(\n", " SELECT CAST(JSON_VALUE(themes,'$.theme') AS STRING) AS review_theme\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.customer_review` AS customer_review\n", " CROSS JOIN UNNEST(JSON_QUERY_ARRAY(llm_detected_theme,'$')) AS themes\n", " CROSS JOIN max_data\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.city` AS city\n", " ON city.city_id = 1\n", " INNER JOIN `${project_id}.${bigquery_data_beans_curated_dataset}.location_history` AS location_history\n", " ON customer_review.location_id = location_history.location_id\n", " AND customer_review.review_datetime BETWEEN location_history.start_datetime AND location_history.stop_datetime\n", " WHERE customer_review.review_datetime BETWEEN TIMESTAMP_SUB(max_data.max_review_datetime, INTERVAL 7 DAY)\n", " AND max_data.max_review_datetime\n", " AND customer_review.review_sentiment = 'Negative'\n", ")\n", "SELECT STRING_AGG(REPLACE(review_theme,' ','-'), ' ') AS words\n", " FROM theme_data;" ] }, { "cell_type": "markdown", "metadata": { "id": "moBB3pk-LZEU" }, "source": [ "### Postive Reviews\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "y6ERdvSVIWrM" }, "outputs": [], "source": [ "wc = WordCloud(background_color='white', width = 1920, height = 1080).generate(wordcloud_Positive_df.iloc[0]['words'])\n", "plt.rcParams['figure.figsize'] = [12, 12]\n", "plt.axis(\"off\")\n", "plt.imshow(wc)" ] }, { "cell_type": "markdown", "metadata": { "id": "GvyZMp5TLhzE" }, "source": [ "### Negative Reviews\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_Z9oV5GLLiB8" }, "outputs": [], "source": [ "wc = WordCloud(background_color='white', width = 1920, height = 1080).generate(wordcloud_Negative_df.iloc[0]['words'])\n", "plt.rcParams['figure.figsize'] = [12, 12]\n", "plt.axis(\"off\")\n", "plt.imshow(wc)" ] } ], "metadata": { "colab": { "name": "BigQuery table", "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }