colab-enterprise/gen-ai-demo/Weather-Populate-Table.ipynb (405 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "k6eIqerFOzyj" }, "source": [ "## <img src=\"https://lh3.googleusercontent.com/mUTbNK32c_DTSNrhqETT5aQJYFKok2HB1G2nk2MZHvG5bSs0v_lmDm_ArW7rgd6SDGHXo0Ak2uFFU96X6Xd0GQ=w160-h128\" width=\"45\" valign=\"top\" alt=\"BigQuery\"> Weather-Populate-Table\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### License" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##################################################################################\n", "# Copyright 2024 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "# \n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "# \n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License.\n", "###################################################################################" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Notebook Overview" ] }, { "cell_type": "markdown", "metadata": { "id": "0zKI-ZR1Spog" }, "source": [ "- Call a HTTP endpoint and download and parse the hourly weather data data:\n", " 1. For each city: \"New York City\", \"London\", \"Tokyo\", \"San Francisco\" read the current days events\n", " 2. Call to Weather service\n", " 3. Insert the data into BigQuery as JSON\n", "\n", "- Note: The code is currently not calling a London or Japan specific weather service. The code needs to be enhanced.\n", " 1. If you want to code London see: https://www.metoffice.gov.uk/services/data/datapoint\n", " 2. If you want to code Japan see: ?" ] }, { "cell_type": "markdown", "metadata": { "id": "2FHvD3ffG52M" }, "source": [ "## Initialize Python" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Y8loQuV0Xu10" }, "outputs": [], "source": [ "# No need to set these\n", "city_names=[\"New York City\", \"London\", \"Tokyo\", \"San Francisco\"]\n", "city_ids=[1,2,3,4]\n", "nyc_url=\"https://api.weather.gov/gridpoints/OKX/33,35/forecast/hourly\"\n", "san_francisco_url=\"https://api.weather.gov/gridpoints/MTR/85,105/forecast/hourly\"\n", "\n", "# NOTE: These are not used since they are in not in the US and need to be coded\n", "london_url=\"\"\n", "tokyo_url=\"\"\n", "\n", "# NOTE: These are used as \"similar\"\n", "seattle_url=\"https://api.weather.gov/gridpoints/SEW/125,68/forecast/hourly\" # Like London\n", "washington_dc=\"https://api.weather.gov/gridpoints/LWX/96,71/forecast/hourly\" # Like Tokyo\n", "\n", "if london_url == \"\":\n", " london_url = seattle_url\n", "\n", "if tokyo_url == \"\":\n", " tokyo_url = washington_dc\n", "\n", "\n", "city_urls=[nyc_url,london_url,tokyo_url,san_francisco_url]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HTNQKUNaRsyO" }, "outputs": [], "source": [ "from google.cloud import bigquery\n", "\n", "client = bigquery.Client()" ] }, { "cell_type": "markdown", "metadata": { "id": "KOXpog83RlPG" }, "source": [ "## Create the GenAI Insights table" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jrWuWfUCSZUv" }, "outputs": [], "source": [ "%%bigquery\n", "CREATE TABLE IF NOT EXISTS `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n", "--CREATE OR REPLACE TABLE `${project_id}.${bigquery_data_beans_curated_dataset}.weather` -- only use to replace the whole table\n", "(\n", " weather_id INT NOT NULL OPTIONS(description=\"The primary key of the weather.\"),\n", " city_id INT NOT NULL OPTIONS(description=\"The foreign key of the city.\"),\n", " weather_date DATE NOT NULL OPTIONS(description=\"The date of the weather.\"),\n", " weather_json JSON NOT NULL OPTIONS(description=\"The weather JSON payload from the REST API.\"),\n", ")\n", "CLUSTER BY weather_id;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "uQlkQugOuDdk" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Remove the current days events so you re-run\n", "DELETE\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n", " WHERE weather_date = CURRENT_DATE();" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Supporting Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "GZ18slUFRpjG" }, "outputs": [], "source": [ "def RunQuery(sql):\n", " import time\n", "\n", " if (sql.startswith(\"SELECT\")):\n", " df_result = client.query(sql).to_dataframe()\n", " return df_result\n", " else:\n", " job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)\n", " query_job = client.query(sql, job_config=job_config)\n", "\n", " # Check on the progress by getting the job's updated state.\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {} with error result of {}\".format(query_job.job_id, query_job.state, query_job.error_result))\n", "\n", " while query_job.state != \"DONE\":\n", " time.sleep(2)\n", " query_job = client.get_job(\n", " query_job.job_id, location=query_job.location\n", " )\n", " print(\"Job {} is currently in state {} with error result of {}\".format(query_job.job_id, query_job.state, query_job.error_result))\n", "\n", " if query_job.error_result == None:\n", " return True\n", " else:\n", " return False" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qNUW0MFiUOFy" }, "outputs": [], "source": [ "def GetNextPrimaryKey(fully_qualified_table_name, field_name):\n", " sql = f\"\"\"\n", " SELECT IFNULL(MAX({field_name}),0) AS result\n", " FROM `{fully_qualified_table_name}`\n", " \"\"\"\n", " # print(sql)\n", " df_result = client.query(sql).to_dataframe()\n", " # display(df_result)\n", " return df_result['result'].iloc[0] + 1" ] }, { "cell_type": "markdown", "metadata": { "id": "ZrQ_gOebR2vX" }, "source": [ "## Get the Weather per City\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "wHY8TZpgHPSo" }, "source": [ "- https://www.weather.gov/documentation/services-web-api\n", "- NYC: 40.7143,-74.006\n", " 1. https://api.weather.gov/points/40.7143,-74.006\n", " 2. https://api.weather.gov/gridpoints/OKX/33,35/forecast/hourly\n", "\n", "- San Francisco: 37.7749, -122.4194\n", " 1. https://api.weather.gov/points/37.7749,-122.4194\n", " 2. https://api.weather.gov/gridpoints/MTR/85,105/forecast/hourly" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1afARUHGLlh7" }, "outputs": [], "source": [ "from datetime import datetime\n", "import requests\n", "import json\n", "\n", "weather_id = GetNextPrimaryKey(\"${project_id}.${bigquery_data_beans_curated_dataset}.weather\",\"weather_id\")\n", "print(f\"weather_id: {weather_id}\")\n", "\n", "weather_date = datetime.today().strftime('%Y-%m-%d')\n", "\n", "# Loop for each city\n", "for city_index in range(0, 4):\n", " print(f\"City: {city_ids[city_index]}\")\n", " response = requests.get(city_urls[city_index])\n", "\n", " if response.status_code == 200:\n", " # format json\n", " response_json = json.loads(response.text)\n", " weather_json = json.dumps(response_json)\n", "\n", " sql = f\"\"\"INSERT INTO `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n", " (weather_id, city_id, weather_date, weather_json)\n", " VALUES ({weather_id},{city_ids[city_index]},'{weather_date}',JSON'{weather_json}')\"\"\"\n", "\n", " print(f\"sql: {sql}\")\n", "\n", " RunQuery(sql)\n", " weather_id = weather_id + 1\n", " else:\n", " errorMessage = f\"REAT API (serviceJob) response returned response.status_code: {response.status_code} for city: {city_ids[city_index]}\"\n", " raise Exception(errorMessage)" ] }, { "cell_type": "markdown", "metadata": { "id": "PampTfgdRfqe" }, "source": [ "## See the results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RCCogPjFSPHO" }, "outputs": [], "source": [ "%%bigquery\n", "\n", "# Display the results\n", "SELECT *\n", " FROM `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n", " WHERE weather_date = CURRENT_DATE()\n", "ORDER BY weather_id" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sample Formatted JSON\n", "\n", "```\n", "{\n", " \"@context\": [\n", " \"https://geojson.org/geojson-ld/geojson-context.jsonld\",\n", " {\n", " \"@version\": \"1.1\",\n", " \"@vocab\": \"https://api.weather.gov/ontology#\",\n", " \"geo\": \"http://www.opengis.net/ont/geosparql#\",\n", " \"unit\": \"http://codes.wmo.int/common/unit/\",\n", " \"wx\": \"https://api.weather.gov/ontology#\"\n", " }\n", " ],\n", " \"properties\": {\n", " \"elevation\": {\n", " \"unitCode\": \"wmoUnit:m\",\n", " \"value\": 2.1336\n", " },\n", " \"forecastGenerator\": \"HourlyForecastGenerator\",\n", " \"generatedAt\": \"2024-03-14T13:50:03+00:00\",\n", " \"periods\": [\n", " {\n", " \"detailedForecast\": \"\",\n", " \"dewpoint\": {\n", " \"unitCode\": \"wmoUnit:degC\",\n", " \"value\": 5.555555555555555\n", " },\n", " \"endTime\": \"2024-03-14T10:00:00-04:00\",\n", " \"icon\": \"https://api.weather.gov/icons/land/day/few,0?size=small\",\n", " \"isDaytime\": true,\n", " \"name\": \"\",\n", " \"number\": 1,\n", " \"probabilityOfPrecipitation\": {\n", " \"unitCode\": \"wmoUnit:percent\",\n", " \"value\": 0\n", " },\n", " \"relativeHumidity\": {\n", " \"unitCode\": \"wmoUnit:percent\",\n", " \"value\": 74\n", " },\n", " \"shortForecast\": \"Sunny\",\n", " \"startTime\": \"2024-03-14T09:00:00-04:00\",\n", " \"temperature\": 50,\n", " \"temperatureTrend\": null,\n", " \"temperatureUnit\": \"F\",\n", " \"windDirection\": \"W\",\n", " \"windSpeed\": \"5 mph\"\n", " }\n", " ],\n", " \"units\": \"us\",\n", " \"updateTime\": \"2024-03-14T11:27:56+00:00\",\n", " \"updated\": \"2024-03-14T11:27:56+00:00\",\n", " \"validTimes\": \"2024-03-14T05:00:00+00:00/P7DT20H\"\n", " },\n", " \"type\": \"Feature\"\n", "}\n", "```" ] } ], "metadata": { "colab": { "collapsed_sections": [ "2FHvD3ffG52M", "KOXpog83RlPG" ], "name": "BigQuery table", "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }