qwiklabs/colab-enterprise/gen-ai-demo/Weather-Populate-Table.ipynb (405 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "k6eIqerFOzyj"
},
"source": [
"## <img src=\"https://lh3.googleusercontent.com/mUTbNK32c_DTSNrhqETT5aQJYFKok2HB1G2nk2MZHvG5bSs0v_lmDm_ArW7rgd6SDGHXo0Ak2uFFU96X6Xd0GQ=w160-h128\" width=\"45\" valign=\"top\" alt=\"BigQuery\"> Weather-Populate-Table\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### License"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"##################################################################################\n",
"# Copyright 2024 Google LLC\n",
"#\n",
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"# \n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"# \n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License.\n",
"###################################################################################"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Notebook Overview"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0zKI-ZR1Spog"
},
"source": [
"- Call a HTTP endpoint and download and parse the hourly weather data data:\n",
" 1. For each city: \"New York City\", \"London\", \"Tokyo\", \"San Francisco\" read the current days events\n",
" 2. Call to Weather service\n",
" 3. Insert the data into BigQuery as JSON\n",
"\n",
"- Note: The code is currently not calling a London or Japan specific weather service. The code needs to be enhanced.\n",
" 1. If you want to code London see: https://www.metoffice.gov.uk/services/data/datapoint\n",
" 2. If you want to code Japan see: ?"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2FHvD3ffG52M"
},
"source": [
"## Initialize Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y8loQuV0Xu10"
},
"outputs": [],
"source": [
"# No need to set these\n",
"city_names=[\"New York City\", \"London\", \"Tokyo\", \"San Francisco\"]\n",
"city_ids=[1,2,3,4]\n",
"nyc_url=\"https://api.weather.gov/gridpoints/OKX/33,35/forecast/hourly\"\n",
"san_francisco_url=\"https://api.weather.gov/gridpoints/MTR/85,105/forecast/hourly\"\n",
"\n",
"# NOTE: These are not used since they are in not in the US and need to be coded\n",
"london_url=\"\"\n",
"tokyo_url=\"\"\n",
"\n",
"# NOTE: These are used as \"similar\"\n",
"seattle_url=\"https://api.weather.gov/gridpoints/SEW/125,68/forecast/hourly\" # Like London\n",
"washington_dc=\"https://api.weather.gov/gridpoints/LWX/96,71/forecast/hourly\" # Like Tokyo\n",
"\n",
"if london_url == \"\":\n",
" london_url = seattle_url\n",
"\n",
"if tokyo_url == \"\":\n",
" tokyo_url = washington_dc\n",
"\n",
"\n",
"city_urls=[nyc_url,london_url,tokyo_url,san_francisco_url]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HTNQKUNaRsyO"
},
"outputs": [],
"source": [
"from google.cloud import bigquery\n",
"\n",
"client = bigquery.Client()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KOXpog83RlPG"
},
"source": [
"## Create the GenAI Insights table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jrWuWfUCSZUv"
},
"outputs": [],
"source": [
"%%bigquery\n",
"CREATE TABLE IF NOT EXISTS `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n",
"--CREATE OR REPLACE TABLE `${project_id}.${bigquery_data_beans_curated_dataset}.weather` -- only use to replace the whole table\n",
"(\n",
" weather_id INT NOT NULL OPTIONS(description=\"The primary key of the weather.\"),\n",
" city_id INT NOT NULL OPTIONS(description=\"The foreign key of the city.\"),\n",
" weather_date DATE NOT NULL OPTIONS(description=\"The date of the weather.\"),\n",
" weather_json JSON NOT NULL OPTIONS(description=\"The weather JSON payload from the REST API.\"),\n",
")\n",
"CLUSTER BY weather_id;"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uQlkQugOuDdk"
},
"outputs": [],
"source": [
"%%bigquery\n",
"\n",
"-- Remove the current days events so you re-run\n",
"DELETE\n",
" FROM `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n",
" WHERE weather_date = CURRENT_DATE();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Supporting Functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GZ18slUFRpjG"
},
"outputs": [],
"source": [
"def RunQuery(sql):\n",
" import time\n",
"\n",
" if (sql.startswith(\"SELECT\")):\n",
" df_result = client.query(sql).to_dataframe()\n",
" return df_result\n",
" else:\n",
" job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)\n",
" query_job = client.query(sql, job_config=job_config)\n",
"\n",
" # Check on the progress by getting the job's updated state.\n",
" query_job = client.get_job(\n",
" query_job.job_id, location=query_job.location\n",
" )\n",
" print(\"Job {} is currently in state {} with error result of {}\".format(query_job.job_id, query_job.state, query_job.error_result))\n",
"\n",
" while query_job.state != \"DONE\":\n",
" time.sleep(2)\n",
" query_job = client.get_job(\n",
" query_job.job_id, location=query_job.location\n",
" )\n",
" print(\"Job {} is currently in state {} with error result of {}\".format(query_job.job_id, query_job.state, query_job.error_result))\n",
"\n",
" if query_job.error_result == None:\n",
" return True\n",
" else:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qNUW0MFiUOFy"
},
"outputs": [],
"source": [
"def GetNextPrimaryKey(fully_qualified_table_name, field_name):\n",
" sql = f\"\"\"\n",
" SELECT IFNULL(MAX({field_name}),0) AS result\n",
" FROM `{fully_qualified_table_name}`\n",
" \"\"\"\n",
" # print(sql)\n",
" df_result = client.query(sql).to_dataframe()\n",
" # display(df_result)\n",
" return df_result['result'].iloc[0] + 1"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZrQ_gOebR2vX"
},
"source": [
"## Get the Weather per City\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wHY8TZpgHPSo"
},
"source": [
"- https://www.weather.gov/documentation/services-web-api\n",
"- NYC: 40.7143,-74.006\n",
" 1. https://api.weather.gov/points/40.7143,-74.006\n",
" 2. https://api.weather.gov/gridpoints/OKX/33,35/forecast/hourly\n",
"\n",
"- San Francisco: 37.7749, -122.4194\n",
" 1. https://api.weather.gov/points/37.7749,-122.4194\n",
" 2. https://api.weather.gov/gridpoints/MTR/85,105/forecast/hourly"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1afARUHGLlh7"
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import requests\n",
"import json\n",
"\n",
"weather_id = GetNextPrimaryKey(\"${project_id}.${bigquery_data_beans_curated_dataset}.weather\",\"weather_id\")\n",
"print(f\"weather_id: {weather_id}\")\n",
"\n",
"weather_date = datetime.today().strftime('%Y-%m-%d')\n",
"\n",
"# Loop for each city\n",
"for city_index in range(0, 4):\n",
" print(f\"City: {city_ids[city_index]}\")\n",
" response = requests.get(city_urls[city_index])\n",
"\n",
" if response.status_code == 200:\n",
" # format json\n",
" response_json = json.loads(response.text)\n",
" weather_json = json.dumps(response_json)\n",
"\n",
" sql = f\"\"\"INSERT INTO `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n",
" (weather_id, city_id, weather_date, weather_json)\n",
" VALUES ({weather_id},{city_ids[city_index]},'{weather_date}',JSON'{weather_json}')\"\"\"\n",
"\n",
" print(f\"sql: {sql}\")\n",
"\n",
" RunQuery(sql)\n",
" weather_id = weather_id + 1\n",
" else:\n",
" errorMessage = f\"REAT API (serviceJob) response returned response.status_code: {response.status_code} for city: {city_ids[city_index]}\"\n",
" raise Exception(errorMessage)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "PampTfgdRfqe"
},
"source": [
"## See the results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RCCogPjFSPHO"
},
"outputs": [],
"source": [
"%%bigquery\n",
"\n",
"# Display the results\n",
"SELECT *\n",
" FROM `${project_id}.${bigquery_data_beans_curated_dataset}.weather`\n",
" WHERE weather_date = CURRENT_DATE()\n",
"ORDER BY weather_id"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Sample Formatted JSON\n",
"\n",
"```\n",
"{\n",
" \"@context\": [\n",
" \"https://geojson.org/geojson-ld/geojson-context.jsonld\",\n",
" {\n",
" \"@version\": \"1.1\",\n",
" \"@vocab\": \"https://api.weather.gov/ontology#\",\n",
" \"geo\": \"http://www.opengis.net/ont/geosparql#\",\n",
" \"unit\": \"http://codes.wmo.int/common/unit/\",\n",
" \"wx\": \"https://api.weather.gov/ontology#\"\n",
" }\n",
" ],\n",
" \"properties\": {\n",
" \"elevation\": {\n",
" \"unitCode\": \"wmoUnit:m\",\n",
" \"value\": 2.1336\n",
" },\n",
" \"forecastGenerator\": \"HourlyForecastGenerator\",\n",
" \"generatedAt\": \"2024-03-14T13:50:03+00:00\",\n",
" \"periods\": [\n",
" {\n",
" \"detailedForecast\": \"\",\n",
" \"dewpoint\": {\n",
" \"unitCode\": \"wmoUnit:degC\",\n",
" \"value\": 5.555555555555555\n",
" },\n",
" \"endTime\": \"2024-03-14T10:00:00-04:00\",\n",
" \"icon\": \"https://api.weather.gov/icons/land/day/few,0?size=small\",\n",
" \"isDaytime\": true,\n",
" \"name\": \"\",\n",
" \"number\": 1,\n",
" \"probabilityOfPrecipitation\": {\n",
" \"unitCode\": \"wmoUnit:percent\",\n",
" \"value\": 0\n",
" },\n",
" \"relativeHumidity\": {\n",
" \"unitCode\": \"wmoUnit:percent\",\n",
" \"value\": 74\n",
" },\n",
" \"shortForecast\": \"Sunny\",\n",
" \"startTime\": \"2024-03-14T09:00:00-04:00\",\n",
" \"temperature\": 50,\n",
" \"temperatureTrend\": null,\n",
" \"temperatureUnit\": \"F\",\n",
" \"windDirection\": \"W\",\n",
" \"windSpeed\": \"5 mph\"\n",
" }\n",
" ],\n",
" \"units\": \"us\",\n",
" \"updateTime\": \"2024-03-14T11:27:56+00:00\",\n",
" \"updated\": \"2024-03-14T11:27:56+00:00\",\n",
" \"validTimes\": \"2024-03-14T05:00:00+00:00/P7DT20H\"\n",
" },\n",
" \"type\": \"Feature\"\n",
"}\n",
"```"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"2FHvD3ffG52M",
"KOXpog83RlPG"
],
"name": "BigQuery table",
"private_outputs": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}