dataplex/data-explore/dataplex-explore-notebook.ipynb (147 lines of code) (raw):

{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "cf1bfbd6-60c4-431d-99fe-8d8dca942ad2", "metadata": {}, "outputs": [], "source": [ "%%markdown\n", "\n", "### Use Cases:\n", " - Data Scientists need access to data in your data warehouse. BigQuery makes this extremely easy.\n", "\n", "### Description: \n", " - BigQuery provides a magic keyword ```%%bigquery``` used for executing queries to view the results or to populate a dataframe.\n", "\n", "### Reference:\n", " - https://cloud.google.com/bigquery/docs/visualize-jupyter\n", "\n", "### Clean up / Reset script:\n", " n/a \n" ] }, { "cell_type": "code", "execution_count": null, "id": "138d5714-54aa-4fc6-9c85-a00b3fb079f7", "metadata": {}, "outputs": [], "source": [ "%%bigquery\n", "\n", "-- Query BigQuery Data\n", "WITH WeekdayData AS\n", "(\n", "SELECT FORMAT_DATE(\"%B\", Pickup_DateTime) AS MonthName,\n", " FORMAT_DATE(\"%m\", Pickup_DateTime) AS MonthNumber,\n", " FORMAT_DATE(\"%A\", Pickup_DateTime) AS WeekdayName,\n", " SUM(taxi_trips.Total_Amount) AS Total_Amount\n", " FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips\n", " WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31' \n", " AND Payment_Type_Id IN (1,2,3,4)\n", " GROUP BY 1, 2, 3\n", ")\n", "SELECT MonthName,\n", " FORMAT(\"%'d\", CAST(Sunday AS INTEGER)) AS Sunday,\n", " FORMAT(\"%'d\", CAST(Monday AS INTEGER)) AS Monday,\n", " FORMAT(\"%'d\", CAST(Tuesday AS INTEGER)) AS Tuesday,\n", " FORMAT(\"%'d\", CAST(Wednesday AS INTEGER)) AS Wednesday,\n", " FORMAT(\"%'d\", CAST(Thursday AS INTEGER)) AS Thursday,\n", " FORMAT(\"%'d\", CAST(Friday AS INTEGER)) AS Friday,\n", " FORMAT(\"%'d\", CAST(Saturday AS INTEGER)) AS Saturday,\n", " FROM WeekdayData\n", " PIVOT(SUM(Total_Amount) FOR WeekdayName IN ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'))\n", "ORDER BY MonthNumber;" ] }, { "cell_type": "code", "execution_count": null, "id": "26a463c4-9675-4b2c-b415-108628d85849", "metadata": {}, "outputs": [], "source": [ "%%bigquery myDataframe\n", "WITH MonthlyData AS\n", "(\n", "SELECT FORMAT_DATE(\"%B\", taxi_trips.Pickup_DateTime) AS MonthName,\n", " FORMAT_DATE(\"%m\", taxi_trips.Pickup_DateTime) AS MonthNumber,\n", " CASE WHEN taxi_trips.Payment_Type_Id = 1 THEN 'Credit'\n", " WHEN taxi_trips.Payment_Type_Id = 2 THEN 'Cash'\n", " ELSE 'Other'\n", " END AS PaymentDescription,\n", " SUM(taxi_trips.Total_Amount) AS Total_Amount\n", " FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips\n", " WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31' \n", " AND Passenger_Count IS NOT NULL\n", " AND Payment_Type_Id IN (1,2,3,4)\n", " GROUP BY 1, 2, 3 \n", ")\n", "SELECT MonthName,\n", " CAST(Credit AS INTEGER) / 1000 AS Credit,\n", " CAST(Cash AS INTEGER) / 1000 AS Cash,\n", " CAST(Other AS INTEGER) / 1000 AS Other\n", " FROM MonthlyData\n", " PIVOT(SUM(Total_Amount) FOR PaymentDescription IN ('Credit', 'Cash', 'Other'))\n", "ORDER BY MonthNumber;" ] }, { "cell_type": "code", "execution_count": null, "id": "dfba797f-9078-4623-93e5-939176eae1b5", "metadata": {}, "outputs": [], "source": [ "display(myDataframe)" ] }, { "cell_type": "code", "execution_count": null, "id": "63716b61-e8d3-4d00-91fb-78b4853e39cd", "metadata": {}, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "id": "7ccd185b-6301-4f5f-94f1-3f2943f84795", "metadata": {}, "outputs": [], "source": [ "ax = myDataframe.plot(kind=\"bar\", x=\"MonthName\", y=[\"Cash\",\"Credit\",\"Other\"], figsize=(15, 7))\n", "ax.set_title(\"Monthly Amounts by Payment Type\")\n", "ax.set_xlabel(\"Month\")\n", "ax.set_ylabel(\"Amount (thousands)\")\n", "ax.set_ylim([0, 100000])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.15" } }, "nbformat": 4, "nbformat_minor": 5 }