dataplex/data-explore/dataplex-explore-notebook.ipynb (147 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cf1bfbd6-60c4-431d-99fe-8d8dca942ad2",
"metadata": {},
"outputs": [],
"source": [
"%%markdown\n",
"\n",
"### Use Cases:\n",
" - Data Scientists need access to data in your data warehouse. BigQuery makes this extremely easy.\n",
"\n",
"### Description: \n",
" - BigQuery provides a magic keyword ```%%bigquery``` used for executing queries to view the results or to populate a dataframe.\n",
"\n",
"### Reference:\n",
" - https://cloud.google.com/bigquery/docs/visualize-jupyter\n",
"\n",
"### Clean up / Reset script:\n",
" n/a \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "138d5714-54aa-4fc6-9c85-a00b3fb079f7",
"metadata": {},
"outputs": [],
"source": [
"%%bigquery\n",
"\n",
"-- Query BigQuery Data\n",
"WITH WeekdayData AS\n",
"(\n",
"SELECT FORMAT_DATE(\"%B\", Pickup_DateTime) AS MonthName,\n",
" FORMAT_DATE(\"%m\", Pickup_DateTime) AS MonthNumber,\n",
" FORMAT_DATE(\"%A\", Pickup_DateTime) AS WeekdayName,\n",
" SUM(taxi_trips.Total_Amount) AS Total_Amount\n",
" FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips\n",
" WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31' \n",
" AND Payment_Type_Id IN (1,2,3,4)\n",
" GROUP BY 1, 2, 3\n",
")\n",
"SELECT MonthName,\n",
" FORMAT(\"%'d\", CAST(Sunday AS INTEGER)) AS Sunday,\n",
" FORMAT(\"%'d\", CAST(Monday AS INTEGER)) AS Monday,\n",
" FORMAT(\"%'d\", CAST(Tuesday AS INTEGER)) AS Tuesday,\n",
" FORMAT(\"%'d\", CAST(Wednesday AS INTEGER)) AS Wednesday,\n",
" FORMAT(\"%'d\", CAST(Thursday AS INTEGER)) AS Thursday,\n",
" FORMAT(\"%'d\", CAST(Friday AS INTEGER)) AS Friday,\n",
" FORMAT(\"%'d\", CAST(Saturday AS INTEGER)) AS Saturday,\n",
" FROM WeekdayData\n",
" PIVOT(SUM(Total_Amount) FOR WeekdayName IN ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'))\n",
"ORDER BY MonthNumber;"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26a463c4-9675-4b2c-b415-108628d85849",
"metadata": {},
"outputs": [],
"source": [
"%%bigquery myDataframe\n",
"WITH MonthlyData AS\n",
"(\n",
"SELECT FORMAT_DATE(\"%B\", taxi_trips.Pickup_DateTime) AS MonthName,\n",
" FORMAT_DATE(\"%m\", taxi_trips.Pickup_DateTime) AS MonthNumber,\n",
" CASE WHEN taxi_trips.Payment_Type_Id = 1 THEN 'Credit'\n",
" WHEN taxi_trips.Payment_Type_Id = 2 THEN 'Cash'\n",
" ELSE 'Other'\n",
" END AS PaymentDescription,\n",
" SUM(taxi_trips.Total_Amount) AS Total_Amount\n",
" FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips\n",
" WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31' \n",
" AND Passenger_Count IS NOT NULL\n",
" AND Payment_Type_Id IN (1,2,3,4)\n",
" GROUP BY 1, 2, 3 \n",
")\n",
"SELECT MonthName,\n",
" CAST(Credit AS INTEGER) / 1000 AS Credit,\n",
" CAST(Cash AS INTEGER) / 1000 AS Cash,\n",
" CAST(Other AS INTEGER) / 1000 AS Other\n",
" FROM MonthlyData\n",
" PIVOT(SUM(Total_Amount) FOR PaymentDescription IN ('Credit', 'Cash', 'Other'))\n",
"ORDER BY MonthNumber;"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfba797f-9078-4623-93e5-939176eae1b5",
"metadata": {},
"outputs": [],
"source": [
"display(myDataframe)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63716b61-e8d3-4d00-91fb-78b4853e39cd",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ccd185b-6301-4f5f-94f1-3f2943f84795",
"metadata": {},
"outputs": [],
"source": [
"ax = myDataframe.plot(kind=\"bar\", x=\"MonthName\", y=[\"Cash\",\"Credit\",\"Other\"], figsize=(15, 7))\n",
"ax.set_title(\"Monthly Amounts by Payment Type\")\n",
"ax.set_xlabel(\"Month\")\n",
"ax.set_ylabel(\"Amount (thousands)\")\n",
"ax.set_ylim([0, 100000])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}