sql-scripts/taxi_dataset/sp_demo_bigquery_queries.sql (108 lines of code) (raw):
/*##################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################*/
/*
Use Cases:
- BigQuery supports full SQL syntax and many analytic functions that make complex queries of lots of data easy
Description:
- Show joins, date functions, rank, partition, pivot
Reference:
- Rank/Partition: https://cloud.google.com/bigquery/docs/reference/standard-sql/analytic-function-concepts
- Pivot: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#pivot_operator
Clean up / Reset script:
n/a
*/
--Rank, Pivot, Json
-- Query: Get trips over $50 for each day of the week for 6 months.
-- Shows: Date Functions, Joins, Group By, Having, Ordinal Group/Having
SELECT FORMAT_DATE("%w", Pickup_DateTime) AS WeekdayNumber,
FORMAT_DATE("%A", Pickup_DateTime) AS WeekdayName,
vendor.Vendor_Description,
payment_type.Payment_Type_Description,
SUM(taxi_trips.Total_Amount) AS high_value_trips
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
INNER JOIN `${project_id}.${bigquery_taxi_dataset}.vendor` AS vendor
ON taxi_trips.Vendor_Id = vendor.Vendor_Id
AND taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-06-01'
LEFT JOIN `${project_id}.${bigquery_taxi_dataset}.payment_type` AS payment_type
ON taxi_trips.Payment_Type_Id = payment_type.Payment_Type_Id
GROUP BY 1, 2, 3, 4
HAVING SUM(taxi_trips.Total_Amount) > 50
ORDER BY WeekdayNumber, 3, 4;
-- Query: 6 months of amounts (Cash/Credit) by passenger type
WITH TaxiDataRanking AS
(
SELECT CAST(Pickup_DateTime AS DATE) AS Pickup_Date,
taxi_trips.Payment_Type_Id,
taxi_trips.Passenger_Count,
taxi_trips.Total_Amount,
RANK() OVER (PARTITION BY CAST(Pickup_DateTime AS DATE),
taxi_trips.Payment_Type_Id
ORDER BY taxi_trips.Passenger_Count DESC,
taxi_trips.Total_Amount DESC) AS Ranking
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-06-01'
AND taxi_trips.Payment_Type_Id IN (1,2)
)
SELECT Pickup_Date,
Payment_Type_Description,
Passenger_Count,
Total_Amount
FROM TaxiDataRanking
INNER JOIN `${project_id}.${bigquery_taxi_dataset}.payment_type` AS payment_type
ON TaxiDataRanking.Payment_Type_Id = payment_type.Payment_Type_Id
WHERE Ranking = 1
ORDER BY Pickup_Date, Payment_Type_Description;
-- Query: 6 months of data summed by payment type and passenger count, then pivoted based upon payment type
WITH MonthlyData AS
(
SELECT FORMAT_DATE("%B", taxi_trips.Pickup_DateTime) AS MonthName,
FORMAT_DATE("%m", taxi_trips.Pickup_DateTime) AS MonthNumber,
CASE WHEN taxi_trips.Payment_Type_Id = 1 THEN 'Credit'
WHEN taxi_trips.Payment_Type_Id = 2 THEN 'Cash'
WHEN taxi_trips.Payment_Type_Id = 3 THEN 'NoCharge'
WHEN taxi_trips.Payment_Type_Id = 4 THEN 'Dispute'
END AS PaymentDescription,
taxi_trips.Passenger_Count,
taxi_trips.Total_Amount
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-06-01'
AND Passenger_Count IS NOT NULL
AND Payment_Type_Id IN (1,2,3,4)
)
SELECT MonthName,
Passenger_Count,
FORMAT("%'d", CAST(Credit AS INTEGER)) AS Credit,
FORMAT("%'d", CAST(Cash AS INTEGER)) AS Cash,
FORMAT("%'d", CAST(NoCharge AS INTEGER)) AS NoCharge,
FORMAT("%'d", CAST(Dispute AS INTEGER)) AS Dispute
FROM MonthlyData
PIVOT(SUM(Total_Amount) FOR PaymentDescription IN ('Credit', 'Cash', 'NoCharge', 'Dispute'))
ORDER BY MonthNumber, Passenger_Count;
-- Query: 1 years worth of data pivoted by payment type
WITH MonthlyData AS
(
SELECT FORMAT_DATE("%B", taxi_trips.Pickup_DateTime) AS MonthName,
FORMAT_DATE("%m", taxi_trips.Pickup_DateTime) AS MonthNumber,
CASE WHEN taxi_trips.Payment_Type_Id = 1 THEN 'Credit'
WHEN taxi_trips.Payment_Type_Id = 2 THEN 'Cash'
WHEN taxi_trips.Payment_Type_Id = 3 THEN 'NoCharge'
WHEN taxi_trips.Payment_Type_Id = 4 THEN 'Dispute'
END AS PaymentDescription,
SUM(taxi_trips.Total_Amount) AS Total_Amount
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31'
AND Passenger_Count IS NOT NULL
AND Payment_Type_Id IN (1,2,3,4)
GROUP BY 1, 2, 3
)
SELECT MonthName,
FORMAT("%'d", CAST(Credit AS INTEGER)) AS Credit,
FORMAT("%'d", CAST(Cash AS INTEGER)) AS Cash,
FORMAT("%'d", CAST(NoCharge AS INTEGER)) AS NoCharge,
FORMAT("%'d", CAST(Dispute AS INTEGER)) AS Dispute
FROM MonthlyData
PIVOT(SUM(Total_Amount) FOR PaymentDescription IN ('Credit', 'Cash', 'NoCharge', 'Dispute'))
ORDER BY MonthNumber;
-- Query: See what day of the week in each month has the greatest amount (that's the month/day to work)
WITH WeekdayData AS
(
SELECT FORMAT_DATE("%B", Pickup_DateTime) AS MonthName,
FORMAT_DATE("%m", Pickup_DateTime) AS MonthNumber,
FORMAT_DATE("%A", Pickup_DateTime) AS WeekdayName,
SUM(taxi_trips.Total_Amount) AS Total_Amount
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
WHERE taxi_trips.Pickup_DateTime BETWEEN '2020-01-01' AND '2020-12-31'
AND Payment_Type_Id IN (1,2,3,4)
GROUP BY 1, 2, 3
)
SELECT MonthName,
FORMAT("%'d", CAST(Sunday AS INTEGER)) AS Sunday,
FORMAT("%'d", CAST(Monday AS INTEGER)) AS Monday,
FORMAT("%'d", CAST(Tuesday AS INTEGER)) AS Tuesday,
FORMAT("%'d", CAST(Wednesday AS INTEGER)) AS Wednesday,
FORMAT("%'d", CAST(Thursday AS INTEGER)) AS Thursday,
FORMAT("%'d", CAST(Friday AS INTEGER)) AS Friday,
FORMAT("%'d", CAST(Saturday AS INTEGER)) AS Saturday,
FROM WeekdayData
PIVOT(SUM(Total_Amount) FOR WeekdayName IN ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'))
ORDER BY MonthNumber;