modules/data_warehouse/src/sql/sp_sample_queries.sql (150 lines of code) (raw):

-- Copyright 2023 Google LLC -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. /* Use Cases: - BigQuery supports full SQL syntax and many analytic functions that make complex queries of lots of data easy Description: - Show joins, date functions, rank, partition, pivot Reference: - Rank/Partition: https://cloud.google.com/bigquery/docs/reference/standard-sql/analytic-function-concepts - Pivot: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#pivot_operator Clean up / Reset script: n/a */ --Rank, Pivot, Json -- Query: See the order price quartiles for each day of the week. -- Shows: Date Functions, Joins, Group By, Having, Ordinal Group/Having, Quantiles SELECT FORMAT_DATE("%w", created_at) AS WeekdayNumber, FORMAT_DATE("%A", created_at) AS WeekdayName, APPROX_QUANTILES(order_price, 4) AS quartiles FROM ( SELECT created_at, SUM(sale_price) AS order_price FROM `${project_id}.${dataset_id}.order_items` GROUP BY order_id, 1 HAVING SUM(sale_price) > 10) GROUP BY 1, 2 ORDER BY WeekdayNumber ; -- Query: Items with less than 30 days of inventory remaining WITH Orders AS ( SELECT order_items.product_id AS product_id, COUNT(order_items.id) AS count_sold_30d FROM `${project_id}.${dataset_id}.order_items` AS order_items WHERE order_items.created_at > TIMESTAMP_SUB(TIMESTAMP("2023-09-26 00:00:00"), INTERVAL 30 DAY) GROUP BY product_id ), OnHand AS ( SELECT inventory.product_id AS product_id, inventory.product_name AS product_name, COUNT(inventory.id) AS count_in_stock FROM `${project_id}.${dataset_id}.inventory_items` AS inventory WHERE inventory.sold_at IS NULL GROUP BY product_id, product_name ORDER BY count_in_stock DESC ), End30dInventory AS ( SELECT OnHand.*, Orders.count_sold_30d, count_in_stock - count_sold_30d AS expected_inventory_30d FROM OnHand INNER JOIN Orders USING (product_id) ) SELECT RANK() OVER (ORDER BY expected_inventory_30d ASC) AS rank, End30dInventory.product_name, End30dInventory.expected_inventory_30d, End30dInventory.count_in_stock AS current_stock, End30dInventory.count_sold_30d FROM End30dInventory ORDER BY rank ASC, count_sold_30d DESC, current_stock DESC ; -- Query: data summed by month, then pivoted by department with MonthlyData AS( SELECT sold_at, FORMAT_DATE("%B", inventory.sold_at) AS month_name, FORMAT_DATE("%m", inventory.sold_at) AS month_number, SAFE_SUBTRACT(inventory.product_retail_price, inventory.cost) AS profit, inventory.product_department AS product_department FROM `${project_id}.${dataset_id}.inventory_items` AS inventory WHERE sold_at IS NOT NULL ) SELECT month_name, FORMAT("%'d", CAST(Profit_Men AS INTEGER)) AS Profit_Men, FORMAT("%'d", CAST(Profit_Women AS INTEGER)) AS Profit_Women FROM MonthlyData PIVOT (SUM(profit) AS Profit FOR product_department IN ("Men", "Women")) ORDER BY month_number ASC ; -- Query: See what day of the week in each month has the greatest amount of sales(that's the month/day to work) WITH WeekdayData AS ( SELECT FORMAT_DATE("%B", inventory.sold_at) AS month_name, FORMAT_DATE("%m", inventory.sold_at) AS month_number, FORMAT_DATE("%A", inventory.sold_at) AS weekday_name, SUM(inventory.product_retail_price) AS revenue FROM `${project_id}.${dataset_id}.inventory_items` AS inventory WHERE inventory.sold_at IS NOT NULL GROUP BY 1, 2, 3 ) SELECT month_name, FORMAT("%'d", CAST(Sunday AS INTEGER)) AS Sunday, FORMAT("%'d", CAST(Monday AS INTEGER)) AS Monday, FORMAT("%'d", CAST(Tuesday AS INTEGER)) AS Tuesday, FORMAT("%'d", CAST(Wednesday AS INTEGER)) AS Wednesday, FORMAT("%'d", CAST(Thursday AS INTEGER)) AS Thursday, FORMAT("%'d", CAST(Friday AS INTEGER)) AS Friday, FORMAT("%'d", CAST(Saturday AS INTEGER)) AS Saturday, FROM WeekdayData PIVOT(SUM(revenue) FOR weekday_name IN ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday')) ORDER BY month_number ; -- Query: Revenue pivoted by category name for each month. -- This query dynamically generates the pivot column names based on the distinct values in the product_category column EXECUTE IMMEDIATE FORMAT(""" with Subset AS( SELECT EXTRACT(MONTH FROM inventory.sold_at) AS month_number, inventory.product_category, inventory.product_retail_price FROM `${project_id}.${dataset_id}.inventory_items` AS inventory WHERE inventory.sold_at IS NOT NULL) SELECT CASE WHEN month_number = 1 THEN 'January' WHEN month_number = 2 THEN 'February' WHEN month_number = 3 THEN 'March' WHEN month_number = 4 THEN 'April' WHEN month_number = 5 THEN 'May' WHEN month_number = 6 THEN 'June' WHEN month_number = 7 THEN 'July' WHEN month_number = 8 THEN 'August' WHEN month_number = 9 THEN 'September' WHEN month_number = 10 THEN 'October' WHEN month_number = 11 THEN 'November' WHEN month_number = 12 THEN 'December' END AS month_name, * EXCEPT (month_number) FROM Subset PIVOT (SUM(Subset.product_retail_price) as Revenue FOR product_category IN %s) ORDER BY month_number; """, ( SELECT CONCAT("(", STRING_AGG(DISTINCT CONCAT("'", product_category, "'"), ','), ")") FROM `${project_id}.${dataset_id}.inventory_items` ) ) ;