Data-Beans-Create-Orders.sql (118 lines of code) (raw):
-- location_id 1 to 50
-- customer_id 1 to 10,702
-- 200 usa
-- 200 london GBR
-- 200 japan JPN
-- company_id 1 to 10
-- menu_id 1 to 212
-- customer reviews 1 to 8,027 (missing some rows)
-- CREATE OR REPLACE TABLE `PROJECT-ID.coffee_curated.order_backup` AS SELECT * FROM `PROJECT-ID.coffee_curated.order`
INSERT INTO
`PROJECT-ID.coffee_curated.order` (
order_id,
location_id,
customer_id,
order_datetime,
order_completion_datetime)
WITH
data_max_id AS (
-- 2021-11-01 03:59:47.952295 UTC, 2023-12-01 14:35:18.628451 UTC, 10000000, 10000000
SELECT MIN(order_datetime) AS min_order_datetime, MAX(order_datetime) AS max_order_datetime, MAX(order_id) AS max_id, COUNT(*) AS record_count
FROM`PROJECT-ID.coffee_curated.order`
),
data_random_data AS (
SELECT CAST(ROUND(1 + RAND() * (10 - 1)) AS INT64) AS location_id,
CAST(ROUND(1 + RAND() * (10702 - 1)) AS INT64) AS customer_id,
TIMESTAMP_ADD(data_max_id.max_order_datetime, INTERVAL CAST(ROUND(1 + RAND() * (30 * 24 * 60 * 2) - 1) AS INT64) MINUTE) AS order_datetime, -- 2 months of minutes
FROM UNNEST(GENERATE_ARRAY(1, 1000000)) AS element
CROSS JOIN data_max_id
),
data_random_all_data AS (
SELECT *,
TIMESTAMP_ADD(order_datetime, INTERVAL CAST(ROUND(60 + RAND() * ((60*15)) - 60) AS INT64) SECOND) AS order_completion_datetime, -- from 60 seconds to 15 minutes
FROM data_random_data
),
data_ranked AS (
SELECT *,
ROW_NUMBER() OVER (ORDER BY order_datetime) AS row_number
FROM data_random_all_data
)
-- SELECT extract(YEAR from order_datetime) as year, extract(MONTH from order_datetime) as month, count(*) FROM data_ranked GROUP BY ALL;
-- SELECT min(order_datetime), max(order_datetime) FROM data_ranked
SELECT row_number + max_id AS order_id,
location_id,
customer_id,
order_datetime,
order_completion_datetime
FROM data_ranked
CROSS JOIN data_max_id;
/*
SELECT extract(YEAR from order_datetime) as year,
extract(MONTH from order_datetime) as month,
count(*)
FROM`PROJECT-ID.coffee_curated.order`
group by all
order by 1 desc, 2 desc
delete
from `PROJECT-ID.coffee_curated.order`
where extract(YEAR from order_datetime) = 2027
and extract(MONTH from order_datetime) > 6;
*/
-- location_id 1 to 50
-- customer_id 1 to 10,702
-- 200 usa
-- 200 london GBR
-- 200 japan JPN
-- company_id 1 to 10
-- menu_id 1 to 212
-- customer reviews 1 to 8,027 (missing some rows)
--truncate table `PROJECT-ID.coffee_curated.order_item`;
-- CREATE OR REPLACE TABLE `PROJECT-ID.coffee_curated.order_item_backup` AS SELECT * FROM `PROJECT-ID.coffee_curated.order_item`
INSERT INTO `PROJECT-ID.coffee_curated.order_item`
(order_item_id, order_id, menu_id, quantity, item_size, item_price, item_total)
WITH
data_max_id AS (
SELECT IFNULL(MAX(order_item_id),0) AS max_id
FROM `PROJECT-ID.coffee_curated.order_item`
),
-- order's without items
data_order AS (
SELECT order_id,
CASE WHEN RAND() <= .5 THEN 1 -- most order are 1 item
ELSE CAST(ROUND(2 + RAND() * (5 - 2)) AS INT64) -- up to 5 items per order
END AS order_item_count
FROM `PROJECT-ID.coffee_curated.order` AS order_t
WHERE NOT EXISTS (SELECT * FROM `PROJECT-ID.coffee_curated.order_item` AS order_item WHERE order_t.order_id = order_item.order_id)
),
data_order_with_array AS
(
select order_id,
order_item_count,
GENERATE_ARRAY(1, order_item_count) as order_item_array,
RAND() AS quantity_rand
from data_order
),
data_random_all_data AS (
SELECT order_id,
order_item AS order_item_id,
CAST(ROUND(1 + RAND() * (212 - 1)) AS INT64) AS menu_id,
CASE WHEN quantity_rand <= .75 THEN 1 -- most orders are 1 quanity
WHEN quantity_rand <= .85 THEN 2
WHEN quantity_rand <= .90 THEN 3
WHEN quantity_rand <= .95 THEN 4
ELSE 5
END AS quantity,
from data_order_with_array
CROSS JOIN UNNEST(order_item_array) AS order_item
),
data_ranked AS (
SELECT *,
ROW_NUMBER() OVER (ORDER BY order_id, order_item_id) AS row_number
FROM data_random_all_data
)
SELECT row_number + max_id AS order_item_id,
data_ranked.order_id,
data_ranked.menu_id,
data_ranked.quantity,
menu.item_size,
menu.item_price,
ROUND(CAST(data_ranked.quantity * menu.item_price AS NUMERIC), 2, "ROUND_HALF_EVEN") AS item_total
FROM data_ranked
CROSS JOIN data_max_id
INNER JOIN `PROJECT-ID.data_beans_curated.menu` AS menu
ON data_ranked.menu_id = menu.menu_id;
SELECT avg(item_total)
from `PROJECT-ID.coffee_curated.order` as parent
inner join `PROJECT-ID.coffee_curated.order_item` as child on parent.order_id = child.order_id
/*
CREATE OR REPLACE TABLE `data-analytics-golden-v1-share.coffee_curated.order` COPY `PROJECT-ID.coffee_curated.order`;
CREATE OR REPLACE TABLE `data-analytics-golden-v1-share.coffee_curated.order_item` COPY `PROJECT-ID.coffee_curated.order_item`;
*/
CREATE OR REPLACE MATERIALIZED VIEW `PROJECT-ID.coffee_curated.looker_databeans_report`
CLUSTER BY sale_date
OPTIONS (enable_refresh = true, refresh_interval_minutes = 30, description='Used for Looker Studio Pro with Duet AI')
AS
SELECT
EXTRACT(DATE FROM TIMESTAMP(order_datetime)) AS sale_date,
city.city_name,
TIMESTAMP_DIFF(order_table.order_completion_datetime,order_table.order_datetime, SECOND) AS seconds_to_make_order,
customer.customer_name,
company.company_name,
order_item.quantity,
menu.item_name,
menu.item_size,
menu.item_price as sale_price
FROM `PROJECT-ID.coffee_curated.order` AS order_table
INNER JOIN `PROJECT-ID.coffee_curated.order_item` AS order_item
ON order_table.order_id = order_item.order_id
INNER JOIN `PROJECT-ID.coffee_curated.menu` AS menu
ON order_item.menu_id = menu.menu_id
INNER join PROJECT-ID.coffee_curated.location as location
ON order_table.location_id = location.location_id
INNER join PROJECT-ID.coffee_curated.city as city
ON location.city_id = city.city_id
INNER join PROJECT-ID.coffee_curated.customer as customer
ON customer.customer_id=order_table.customer_id
INNER join PROJECT-ID.coffee_curated.company as company
ON menu.company_id = company.company_id;
/*
SELECT """EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/""" || table_name || """/""" || table_name || """_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.""" || table_name || """`);"""
FROM coffee_curated.INFORMATION_SCHEMA.TABLES
WHERE table_type = 'BASE TABLE'
ORDER BY table_name;
EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/order/order_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.order`);
EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/order_item/order_item_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.order_item`);
LOAD DATA OVERWRITE `PROJECT-ID.coffee_curated.LOAD_order_item`
FROM FILES ( format = 'AVRO', uris = ['gs://PROJECT-ID/data-beans/v1/export/order_item/order_item_*.avro']);
LOAD DATA OVERWRITE `PROJECT-ID.coffee_curated.load_order` FROM FILES ( format = 'AVRO', uris = ['gs://PROJECT-ID/data-beans/v1/export/order/order_*.avro']);
CREATE TABLE `PROJECT-ID.coffee_curated.LOAD_order`
CLUSTER BY (order_id)
AS
SELECT order_id, location_id, customer_id,
TIMESTAMP_MICROS(order_datetime) AS order_datetime, TIMESTAMP_MICROS(order_completion_datetime) AS order_completion_datetime
FROM `PROJECT-ID.coffee_curated.load_order` ;
DROP TABLE `PROJECT-ID.coffee_curated.load_order`;
-- DELETE (by hand)
gsutil cp gs://PROJECT-ID/data-beans/v1/export/order/* gs://data-analytics-golden-demo/data-beans/v1/export/order/
gsutil cp gs://PROJECT-ID/data-beans/v1/export/order_item/* gs://data-analytics-golden-demo/data-beans/v1/export/order_item/
*/