sql-scripts/wholesale_integrators/generate_analytic_hub_data.sql (41 lines of code) (raw):
/*##################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################*/
/*
Author: Adam Paternostro
Use Cases:
- Sharing data via Analytic Hub
Description:
- This is the dataset that is shared via Analytic Hub
- A nightly process will refresh the data
Show:
- New schema and table will be created
References:
- https://cloud.google.com/bigquery/docs/analytics-hub-introduction
Clean up / Reset script:
DROP SCHEMA `wholesale_integrators` ;
DROP TABLE IF EXISTS wholesale_integrators.wholesale_orders;
*/
CREATE SCHEMA wholesale_integrators
OPTIONS(
location="us"
);
CREATE OR REPLACE TABLE wholesale_integrators.wholesale_orders
(
order_number INT64, -- wholeseller's order number
order_date DATE, -- date ordered
expected_delivery_date DATE, -- date wholeseller expects to deliver
distribution_center_id INT64, -- theLook distribution_centers.id reference
product_id INT64, -- theLook inventor_items.product_id reference
quantity INT64 -- quantity ordered
)
CLUSTER BY distribution_center_id, product_id;
TRUNCATE TABLE wholesale_integrators.wholesale_orders;
/*
-- Did not generate enough data. Want a deliver per distibution center, per product per day.
INSERT INTO wholesale_integrators.wholesale_orders (order_number, order_date, expected_delivery_date, distribution_center_id, product_id, quantity)
SELECT element AS order_number,
DATE_SUB(CURRENT_DATE, INTERVAL CAST(ROUND(1 + RAND() * (90 - 1)) AS INT64) DAY) AS order_date,
DATE_ADD(DATE_SUB(CURRENT_DATE, INTERVAL 5 DAY), INTERVAL CAST(ROUND(1 + RAND() * (90 - 7)) AS INT64) DAY) AS expected_delivery_date,
CAST(ROUND(1 + RAND() * (10 - 1)) AS INT64) AS distribution_center_id,
CAST(ROUND(1 + RAND() * (29120 - 1)) AS INT64) AS product_id,
CAST(ROUND(10 + RAND() * (200 - 10)) AS INT64) AS quantity
FROM UNNEST(GENERATE_ARRAY(1, 2000000)) AS element
ORDER BY element;
*/
INSERT INTO wholesale_integrators.wholesale_orders (order_number, order_date, expected_delivery_date, distribution_center_id, product_id, quantity)
WITH DeliveryDays AS
(
SELECT DATE_ADD(DATE_SUB(CURRENT_DATE, INTERVAL 5 DAY), INTERVAL element DAY) AS delivery_date,
FROM UNNEST(GENERATE_ARRAY(1, 90)) AS element
)
, FullDataset AS
(
SELECT DeliveryDays.delivery_date AS expected_delivery_date,
distribution_centers.id AS distribution_center_id,
products.id AS product_id
FROM DeliveryDays
CROSS JOIN `bigquery-public-data.thelook_ecommerce.distribution_centers` AS distribution_centers
CROSS JOIN `bigquery-public-data.thelook_ecommerce.products` AS products
)
, NewData AS
(
SELECT ROW_NUMBER() OVER (PARTITION BY 1) AS order_number,
DATE_SUB(CURRENT_DATE, INTERVAL CAST(ROUND(1 + RAND() * (90 - 1)) AS INT64) DAY) AS order_date,
expected_delivery_date,
distribution_center_id,
product_id,
CAST(ROUND(10 + RAND() * (100 - 10)) AS INT64) AS quantity
FROM FullDataset
)
--SELECT * FROM DeliveryDays ORDER BY 1;
--SELECT * FROM FullDataset ORDER BY 1, 2, 3;
SELECT * FROM NewData;