data-analytics-demos/bigquery-data-governance/sql-scripts/governed_data_curated/initialize.sql (252 lines of code) (raw):
------------------------------------------------------------------------------------------------------------
-- Create a customer table with a ML model of Predicted Credit Amount
------------------------------------------------------------------------------------------------------------
-- Step 1: Create training data with the selected features
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data` AS
SELECT c.customer_id,
SUM(d.quantity * d.price) as total_spent,
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.customer` AS c
INNER JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.order_header` AS oh
ON c.customer_id = oh.customer_id
INNER JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.order_detail` AS d
ON oh.order_id = d.order_id
GROUP BY c.customer_id;
-- Step 2: Create your BQML Model (Regression Example)
CREATE OR REPLACE MODEL `${project_id}.${bigquery_governed_data_curated_dataset}.customer_credit_model`
OPTIONS(
model_type='linear_reg',
input_label_cols=['total_spent'],
MODEL_REGISTRY = "vertex_ai"
) AS
SELECT total_spent,
customer_id
FROM `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data`;
-- Step 3: Predict Customer Credit
EXECUTE IMMEDIATE """
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer`
CLUSTER BY customer_id
AS
SELECT c.*,
ml.predicted_total_spent as predicted_credit_amount
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.customer` as c
LEFT JOIN ML.PREDICT(MODEL `${project_id}.${bigquery_governed_data_curated_dataset}.customer_credit_model`,
(SELECT customer_id, total_spent from `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data`)) as ml
ON c.customer_id = ml.customer_id;""";
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_header`
CLUSTER BY order_id
AS
SELECT *
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.order_header`;
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail`
CLUSTER BY order_id
AS
SELECT *
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.order_detail` ;
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product`
CLUSTER BY product_id
AS
SELECT *
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.product` ;
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product_category`
CLUSTER BY product_category_id
AS
SELECT *
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.product_category` ;
-- This table will have its descriptions and column descriptions created by Data Insights
CREATE OR REPLACE TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.sales` AS
SELECT p.product_name,
p.product_description,
pd.product_category_name,
pd.product_category_description,
oh.region,
oh.order_datetime,
od.price,
od.quantity,
c.*
FROM `${project_id}.${bigquery_governed_data_enriched_dataset}.order_header` oh
LEFT JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.order_detail` od
ON oh.order_id=od.order_id
INNER JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.product` AS p
ON od.product_id=p.product_id
INNER JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.product_category` AS pd
ON pd.product_category_id=p.product_category_id
INNER JOIN `${project_id}.${bigquery_governed_data_enriched_dataset}.customer` as c
ON c.customer_id=oh.customer_id;
------------------------------------------------------------------------------------------------------------
-- Set the PK/FK for each table:
------------------------------------------------------------------------------------------------------------
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer` ADD PRIMARY KEY (customer_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data` ADD PRIMARY KEY (customer_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_header` ADD PRIMARY KEY (order_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail` ADD PRIMARY KEY (order_id, product_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product` ADD PRIMARY KEY (product_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product_category` ADD PRIMARY KEY (product_category_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data`
ADD CONSTRAINT fk_customer_customer_training_data FOREIGN KEY (customer_id)
REFERENCES `${project_id}.${bigquery_governed_data_curated_dataset}.customer`(customer_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_header`
ADD CONSTRAINT fk_customer_order_header FOREIGN KEY (customer_id)
REFERENCES `${project_id}.${bigquery_governed_data_curated_dataset}.customer`(customer_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail`
ADD CONSTRAINT fk_order_header_order_detail FOREIGN KEY (order_id)
REFERENCES `${project_id}.${bigquery_governed_data_curated_dataset}.order_header`(order_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail`
ADD CONSTRAINT fk_order_detail_product FOREIGN KEY (product_id)
REFERENCES `${project_id}.${bigquery_governed_data_curated_dataset}.product`(product_id) NOT ENFORCED;
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product`
ADD CONSTRAINT fk_product_product_category FOREIGN KEY (product_category_id)
REFERENCES `${project_id}.${bigquery_governed_data_curated_dataset}.product_category`(product_category_id) NOT ENFORCED;
------------------------------------------------------------------------------------------------------------
-- Set the table descriptions for each table:
------------------------------------------------------------------------------------------------------------
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer`
SET OPTIONS (
description = 'Curated table containing customer information with predicted credit amount.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer`
ALTER COLUMN customer_id SET OPTIONS (description='Unique identifier for the customer.'),
ALTER COLUMN first_name SET OPTIONS (description='The first name of the customer.'),
ALTER COLUMN last_name SET OPTIONS (description='The last name of the customer.'),
ALTER COLUMN email SET OPTIONS (description='The email address of the customer.'),
ALTER COLUMN phone SET OPTIONS (description='The phone number of the customer.'),
ALTER COLUMN gender SET OPTIONS (description='The gender of the customer.'),
ALTER COLUMN ip_address SET OPTIONS (description='The IP address of the customer.'),
ALTER COLUMN ssn SET OPTIONS (description='The Social Security Number of the customer.'),
ALTER COLUMN address SET OPTIONS (description='The street address of the customer.'),
ALTER COLUMN city SET OPTIONS (description='The city of the customer.'),
ALTER COLUMN state SET OPTIONS (description='The state of the customer.'),
ALTER COLUMN zip SET OPTIONS (description='The zip code of the customer.'),
ALTER COLUMN credit_card_number SET OPTIONS (description='The credit card number of the customer.'),
ALTER COLUMN predicted_credit_amount SET OPTIONS (description='The predicted credit amount for the customer.');
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data`
SET OPTIONS (
description = 'Table containing training data for the customer credit model.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.customer_training_data`
ALTER COLUMN customer_id SET OPTIONS (description='Unique identifier for the customer.'),
ALTER COLUMN total_spent SET OPTIONS (description='The total amount spent by the customer.');
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail`
SET OPTIONS (
description = 'Curated table containing detailed information for each item in a customer order, with primary and foreign key constraints.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_detail`
ALTER COLUMN order_id SET OPTIONS (description='Unique identifier for the order this item belongs to (Primary Key, Foreign Key).'),
ALTER COLUMN product_id SET OPTIONS (description='Unique identifier for the product in this order item (Primary Key, Foreign Key).'),
ALTER COLUMN quantity SET OPTIONS (description='The quantity of the product in this order item.'),
ALTER COLUMN price SET OPTIONS (description='The price of the product in this order item.');
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_header`
SET OPTIONS (
description = 'Curated table containing header information for customer orders, with primary and foreign key constraints.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.order_header`
ALTER COLUMN customer_id SET OPTIONS (description='Unique identifier for the customer who placed the order (Foreign Key).'),
ALTER COLUMN order_id SET OPTIONS (description='Unique identifier for the order (Primary Key).'),
ALTER COLUMN region SET OPTIONS (description='The region where the order was placed.'),
ALTER COLUMN order_datetime SET OPTIONS (description='The date and time when the order was placed.');
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product`
SET OPTIONS (
description = 'Curated table containing product information with a primary key.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product`
ALTER COLUMN product_id SET OPTIONS (description='Unique identifier for the product (Primary Key).'),
ALTER COLUMN product_name SET OPTIONS (description='The name of the product.'),
ALTER COLUMN product_description SET OPTIONS (description='Description of the product.'),
ALTER COLUMN product_category_id SET OPTIONS (description='Identifier for the category the product belongs to.');
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product_category`
SET OPTIONS (
description = 'Curated table containing product category information with a primary key.'
);
ALTER TABLE `${project_id}.${bigquery_governed_data_curated_dataset}.product_category`
ALTER COLUMN product_category_id SET OPTIONS (description='Unique identifier for the product category (Primary Key).'),
ALTER COLUMN product_category_name SET OPTIONS (description='The name of the product category.'),
ALTER COLUMN product_category_description SET OPTIONS (description='Description of the product category.');
------------------------------------------------------------------------------------------------------------
-- Create the views for the analytics hub
------------------------------------------------------------------------------------------------------------
CREATE OR REPLACE VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.customer_view`
OPTIONS(
description="Analytics Hub View of the customer table"
)
AS
SELECT
customer_id AS customer_id, -- Unique identifier for the customer. Primary Key.
first_name AS first_name, -- First name of the customer.
last_name AS last_name, -- Last name of the customer.
email AS email, -- Email address of the customer.
phone AS phone, -- Phone number of the customer.
gender AS gender, -- Gender of the customer.
ip_address AS ip_address, -- IP address associated with the customer.
ssn AS ssn, -- Social Security Number of the customer.
address AS address, -- Street address of the customer.
city AS city, -- City of the customer's address.
state AS state, -- State of the customer's address.
zip AS zip, -- Zip code of the customer's address.
credit_card_number AS credit_card_number, -- Credit card number of the customer.
predicted_credit_amount AS predicted_credit_amount -- Predicted credit amount for the customer.
FROM
`${project_id}.${bigquery_governed_data_curated_dataset}.customer` ;
ALTER VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.customer_view`
ALTER COLUMN customer_id SET OPTIONS(description="Unique identifier for the customer. Primary Key."),
ALTER COLUMN first_name SET OPTIONS(description="First name of the customer."),
ALTER COLUMN last_name SET OPTIONS(description="Last name of the customer."),
ALTER COLUMN email SET OPTIONS(description="Email address of the customer."),
ALTER COLUMN phone SET OPTIONS(description="Phone number of the customer."),
ALTER COLUMN gender SET OPTIONS(description="Gender of the customer."),
ALTER COLUMN ip_address SET OPTIONS(description="IP address associated with the customer."),
ALTER COLUMN ssn SET OPTIONS(description="Social Security Number of the customer."),
ALTER COLUMN address SET OPTIONS(description="Street address of the customer."),
ALTER COLUMN city SET OPTIONS(description="City of the customer's address."),
ALTER COLUMN state SET OPTIONS(description="State of the customer's address."),
ALTER COLUMN zip SET OPTIONS(description="Zip code of the customer's address."),
ALTER COLUMN credit_card_number SET OPTIONS(description="Credit card number of the customer."),
ALTER COLUMN predicted_credit_amount SET OPTIONS(description="Predicted credit amount for the customer.");
CREATE OR REPLACE VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.order_detail_view`
OPTIONS(
description="Analytics Hub View of the order_detail table"
)
AS
SELECT
order_id AS order_id, -- Identifier for the order. Primary Key and Foreign Key.
product_id AS product_id, -- Identifier for the product. Primary Key and Foreign Key.
quantity AS quantity, -- Quantity of the product in the order.
price AS price -- Price of the product at the time of order.
FROM
`${project_id}.${bigquery_governed_data_curated_dataset}.order_detail` ;
ALTER VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.order_detail_view`
ALTER COLUMN order_id SET OPTIONS(description="Identifier for the order. Primary Key and Foreign Key."),
ALTER COLUMN product_id SET OPTIONS(description="Identifier for the product. Primary Key and Foreign Key."),
ALTER COLUMN quantity SET OPTIONS(description="Quantity of the product in the order."),
ALTER COLUMN price SET OPTIONS(description="Price of the product at the time of order.");
CREATE OR REPLACE VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.order_header_view`
OPTIONS(
description="Analytics Hub View of the order_header table"
)
AS
SELECT
customer_id AS customer_id, -- Identifier for the customer who placed the order. Foreign Key.
order_id AS order_id, -- Unique identifier for the order. Primary Key.
region AS region, -- Region where the order was placed.
order_datetime AS order_datetime -- Date and time when the order was placed.
FROM
`${project_id}.${bigquery_governed_data_curated_dataset}.order_header` ;
ALTER VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.order_header_view`
ALTER COLUMN customer_id SET OPTIONS(description="Identifier for the customer who placed the order. Foreign Key."),
ALTER COLUMN order_id SET OPTIONS(description="Unique identifier for the order. Primary Key."),
ALTER COLUMN region SET OPTIONS(description="Region where the order was placed."),
ALTER COLUMN order_datetime SET OPTIONS(description="Date and time when the order was placed.");
CREATE OR REPLACE VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.product_view`
OPTIONS(
description="Analytics Hub View of the product table"
)
AS
SELECT
product_id AS product_id, -- Unique identifier for the product. Primary Key.
product_name AS product_name, -- Name of the product.
product_description AS product_description, -- Detailed description of the product.
product_category_id AS product_category_id -- Identifier for the category the product belongs to. Foreign Key.
FROM
`${project_id}.${bigquery_governed_data_curated_dataset}.product` ;
ALTER VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.product_view`
ALTER COLUMN product_id SET OPTIONS(description="Unique identifier for the product. Primary Key."),
ALTER COLUMN product_name SET OPTIONS(description="Name of the product."),
ALTER COLUMN product_description SET OPTIONS(description="Detailed description of the product."),
ALTER COLUMN product_category_id SET OPTIONS(description="Identifier for the category the product belongs to. Foreign Key.");
CREATE OR REPLACE VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.product_category_view`
OPTIONS(
description="Analytics Hub View of the product_category table"
)
AS
SELECT
product_category_id AS product_category_id, -- Unique identifier for the product category. Primary Key.
product_category_name AS product_category_name, -- Name of the product category.
product_category_description AS product_category_description -- Description of the product category.
FROM
`${project_id}.${bigquery_governed_data_curated_dataset}.product_category` ;
ALTER VIEW `${project_id}.${bigquery_analytics_hub_publisher_dataset}.product_category_view`
ALTER COLUMN product_category_id SET OPTIONS(description="Unique identifier for the product category. Primary Key."),
ALTER COLUMN product_category_name SET OPTIONS(description="Name of the product category."),
ALTER COLUMN product_category_description SET OPTIONS(description="Description of the product category.");