sql-scripts/taxi_dataset/sp_demo_security.sql (105 lines of code) (raw):
/*##################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################*/
/*
Use Cases:
- Need to secure your datasets, tables, views, columns and rows, BigQuery allows you to have full security
of you data.
- Row level security allows you to have a multitenant warehouse without creating seperate tables for each tenant.
- Data Masking lets you secure columns with Hash (for joins), Default (set to default values), Null (set to null)
Description:
- Show Dataset, Table and Row Level security
- Show an authorized view that uses a table join for security
- Show row level access that filters a table based upon secuirty with cascading permissions
Reference:
- https://cloud.google.com/bigquery/docs/dataset-access-controls
- https://cloud.google.com/bigquery/docs/authorized-views
- https://cloud.google.com/bigquery/docs/row-level-security-intro
- https://cloud.google.com/bigquery/docs/column-level-security
- https://cloud.google.com/bigquery/docs/column-data-masking-intro
Clean up / Reset script:
DROP VIEW IF EXISTS `${project_id}.${bigquery_taxi_dataset}.v_taxi_trips_passenger_amounts`;
DROP TABLE IF EXISTS `${project_id}.${bigquery_taxi_dataset}.user_vendor_security`;
DROP ALL ROW ACCESS POLICIES ON `${project_id}.${bigquery_taxi_dataset}.taxi_trips`;
TRUNCATE TABLE `${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec` ;
*/
-- Query 1: BigQuery allows security to be placed at a Dataset level
-- Permissions can be set for users, groups, and service accounts on datasets
GRANT `roles/bigquery.dataViewer`
ON SCHEMA `${project_id}.${bigquery_taxi_dataset}`
TO "user:${gcp_account_name}";
-- To View in the BigQuery UI
-- 1. Open dataset
-- 2. Click on Sharing
-- 3. Select Permissions
-- 4. Unselect "Show inherited permissions"
-- 5. Expand "BigQuery Data Viewer"
-- 6. You will see the user has access
-- Query 2: Remove the previously granted permissions (note since you are an admin it does not do much in the demo)
REVOKE `roles/bigquery.dataViewer`
ON SCHEMA `${project_id}.${bigquery_taxi_dataset}`
FROM "user:${gcp_account_name}";
-- Query 3: Table permissions
-- Tables can have access as well as Views
GRANT `roles/bigquery.dataViewer`
ON TABLE `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
TO "user:${gcp_account_name}";
-- Query 4: Revoke table
REVOKE `roles/bigquery.dataViewer`
ON TABLE `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
FROM "user:${gcp_account_name}";
-- Query 5: Create a table to be used in an authorized view. This table holds for each user their
-- allow access to certain "Vendor Ids"
CREATE OR REPLACE TABLE `${project_id}.${bigquery_taxi_dataset}.user_vendor_security`
(
user_id STRING,
Vendor_Id INTEGER
);
-- Query 6: Insert "you" as a test
INSERT INTO `${project_id}.${bigquery_taxi_dataset}.user_vendor_security` (user_id, Vendor_Id)
VALUES ('${gcp_account_name}', 1);
-- Query 5: Create the view that joins to the new table and filters based upon SESSION_USER()
CREATE OR REPLACE VIEW `${project_id}.${bigquery_taxi_dataset}.v_taxi_trips_passenger_amounts` AS
SELECT vendor.Vendor_Id,
Vendor_Description,
CAST(Pickup_DateTime AS DATE) AS Pickup_Date,
Passenger_Count,
Total_Amount
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips` AS taxi_trips
INNER JOIN `${project_id}.${bigquery_taxi_dataset}.user_vendor_security` AS user_vendor_security
ON user_vendor_security.user_id = SESSION_USER()
AND taxi_trips.Vendor_Id = user_vendor_security.Vendor_Id
INNER JOIN `${project_id}.${bigquery_taxi_dataset}.vendor` AS vendor
ON taxi_trips.Vendor_Id = vendor.Vendor_Id;
-- Query 6: Run a SELECT against the view. You will only see Vendors Id of 1
-- Get the max total per day and show the number of passengers
WITH RankingData AS
(
SELECT Vendor_Id,
Vendor_Description,
Pickup_Date,
Passenger_Count,
Total_Amount,
ROW_NUMBER() OVER (PARTITION BY Vendor_Id, Pickup_Date ORDER BY Total_Amount DESC) AS ranking
FROM `${project_id}.${bigquery_taxi_dataset}.v_taxi_trips_passenger_amounts`
WHERE Pickup_Date BETWEEN '2021-01-01' AND '2021-02-01'
)
SELECT * EXCEPT (ranking) -- hide the ranking column
FROM RankingData
WHERE ranking = 1
ORDER BY Vendor_Description, Pickup_Date;
-- Query 6: Use Row Level Access Policy to do table row filtering
-- These policies can be stacked, are used by materialize views and other security pass through
CREATE OR REPLACE ROW ACCESS POLICY rap_taxi_trips_admin_mta_tax
ON `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
GRANT TO ("user:${gcp_account_name}") -- This also works for groups: "group:tax-collectors@altostrat.com"
FILTER USING (MTA_Tax > 0);
-- Query 6: This will only show MTA_Tax > 0
SELECT *
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
LIMIT 50;
-- Query 7: Apply another filter
-- Filter again (filters will cascade, in a non-demo, you would be setting this for different groups of users)
CREATE OR REPLACE ROW ACCESS POLICY rap_taxi_trips_admin_mta_tax_vendor_2
ON `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
GRANT TO ("user:${gcp_account_name}")
FILTER USING (Vendor_Id = 2 OR PULocationID = 182);
-- Query 8: Show the results
SELECT *
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
LIMIT 50;
-- Query 9: Remove this so the rest of the demo works
DROP ROW ACCESS POLICY rap_taxi_trips_admin_mta_tax_vendor_2 ON `${project_id}.${bigquery_taxi_dataset}.taxi_trips`;
DROP ALL ROW ACCESS POLICIES ON `${project_id}.${bigquery_taxi_dataset}.taxi_trips`;
-- Column Level Security
-- Populate the taxi Trips w/Column Level security
-- Since some fields are protected just the ones with access are populated
INSERT INTO `${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec`
(
Vendor_Id,
Pickup_DateTime,
Dropoff_DateTime,
Store_And_Forward,
Rate_Code_Id,
PULocationID,
DOLocationID,
Passenger_Count,
Trip_Distance,
Fare_Amount,
Surcharge,
MTA_Tax,
Tolls_Amount,
Improvement_Surcharge,
Payment_Type_Id,
Congestion_Surcharge
)
SELECT
Vendor_Id,
Pickup_DateTime,
Dropoff_DateTime,
Store_And_Forward,
Rate_Code_Id,
PULocationID,
DOLocationID,
Passenger_Count,
Trip_Distance,
Fare_Amount,
Surcharge,
MTA_Tax,
Tolls_Amount,
Improvement_Surcharge,
Payment_Type_Id,
Congestion_Surcharge
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips`;
-- Error: Selecting columns w/o access
-- Access Denied: BigQuery BigQuery: User does not have permission to access policy tag "Business Critical : High security" on columns ${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec.Tip_Amount, ${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec.Total_Amount.
SELECT *
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec`
LIMIT 1000;
-- Valid (with data masking)
-- Open this in a new tab: https://console.cloud.google.com/dataplex/taxonomies
-- The fields: PULocationID and DOLocationID are data masked (to NULL values)
-- On the policy tag, check off the "Data Masking security" and click "MANAGE DATA POLICIES"
-- You will notice that your user is set to the "Nullify_Rule"
SELECT * EXCEPT(Tip_Amount, Total_Amount)
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips_with_col_sec`
LIMIT 1000;