sql-scripts/taxi_dataset/sp_demo_time_travel_snapshots.sql (67 lines of code) (raw):

/*################################################################################## # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ###################################################################################*/ /* Use Cases: - Time travel and/or Snapshot can be used for reporting. - This can make reports consistent even if data is being loaded or changed. - This also lets you see data at a particular point in time - Time travel is used for restoring data that has been mistakely altered or deleted. Description: - Show time travel. - Data can be accessed for 7 days for any BQ table. - If you need to freeze your data (e.g. weekly) you can create Snapshots to preseve your data for a particular time. - Show snapshots - Show delete data, drop table and then restore a table Reference: - https://cloud.google.com/bigquery/docs/time-travel - https://cloud.google.com/bigquery/docs/table-snapshots-create Clean up / Reset script: DROP SNAPSHOT TABLE IF EXISTS `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel_snapshot` ; */ -- Query: Create a new table for Green Trips and load data for Jan 2021 CREATE OR REPLACE TABLE `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ( Vendor_Id INTEGER, Pickup_DateTime TIMESTAMP, Dropoff_DateTime TIMESTAMP, PULocationID INTEGER, DOLocationID INTEGER, Trip_Distance FLOAT64, Total_Amount FLOAT64, PartitionDate DATE ) PARTITION BY PartitionDate AS SELECT Vendor_Id, Pickup_DateTime, Dropoff_DateTime, PULocationID, DOLocationID, Trip_Distance, Total_Amount, DATE(year, month, 1) as PartitionDate FROM `${project_id}.${bigquery_taxi_dataset}.ext_green_trips_parquet` WHERE DATE(year, month, 1) = '2021-01-01'; -- Query: 76516 SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ; -- Query: See the table in the past -- Wait 15 seconds from CREATE TABLE: 76516 (if you get an error "table does not havea a schema, you need to wait 15 seconds") SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 15 SECOND); -- Query: Insert more data INSERT INTO `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` (Vendor_Id, Pickup_DateTime, Dropoff_DateTime, PULocationID, DOLocationID,Trip_Distance,Total_Amount,PartitionDate) SELECT Vendor_Id, Pickup_DateTime, Dropoff_DateTime, PULocationID, DOLocationID, Trip_Distance, Total_Amount, DATE(year, month, 1) as PartitionDate FROM `${project_id}.${bigquery_taxi_dataset}.ext_green_trips_parquet` WHERE DATE(year, month, 1) = '2021-02-01'; -- Query: 141086 SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ; -- Query: 76516 - See the prior data before the INSERT -- Wait at least 30 seconds. -- NOTE: You might need to change INTERVAL 30 SECOND to more time if you spend time talking SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 SECOND); -- Query: Create a Snapshot and keep for 1 year -- Typically you would snapshot once a week to keep longer than 7 days the "INTERVAL 1 SECOND" is just for demo. -- You can schedule this BigQuery "Scheduled Queries" https://cloud.google.com/bigquery/docs/table-snapshots-scheduled -- Snapshots are only charged for the "delta" pricing of data from the base table -- NOTE: You can create the snapshot in a different project and then grant Viewer access to the snapshot. This allows you -- to snapshot "prod" data into a "dev" project. CREATE SNAPSHOT TABLE `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel_snapshot` CLONE `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 SECOND) OPTIONS(expiration_timestamp = CAST(DATE_ADD(CURRENT_DATE('America/New_York'), INTERVAL 1 YEAR) AS TIMESTAMP)); -- See all the snapshots you have created SELECT * FROM `${project_id}.${bigquery_taxi_dataset}..INFORMATION_SCHEMA.TABLE_SNAPSHOTS`; -- Query: Whoops: Now less mess up the data. Who granted them delete access anyway? DELETE FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` WHERE Pickup_DateTime BETWEEN '2021-01-25'AND '2021-02-10'; -- Query: 105253 SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ; -- Query: All records still exist: 141086 SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MINUTE); -- Query: This user really messed up, time to call an admin... DROP TABLE `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel`; -- ERROR: Not found: Table ${project_id}:${bigquery_taxi_dataset}.green_trips_time_travel was not found in location SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ; -- STILL ERROR: Not found: Table ${project_id}:${bigquery_taxi_dataset}.green_trips_time_travel@1643126823820 was not found in location SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MINUTE); -- ############################################################## -- You need to run this command line in the Cloud Console Shell -- ############################################################## -- Restore the table from 1 minute ago (after drop) -- 1 minute: 60000 -- 2 minutes: 120000 (you might have to replace 60000 with this if you wait) -- 5 minutes: 300000 (you might have to replace 60000 with this if you wait) -- SHELL COMMAND (Run This!) -- bq cp "${bigquery_taxi_dataset}.green_trips_time_travel@-60000" "${bigquery_taxi_dataset}.green_trips_time_travel" -- Sample Shell Output: -- admin_@cloudshell:~ (${project_id})$ bq cp "${bigquery_taxi_dataset}.green_trips_timetravel@-120000" "${bigquery_taxi_dataset}.green_trips_timetravel" -- Waiting on bqjob_r4b7df7dfd4abd254_0000017e91fefcc6_1 ... (0s) Current status: DONE -- Table '${project_id}:${bigquery_taxi_dataset}.green_trips_timetravel@-120000' successfully copied to '${project_id}:${bigquery_taxi_dataset}.green_trips_timetravel' -- Query: Records are back SELECT COUNT(*) AS RecordCount FROM `${project_id}.${bigquery_taxi_dataset}.green_trips_time_travel` ;