sql-scripts/rideshare_lakehouse_enriched/sp_process_data.sql (13 lines of code) (raw):

/*################################################################################## # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ###################################################################################*/ /* Use Cases: - Cleans up the tables and calls the Spark stored procedure Description: - Process all data into the Iceberg format - Generates "fake" credit card number for the system Show: - References: - Clean up / Reset script: DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_rideshare_payment_type`; DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_rideshare_zone`; DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_rideshare_payment_type`; */ -- Remove existing tables (overwrite for demo so we can run over and over again) -- This for when we do not have Iceberg / BigSpark and cannot create an external table. This can be removed in the future. IF EXISTS (SELECT 1 FROM `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}`.INFORMATION_SCHEMA.TABLES WHERE table_name = 'biglake_rideshare_payment_type_iceberg' AND table_type = 'EXTERNAL') THEN DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_payment_type_iceberg`; DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_trip_iceberg`; DROP EXTERNAL TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigsearch_bigquery_rideshare_trip`; ELSE DROP TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_payment_type_iceberg`; DROP TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_trip_iceberg`; DROP TABLE IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigsearch_bigquery_rideshare_trip`; END IF; /* Manual example w/o BigLake Metastore Service CREATE OR REPLACE EXTERNAL TABLE `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_payment_type_iceberg_1` OPTIONS ( format = 'ICEBERG', uris = ["gs://${gcs_rideshare_lakehouse_enriched_bucket}/iceberg-warehouse/${bigquery_rideshare_lakehouse_enriched_dataset}.db/biglake_rideshare_payment_type_iceberg/metadata/00001-57ce4e42-aef6-4786-8e73-3fa713e12d7c.metadata.json"] ); CREATE OR REPLACE EXTERNAL TABLE `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_zone_iceberg_1` OPTIONS ( format = 'ICEBERG', uris = ["gs://${gcs_rideshare_lakehouse_enriched_bucket}/iceberg-warehouse/${bigquery_rideshare_lakehouse_enriched_dataset}.db/biglake_rideshare_trip_iceberg/metadata/00000-99e91cfc-01d9-4f15-b587-17a7969cc4f3.metadata.json"] ); CREATE OR REPLACE EXTERNAL TABLE `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.biglake_rideshare_trip_iceberg_1` OPTIONS ( format = 'ICEBERG', uris = ["gs://${gcs_rideshare_lakehouse_enriched_bucket}/iceberg-warehouse/${bigquery_rideshare_lakehouse_enriched_dataset}.db/biglake_rideshare_zone_iceberg/metadata/00000-87c9cca0-a5e4-4923-b38e-66ba035b8ee0.metadata.json"] ); */ -- CALL Spark job (if not calling from Airflow DAG you can comment this out) -- CALL `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.sp_iceberg_spark_transformation`();