-- snowflake-bigquery-iceberg.sql
-- Step 1:
-- Login to Snowflake
-- Click the Create button on the top left
-- Select SQL Worksheet
-- Run this command to switch to ACCOUNTADMIN, since some of the commands below require this role
USE ROLE accountadmin;
-- Step 2:
-- Create a virtual warehouse (compute) to run the commands in this script
-- https://docs.snowflake.com/en/sql-reference/sql/create-warehouse
CREATE OR REPLACE WAREHOUSE ICEBERG_WAREHOUSE WITH WAREHOUSE_SIZE='XSMALL';
-- Step 3:
-- Create a database to hold the Iceberg table
CREATE OR REPLACE DATABASE ICEBERG_DATABASE;
-- Step 4:
-- Create a bucket to hold the Iceberg table (written by Snowflake, read by BigQuery)
-- Open: https://console.cloud.google.com/storage/browser
-- Click the Create Bucket button
-- Enter your bucket name: iceberg-sharing-snowflake (you can choose a different name)
-- Click Next and select Region: us-central1 (must match your Snowflake account's region)
-- Click Create at the bottom
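-- (Optional) If you prefer the CLI to the console, a bucket like the one above can also be
-- created with gcloud; the bucket name and region are just the examples used in this walkthrough:
-- gcloud storage buckets create gs://iceberg-sharing-snowflake --location=us-central1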
-- Step 5:
-- In Snowflake
-- Create our GCS storage integration. This creates a link between Snowflake and GCS.
-- Snowflake creates a GCP service account for the integration, and we will grant that account access to our GCS bucket.
-- https://docs.snowflake.com/en/sql-reference/sql/create-storage-integration
CREATE STORAGE INTEGRATION bigquery_integration
  TYPE = EXTERNAL_STAGE
  STORAGE_PROVIDER = 'GCS'
  ENABLED = TRUE
  STORAGE_ALLOWED_LOCATIONS = ('gcs://iceberg-sharing-snowflake');
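-- (Optional) If a role other than ACCOUNTADMIN will create the external volume or stages that
-- use this integration, grant it usage first (SYSADMIN below is just an example role):
-- GRANT USAGE ON INTEGRATION bigquery_integration TO ROLE sysadmin;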
-- Step 6:
-- Get the service account that we will grant access to our GCS bucket (via the custom role created in Step 7)
DESC STORAGE INTEGRATION bigquery_integration;
-- Copy the STORAGE_GCP_SERVICE_ACCOUNT
-- e.g. xxxxxxxxx@gcpuscentral1-1dfa.iam.gserviceaccount.com
-- Step 7:
-- https://docs.snowflake.com/en/user-guide/tables-iceberg-configure-external-volume-gcs
-- Create a custom IAM role
-- Create a custom role that has the permissions required to access the bucket and get objects.
-- Open: https://console.cloud.google.com/iam-admin/roles
-- Select Create Role.
-- Enter a Title and optional Description for the custom role. [e.g. Snowflake Storage Admin]
-- Select Add Permissions.
-- In Filter, select Service and then select storage.
-- Filter the list of permissions, and add the following from the list:
-- storage.buckets.get
-- storage.objects.get
-- storage.objects.create
-- storage.objects.delete
-- storage.objects.list
-- Select Add.
-- Select Create.
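-- (Optional) The same custom role can be created from the CLI; "SnowflakeStorageAdmin" and
-- YOUR_PROJECT_ID are placeholders for this walkthrough:
-- gcloud iam roles create SnowflakeStorageAdmin --project=YOUR_PROJECT_ID \
--   --title="Snowflake Storage Admin" \
--   --permissions=storage.buckets.get,storage.objects.get,storage.objects.create,storage.objects.delete,storage.objects.list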
-- Step 8:
-- Open the bucket you created
-- Open: https://console.cloud.google.com/storage/browser
-- Click on: iceberg-sharing-snowflake (or whatever you named it)
-- Click on Permissions
-- Click Grant Access
-- Paste in the service account name (from Snowflake)
-- For the role select Custom | Snowflake Storage Admin
-- Click Save
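-- (Optional) CLI equivalent of the grant above; replace YOUR_PROJECT_ID and the service
-- account with the values from your project and from Step 6:
-- gcloud storage buckets add-iam-policy-binding gs://iceberg-sharing-snowflake \
--   --member="serviceAccount:xxxxxxxxx@gcpuscentral1-1dfa.iam.gserviceaccount.com" \
--   --role="projects/YOUR_PROJECT_ID/roles/SnowflakeStorageAdmin"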
-- Step 9:
-- In Snowflake
-- Create an external volume on GCS
-- https://docs.snowflake.com/en/sql-reference/sql/create-external-volume
CREATE EXTERNAL VOLUME snowflake_ext_volume
  STORAGE_LOCATIONS =
    (
      (
        NAME = 'us-central1'
        STORAGE_PROVIDER = 'GCS'
        STORAGE_BASE_URL = 'gcs://iceberg-sharing-snowflake/snowflake-volume/'
      )
    )
  ALLOW_WRITES = TRUE;
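-- (Optional) Sanity-check the volume before creating tables on it; this should report
-- success if the bucket grant from Step 8 is in place
SELECT SYSTEM$VERIFY_EXTERNAL_VOLUME('snowflake_ext_volume');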
-- Step 10:
-- Describe the volume
DESCRIBE EXTERNAL VOLUME snowflake_ext_volume;
-- Step 11:
-- Set the current database
USE ICEBERG_DATABASE;
-- Step 12:
-- Create a schema in Snowflake
CREATE SCHEMA iceberg_schema;
-- Step 13:
-- Make the schema active
USE SCHEMA iceberg_schema;
-- Step 14:
-- Create Iceberg table using Snowflake Catalog
-- https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table-snowflake
CREATE ICEBERG TABLE driver (driver_id int, driver_name string)
  CATALOG = 'SNOWFLAKE'
  EXTERNAL_VOLUME = 'snowflake_ext_volume'
  BASE_LOCATION = 'driver';
-- Step 15:
-- This will show the table just created
SHOW TABLES;
-- Step 16:
-- This will insert new data
INSERT INTO driver (driver_id, driver_name) VALUES (1, 'Driver 001');
SELECT * FROM driver;
-- Step 17:
-- This will tell us the latest metadata json file that Snowflake is pointing to
-- We need to point BigQuery to the same place
SELECT REPLACE(JSON_EXTRACT_PATH_TEXT(
SYSTEM$GET_ICEBERG_TABLE_INFORMATION('ICEBERG_DATABASE.iceberg_schema.driver'),
'metadataLocation'), 'gcs://', 'gs://');
-- Step 18:
-- Open the bucket you created
-- Open: https://console.cloud.google.com/storage/browser
-- Click on: iceberg-sharing-snowflake (or whatever you named it)
-- You can now browse the Iceberg files
-- Step 19:
-- Create a BigQuery dataset (run this statement in the BigQuery console, not in Snowflake)
CREATE SCHEMA IF NOT EXISTS snowflake_dataset OPTIONS(location = 'us-central1');
-- Step 20:
-- Navigate to BigQuery
-- Open: https://console.cloud.google.com/bigquery
-- Click the Add button
-- Select "Connections to external data sources"
-- Select "Vertex AI remote models, remote functions and BigLake (Cloud Resource)"
-- Select region: us-central1
-- Enter a name: iceberg-connection-snowflake (use this for the friendly name and the description as well)
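-- (Optional) The connection can also be created with the bq CLI, using the same name as above:
-- bq mk --connection --location=us-central1 --connection_type=CLOUD_RESOURCE iceberg-connection-snowflake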
-- Step 21:
-- Expand your project in the left hand panel
-- Expand external connections
-- Double click on us-central1.iceberg-connection-snowflake
-- Copy the service account id: e.g. bqcx-xxxxxxxxxxxx-s3rf@gcp-sa-bigquery-condel.iam.gserviceaccount.com
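-- (Optional) The connection's service account can also be retrieved from the CLI
-- (YOUR_PROJECT_ID is a placeholder):
-- bq show --connection YOUR_PROJECT_ID.us-central1.iceberg-connection-snowflake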
-- Step 22:
-- Open the bucket you created
-- Open: https://console.cloud.google.com/storage/browser
-- Click on: iceberg-sharing-snowflake (or whatever you named it)
-- Click on Permissions
-- Click Grant Access
-- Paste in the service account name (from the BigQuery connection)
-- For the role select Cloud Storage | Storage Object Viewer [Snowflake does the writing; BigQuery only needs to read]
-- Click Save
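-- (Optional) CLI equivalent of the grant above, using the connection's service account:
-- gcloud storage buckets add-iam-policy-binding gs://iceberg-sharing-snowflake \
--   --member="serviceAccount:bqcx-xxxxxxxxxxxx-s3rf@gcp-sa-bigquery-condel.iam.gserviceaccount.com" \
--   --role="roles/storage.objectViewer"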
-- Step 23:
-- The uris value must come from the Snowflake query in Step 17 (your metadata file name will differ)
CREATE OR REPLACE EXTERNAL TABLE `snowflake_dataset.driver`
WITH CONNECTION `us-central1.iceberg-connection-snowflake`
OPTIONS (
format = "ICEBERG",
uris = ["gs://iceberg-sharing-snowflake/snowflake-volume/driver/metadata/00001-25a4ee54-8ebc-4551-b013-c3195b01d227.metadata.json"]
);
-- Step 24:
-- View the data in BigQuery
SELECT * FROM `snowflake_dataset.driver`;
-- Step 25:
-- If you add or update data in Snowflake, BigQuery will not see the changes, since the external table points to a specific metadata json file
-- Re-run the "SYSTEM$GET_ICEBERG_TABLE_INFORMATION" query from Step 17 in Snowflake to get the latest metadata location
-- Then update the BigQuery table to point at it
-- https://cloud.google.com/bigquery/docs/iceberg-tables#update-table-metadata
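-- For example, one way to repoint BigQuery is to re-run the CREATE OR REPLACE EXTERNAL TABLE
-- statement from Step 23 with the new metadata file (the file name below is a placeholder;
-- copy the real one from the Step 17 query output):
-- CREATE OR REPLACE EXTERNAL TABLE `snowflake_dataset.driver`
-- WITH CONNECTION `us-central1.iceberg-connection-snowflake`
-- OPTIONS (
--   format = "ICEBERG",
--   uris = ["gs://iceberg-sharing-snowflake/snowflake-volume/driver/metadata/<latest>.metadata.json"]
-- );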