sql-scripts/taxi_dataset/sp_demo_federated_query.sql (102 lines of code) (raw):

/*################################################################################## # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ###################################################################################*/ /* Use Cases: - Need to query data in CloudSQL (Postgres, SQL Server, MySQL) or Spanner, you can from BigQuery - Description: - Reference: - Clean up / Reset script: n/a */ -- Query data directly in Spanner -- Station Id USW00094728 = NEW YORK CNTRL PK TWR (SELECT * FROM `bigquery-public-data.ghcn_d.ghcnd_stations` WHERE id = 'USW00094728';) -- NOTE: You need to run the Airflow DAG "sample-bigquery-start-spanner" before running this SQL -- You also need to wait for the Dataflow job "importspannerweatherdata" to complete (or else you will not see any data) -- Spanner will automatically be DELETED after 4 hours. Create a calendar reminder to run this DAG before your demo. EXECUTE IMMEDIATE """ SELECT * FROM EXTERNAL_QUERY( 'projects/${project_id}/locations/${spanner_region}/connections/bq_spanner_connection', "SELECT * FROM weather WHERE station_id='USW00094728'"); """; -- Federated Queries that join data to BigQuery require resources to be in the same region -- Create a dataset and sample data in the same region in which Spanner is deployed CREATE SCHEMA ${bigquery_taxi_dataset}_spanner OPTIONS( location="${spanner_region}" ); -- Seed a BigQuery table with data CREATE OR REPLACE TABLE `${project_id}`.${bigquery_taxi_dataset}_spanner.taxi_averages AS SELECT CAST('2020-01-05' AS DATE) AS PickupDate, 'Sunday' AS WeekdayName, 0 AS WeekdayNumber, 177136 AS NumberOfTrips, 3.345020493 AS AvgDistance, 13.44308379 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-04' AS DATE) AS PickupDate, 'Saturday' AS WeekdayName, 6 AS WeekdayNumber, 197060 AS NumberOfTrips, 3.001948239 AS AvgDistance, 12.57408292 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-06' AS DATE) AS PickupDate, 'Monday' AS WeekdayName, 1 AS WeekdayNumber, 195843 AS NumberOfTrips, 3.150297790 AS AvgDistance, 13.42400520 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-02' AS DATE) AS PickupDate, 'Thursday' AS WeekdayName, 4 AS WeekdayNumber, 177191 AS NumberOfTrips, 3.347735946 AS AvgDistance, 13.85145685 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-01' AS DATE) AS PickupDate, 'Wednesday' AS WeekdayName, 3 AS WeekdayNumber, 180439 AS NumberOfTrips, 3.517630446 AS AvgDistance, 14.04735933 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-03' AS DATE) AS PickupDate, 'Friday' AS WeekdayName, 5 AS WeekdayNumber, 199545 AS NumberOfTrips, 3.042911925 AS AvgDistance, 12.95975163 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-07' AS DATE) AS PickupDate, 'Tuesday' AS WeekdayName, 2 AS WeekdayNumber, 218548 AS NumberOfTrips, 2.955926799 AS AvgDistance, 12.88856677 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-11' AS DATE) AS PickupDate, 'Saturday' AS WeekdayName, 6 AS WeekdayNumber, 232678 AS NumberOfTrips, 2.871209483 AS AvgDistance, 12.57551036 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-23' AS DATE) AS PickupDate, 'Thursday' AS WeekdayName, 4 AS WeekdayNumber, 246865 AS NumberOfTrips, 2.863291394 AS AvgDistance, 12.90945193 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-22' AS DATE) AS PickupDate, 'Wednesday' AS WeekdayName, 3 AS WeekdayNumber, 236450 AS NumberOfTrips, 2.784504208 AS AvgDistance, 12.68837720 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-09' AS DATE) AS PickupDate, 'Thursday' AS WeekdayName, 4 AS WeekdayNumber, 250181 AS NumberOfTrips, 2.888248308 AS AvgDistance, 13.01847938 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-17' AS DATE) AS PickupDate, 'Friday' AS WeekdayName, 5 AS WeekdayNumber, 259705 AS NumberOfTrips, 2.832833908 AS AvgDistance, 12.76231355 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-14' AS DATE) AS PickupDate, 'Tuesday' AS WeekdayName, 2 AS WeekdayNumber, 317130 AS NumberOfTrips, 2.830828439 AS AvgDistance, 12.81617005 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-16' AS DATE) AS PickupDate, 'Thursday' AS WeekdayName, 4 AS WeekdayNumber, 254881 AS NumberOfTrips, 2.824201490 AS AvgDistance, 12.82655835 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-29' AS DATE) AS PickupDate, 'Wednesday' AS WeekdayName, 3 AS WeekdayNumber, 241997 AS NumberOfTrips, 2.772662884 AS AvgDistance, 12.71310925 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-24' AS DATE) AS PickupDate, 'Friday' AS WeekdayName, 5 AS WeekdayNumber, 248452 AS NumberOfTrips, 2.837416845 AS AvgDistance, 12.82717004 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-19' AS DATE) AS PickupDate, 'Sunday' AS WeekdayName, 0 AS WeekdayNumber, 187029 AS NumberOfTrips, 3.027041635 AS AvgDistance, 12.55000011 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-27' AS DATE) AS PickupDate, 'Monday' AS WeekdayName, 1 AS WeekdayNumber, 203245 AS NumberOfTrips, 3.000888681 AS AvgDistance, 13.00633393 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-12' AS DATE) AS PickupDate, 'Sunday' AS WeekdayName, 0 AS WeekdayNumber, 200190 AS NumberOfTrips, 3.231275089 AS AvgDistance, 13.34709351 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-26' AS DATE) AS PickupDate, 'Sunday' AS WeekdayName, 0 AS WeekdayNumber, 199961 AS NumberOfTrips, 3.121571156 AS AvgDistance, 13.02878826 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-10' AS DATE) AS PickupDate, 'Friday' AS WeekdayName, 5 AS WeekdayNumber, 246516 AS NumberOfTrips, 2.949418821 AS AvgDistance, 13.20513281 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-13' AS DATE) AS PickupDate, 'Monday' AS WeekdayName, 1 AS WeekdayNumber, 223498 AS NumberOfTrips, 2.976766190 AS AvgDistance, 13.20867829 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-30' AS DATE) AS PickupDate, 'Thursday' AS WeekdayName, 4 AS WeekdayNumber, 257929 AS NumberOfTrips, 2.839820920 AS AvgDistance, 12.93011922 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-28' AS DATE) AS PickupDate, 'Tuesday' AS WeekdayName, 2 AS WeekdayNumber, 230215 AS NumberOfTrips, 3.691287970 AS AvgDistance, 12.50978646 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-20' AS DATE) AS PickupDate, 'Monday' AS WeekdayName, 1 AS WeekdayNumber, 171643 AS NumberOfTrips, 3.236503091 AS AvgDistance, 13.16145208 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-15' AS DATE) AS PickupDate, 'Wednesday' AS WeekdayName, 3 AS WeekdayNumber, 231658 AS NumberOfTrips, 2.872086697 AS AvgDistance, 12.92222772 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-18' AS DATE) AS PickupDate, 'Saturday' AS WeekdayName, 6 AS WeekdayNumber, 201245 AS NumberOfTrips, 2.708089940 AS AvgDistance, 11.95403324 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-08' AS DATE) AS PickupDate, 'Wednesday' AS WeekdayName, 3 AS WeekdayNumber, 234692 AS NumberOfTrips, 2.885778126 AS AvgDistance, 12.74203607 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-25' AS DATE) AS PickupDate, 'Saturday' AS WeekdayName, 6 AS WeekdayNumber, 236541 AS NumberOfTrips, 2.605902613 AS AvgDistance, 11.75244702 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-21' AS DATE) AS PickupDate, 'Tuesday' AS WeekdayName, 2 AS WeekdayNumber, 228019 AS NumberOfTrips, 2.859358387 AS AvgDistance, 12.88700257 AS AvgFareAmount UNION ALL SELECT CAST('2020-01-31' AS DATE) AS PickupDate, 'Friday' AS WeekdayName, 5 AS WeekdayNumber, 251978 AS NumberOfTrips, 2.849273786 AS AvgDistance, 13.00445670 AS AvgFareAmount ; -- Run the Federated query between Spanner and BigQuery EXECUTE IMMEDIATE """ WITH WeatherData AS (SELECT station_id, station_date, snow_mm_amt, precipitation_tenth_mm_amt, min_celsius_temp, max_celsius_temp FROM EXTERNAL_QUERY( 'projects/${project_id}/locations/${spanner_region}/connections/bq_spanner_connection', "SELECT * FROM weather WHERE station_id='USW00094728' AND station_date BETWEEN '2020-01-01' AND '2020-01-31'") ) , TaxiData AS ( SELECT PickupDate, WeekdayName, WeekdayNumber, NumberOfTrips, AvgDistance, AvgFareAmount FROM `${project_id}.${bigquery_taxi_dataset}_spanner.taxi_averages` AS taxi_averages ) SELECT TaxiData.*, WeatherData.* FROM TaxiData INNER JOIN WeatherData ON TaxiData.PickupDate = WeatherData.station_date ORDER BY TaxiData.WeekdayNumber; """;