sql-scripts/rideshare_lakehouse_enriched/sp_create_streaming_view.sql (38 lines of code) (raw):

/*################################################################################## # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ###################################################################################*/ /* Use Cases: - Creates a View over the streaming data. This will determine the pickup and dropoff location of each ride. The raw telemetry data provide updates during the ride (latitude/longitude) and this ranks/partitions the data to skip the intermediate updates. Description: - Show: - References: - Clean up / Reset script: DROP VIEW IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips`; */ CREATE OR REPLACE VIEW `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips` AS WITH PickupData AS ( SELECT rideshare_id, latitude, longitude, rideshare_timestamp, total_amount, ride_status, RANK() OVER (PARTITION BY rideshare_id ORDER BY rideshare_timestamp) AS Ranking FROM `${project_id}.${bigquery_rideshare_lakehouse_raw_dataset}.bigquery_streaming_rideshare_trips` WHERE ride_status = 'enroute' ) , DropOffData AS ( SELECT rideshare_id, latitude, longitude, rideshare_timestamp, total_amount, ride_status, RANK() OVER (PARTITION BY rideshare_id ORDER BY rideshare_timestamp DESC) AS Ranking FROM `${project_id}.${bigquery_rideshare_lakehouse_raw_dataset}.bigquery_streaming_rideshare_trips` WHERE ride_status = 'dropoff' ) , results AS ( -- compute miles since ST_DISTANCE is in meters (divide by 1609.34) SELECT PickupData.rideshare_id, CAST(ROUND(1 + RAND() * (263 - 1)) AS INT) AS pickup_location_id, PickupData.rideshare_timestamp AS pickup_datetime, CAST(ROUND(1 + RAND() * (263 - 1)) AS INT) AS dropoff_location_id , DropOffData.rideshare_timestamp AS dropoff_datetime, ROUND(CAST (ST_DISTANCE(ST_GEOGPOINT(PickupData.longitude, PickupData.latitude), ST_GEOGPOINT(DropOffData.longitude, DropOffData.latitude)) / 1609.34 AS NUMERIC), 5, "ROUND_HALF_EVEN") AS ride_distance, TIMESTAMP_DIFF(DropOffData.rideshare_timestamp, PickupData.rideshare_timestamp, MINUTE) AS ride_duration_minutes, ROUND(CAST (DropOffData.total_amount AS NUMERIC), 2, "ROUND_HALF_EVEN") AS total_amount FROM PickupData INNER JOIN DropOffData ON PickupData.rideshare_id = DropOffData.rideshare_id AND PickupData.Ranking = 1 AND DropOffData.Ranking = 1 ) SELECT * FROM results; -- Show the data SELECT * FROM `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips`;