sql-scripts/rideshare_lakehouse_enriched/sp_create_streaming_view.sql (38 lines of code) (raw):
/*##################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################*/
/*
Use Cases:
- Creates a View over the streaming data. This will determine the pickup and dropoff location of each ride.
The raw telemetry data provide updates during the ride (latitude/longitude) and this ranks/partitions the
data to skip the intermediate updates.
Description:
-
Show:
-
References:
-
Clean up / Reset script:
DROP VIEW IF EXISTS `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips`;
*/
CREATE OR REPLACE VIEW `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips` AS
WITH PickupData AS
(
SELECT rideshare_id, latitude, longitude, rideshare_timestamp, total_amount, ride_status,
RANK() OVER (PARTITION BY rideshare_id
ORDER BY rideshare_timestamp) AS Ranking
FROM `${project_id}.${bigquery_rideshare_lakehouse_raw_dataset}.bigquery_streaming_rideshare_trips`
WHERE ride_status = 'enroute'
)
, DropOffData AS
(
SELECT rideshare_id, latitude, longitude, rideshare_timestamp, total_amount, ride_status,
RANK() OVER (PARTITION BY rideshare_id
ORDER BY rideshare_timestamp DESC) AS Ranking
FROM `${project_id}.${bigquery_rideshare_lakehouse_raw_dataset}.bigquery_streaming_rideshare_trips`
WHERE ride_status = 'dropoff'
)
, results AS
(
-- compute miles since ST_DISTANCE is in meters (divide by 1609.34)
SELECT PickupData.rideshare_id,
CAST(ROUND(1 + RAND() * (263 - 1)) AS INT) AS pickup_location_id,
PickupData.rideshare_timestamp AS pickup_datetime,
CAST(ROUND(1 + RAND() * (263 - 1)) AS INT) AS dropoff_location_id ,
DropOffData.rideshare_timestamp AS dropoff_datetime,
ROUND(CAST (ST_DISTANCE(ST_GEOGPOINT(PickupData.longitude, PickupData.latitude),
ST_GEOGPOINT(DropOffData.longitude, DropOffData.latitude)) / 1609.34 AS NUMERIC), 5, "ROUND_HALF_EVEN") AS ride_distance,
TIMESTAMP_DIFF(DropOffData.rideshare_timestamp, PickupData.rideshare_timestamp, MINUTE) AS ride_duration_minutes,
ROUND(CAST (DropOffData.total_amount AS NUMERIC), 2, "ROUND_HALF_EVEN") AS total_amount
FROM PickupData
INNER JOIN DropOffData
ON PickupData.rideshare_id = DropOffData.rideshare_id
AND PickupData.Ranking = 1
AND DropOffData.Ranking = 1
)
SELECT *
FROM results;
-- Show the data
SELECT *
FROM `${project_id}.${bigquery_rideshare_lakehouse_enriched_dataset}.bigquery_streaming_rideshare_trips`;