sql-scripts/taxi_dataset/sp_demo_external_function.sql (82 lines of code) (raw):
/*##################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################*/
/*
Use Cases:
- Call custom code from BigQuery that is not achiveable with BQ JavaScript functions or SQL functions
Description:
- The below creates the external function "link" in BigQuery
- The below calls a single cloud function that has several methods
- The "user_defined_context" passes the mode (which method to call in the function)
- The Vision API is used as an example of processing data via a Cloud Function
- There is a Taxi Pickup/DropOff that shows looking up data (that might now be available in the database)
Show:
- Connection
- Security - the connection has a service principal.
- The service principal has been granted Cloud Function Invoker role
- The Cloud Function has been granted access to call to the processed storaged account (if we have images there)
- The Cloud Funcation requires an authenicated call
Download images:
gcloud storage cp gs://cloud-samples-data/vision/object_localization/duck_and_truck.jpg .
gcloud storage cp gs://cloud-samples-data/vision/label/setagaya.jpeg .
gcloud storage cp gs://cloud-samples-data/vision/landmark/eiffel_tower.jpg .
gcloud storage cp gs://cloud-samples-data/vision/logo/google_logo.jpg .
References:
- https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions
Clean up / Reset script:
DROP FUNCTION IF EXISTS `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_localize_objects`;
DROP FUNCTION IF EXISTS `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_labels`;
DROP FUNCTION IF EXISTS `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_landmarks`;
DROP FUNCTION IF EXISTS `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_logos`;
DROP FUNCTION IF EXISTS `${project_id}.${bigquery_taxi_dataset}.ext_udf_taxi_zone_lookup`;
*/
/*
# Example bq command to create a connection:
bq mk --connection \
--display_name="cloud-function" \
--connection_type=CLOUD_RESOURCE \
--project_id="${project_id}" \
--location=US \
"cloud-function"
# Example Cloud Function deployment - Source code is on GitHub
gcloud functions deploy bigquery_external_function \
--project="${project_id}" \
--region="${cloud_function_region}" \
--runtime="python310" \
--ingress-settings="all" \
--no-allow-unauthenticated \
--trigger-http
*/
-- Create the Function Link between BQ and the Cloud Function
CREATE OR REPLACE FUNCTION `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_localize_objects` (uri STRING) RETURNS JSON
REMOTE WITH CONNECTION `${project_id}.${bigquery_region}.cloud-function`
OPTIONS
(endpoint = 'https://${cloud_function_region}-${project_id}.cloudfunctions.net/bigquery_external_function',
user_defined_context = [("mode","localize_objects_uri")]
);
CREATE OR REPLACE FUNCTION `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_labels` (uri STRING) RETURNS JSON
REMOTE WITH CONNECTION `${project_id}.${bigquery_region}.cloud-function`
OPTIONS
(endpoint = 'https://${cloud_function_region}-${project_id}.cloudfunctions.net/bigquery_external_function',
user_defined_context = [("mode","detect_labels_uri")]
);
CREATE OR REPLACE FUNCTION `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_landmarks` (uri STRING) RETURNS JSON
REMOTE WITH CONNECTION `${project_id}.${bigquery_region}.cloud-function`
OPTIONS
(endpoint = 'https://${cloud_function_region}-${project_id}.cloudfunctions.net/bigquery_external_function',
user_defined_context = [("mode","detect_landmarks_uri")]
);
CREATE OR REPLACE FUNCTION `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_logos` (uri STRING) RETURNS JSON
REMOTE WITH CONNECTION `${project_id}.${bigquery_region}.cloud-function`
OPTIONS
(endpoint = 'https://${cloud_function_region}-${project_id}.cloudfunctions.net/bigquery_external_function',
user_defined_context = [("mode","detect_logos_uri")]
);
CREATE OR REPLACE FUNCTION `${project_id}.${bigquery_taxi_dataset}.ext_udf_taxi_zone_lookup` (LocationID INT64) RETURNS JSON
REMOTE WITH CONNECTION `${project_id}.${bigquery_region}.cloud-function`
OPTIONS
(endpoint = 'https://${cloud_function_region}-${project_id}.cloudfunctions.net/bigquery_external_function',
user_defined_context = [("mode","taxi_zone_lookup")]
);
-- Images from: "gcloud storage ls gs://cloud-samples-data/vision"
-- Call the object_localization method of Vision API
-- The Vision API can detect and extract multiple objects in an image with Object Localization.
-- Object localization identifies multiple objects in an image and provides a LocalizedObjectAnnotation for each object in the image.
-- https://cloud.google.com/vision/docs/object-localizer
-- For more images: gcloud storage ls gs://cloud-samples-data/vision/object_localization/
WITH Data AS
(
SELECT `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_localize_objects`('gs://cloud-samples-data/vision/object_localization/duck_and_truck.jpg') AS json_result
)
SELECT item.name,
item.score,
json_result
FROM Data, UNNEST(JSON_QUERY_ARRAY(Data.json_result.localized_object_annotations)) AS item;
-- Call the label_detection method of Vision API
-- The Vision API can detect and extract information about entities in an image, across a broad group of categories.
-- Labels can identify general objects, locations, activities, animal species, products, and more.
-- https://cloud.google.com/vision/docs/labels
-- For more images: gcloud storage ls gs://cloud-samples-data/vision/label
WITH Data AS
(
SELECT `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_labels`('gs://cloud-samples-data/vision/label/setagaya.jpeg') AS json_result
)
SELECT item.description,
item.score,
json_result
FROM Data, UNNEST(JSON_QUERY_ARRAY(Data.json_result.label_annotations)) AS item;
-- Call the landmark_detection method of Vision API
-- For more images: gcloud storage ls gs://cloud-samples-data/vision/landmark
-- Landmark Detection detects popular natural and human-made structures within an image
-- https://cloud.google.com/vision/docs/detecting-landmarks
-- For more images: gcloud storage ls gs://cloud-samples-data/vision/landmark
WITH Data AS
(
SELECT `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_landmarks`('gs://cloud-samples-data/vision/landmark/eiffel_tower.jpg') AS json_result
)
SELECT item.description,
item.score,
json_result
FROM Data, UNNEST(JSON_QUERY_ARRAY(Data.json_result.landmark_annotations)) AS item;
-- Call the logo_detection method of Vision API
-- Logo Detection detects popular product logos within an image.
-- https://cloud.google.com/vision/docs/detecting-logos
-- For more images: gcloud storage ls gs://cloud-samples-data/vision/logo
WITH Data AS
(
SELECT `${project_id}.${bigquery_taxi_dataset}.ext_udf_ai_detect_logos`('gs://cloud-samples-data/vision/logo/google_logo.jpg') AS json_result
)
SELECT item.description,
item.score,
json_result
FROM Data, UNNEST(JSON_QUERY_ARRAY(Data.json_result.logo_annotations)) AS item;
-- Function that looksup the pickup/dropoff location codes
-- Filter the data first (do not call the function and then filter, that would be bad)
WITH Data AS
(
SELECT Pickup_DateTime,
Dropoff_DateTime,
PULocationID,
DOLocationID,
Total_Amount
FROM `${project_id}.${bigquery_taxi_dataset}.taxi_trips`
WHERE PULocationID IS NOT NULL
AND DOLocationID IS NOT NULL
AND Total_Amount IS NOT NULL
AND PartitionDate = '2022-01-01'
LIMIT 10
)
SELECT Pickup_DateTime,
Dropoff_DateTime,
`${project_id}.${bigquery_taxi_dataset}.ext_udf_taxi_zone_lookup`(PULocationID) As PickupDetails,
`${project_id}.${bigquery_taxi_dataset}.ext_udf_taxi_zone_lookup`(DOLocationID) AS DropOffDetails,
Total_Amount
FROM Data;