retail/ltv/bqml/scripts/40_procedure_predict.sql (65 lines of code) (raw):

-- Copyright 2020 Google LLC -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- https://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. CREATE OR REPLACE PROCEDURE PredictLTV( MODEL_NAME STRING, TABLE_DATA STRING, PREDICT_FROM_DATE STRING, WINDOW_LENGTH INT64, TABLE_PREDICTION STRING) BEGIN -- Date at which an input transactions window starts. DECLARE WINDOW_START STRING; IF WINDOW_LENGTH != 0 THEN SET WINDOW_START = (SELECT CAST(DATE_SUB(PREDICT_FROM_DATE, INTERVAL WINDOW_LENGTH DAY) AS STRING)); ELSE SET WINDOW_START = "1900-01-01"; END IF; IF PREDICT_FROM_DATE = 'NULL' THEN SET PREDICT_FROM_DATE = (SELECT CAST(CURRENT_DATE() AS STRING)); END IF; EXECUTE IMMEDIATE """ CREATE OR REPLACE TABLE """ || TABLE_PREDICTION || """ AS SELECT customer_id, monetary AS monetary_so_far, ROUND(predicted_target_monetary, 2) AS monetary_predicted, ROUND(predicted_target_monetary - monetary, 2) AS monetary_future FROM ML.PREDICT( MODEL """ || MODEL_NAME || """, ( SELECT customer_id, ROUND(monetary_orders, 2) AS monetary, cnt_orders AS frequency, recency, T, ROUND(recency/cnt_orders, 2) AS time_between, ROUND(avg_basket_value, 2) AS avg_basket_value, ROUND(avg_basket_size, 2) AS avg_basket_size, has_returns, CEIL(avg_time_to_return) AS avg_time_to_return, num_returns FROM ( SELECT customer_id, SUM(value) AS monetary_orders, DATE_DIFF(MAX(order_day), MIN(order_day), DAY) AS recency, DATE_DIFF(PARSE_DATE('%Y-%m-%d', '""" || PREDICT_FROM_DATE || """'), MIN(order_day), DAY) AS T, COUNT(DISTINCT order_day) AS cnt_orders, AVG(qty_articles) avg_basket_size, AVG(value) avg_basket_value, CASE WHEN SUM(num_returns) > 0 THEN 'y' ELSE 'n' END AS has_returns, AVG(time_to_return) avg_time_to_return, SUM(num_returns) num_returns, FROM """ || TABLE_DATA || """ WHERE order_day <= PARSE_DATE('%Y-%m-%d', '""" || PREDICT_FROM_DATE || """') AND order_day >= PARSE_DATE('%Y-%m-%d', '""" || WINDOW_START || """') GROUP BY customer_id ) ) )"""; END