backfill/2024-04-29-fxa_events/backend/users_services_daily_v2.sql (117 lines of code) (raw):

CREATE TEMP FUNCTION udf_contains_tier1_country(x ANY TYPE) AS ( -- EXISTS( SELECT country FROM UNNEST(x) AS country WHERE country IN ( -- 'United States', 'France', 'Germany', 'United Kingdom', 'Canada' ) ) ); CREATE TEMP FUNCTION count_distinct(arr ANY TYPE) AS ( (SELECT COUNT(DISTINCT x) FROM UNNEST(arr) AS x) ); WITH events_unnested AS ( SELECT e.* EXCEPT (events), event.timestamp AS event_timestamp, CONCAT(event.category, "_", event.name) AS event_name, event.extra AS event_extra FROM `mozdata.analysis.akomar_accounts_backend_events_v1` AS e CROSS JOIN UNNEST(e.events) AS event WITH OFFSET AS event_offset), fxa_events AS ( SELECT submission_timestamp, metrics.string.account_user_id_sha256 AS user_id_sha256, IF( metrics.string.relying_party_oauth_client_id = '', metrics.string.relying_party_service, metrics.string.relying_party_oauth_client_id ) AS service, metrics.string.session_flow_id AS flow_id, metrics.string.session_entrypoint AS entrypoint, event_name AS event_name, -- `access_token_checked` events are triggered on traffic from RP backend services and don't have client's geo data IF(event_name != 'access_token_checked', metadata.geo.country, NULL) AS country, metrics.string.utm_term AS utm_term, metrics.string.utm_medium AS utm_medium, metrics.string.utm_source AS utm_source, metrics.string.utm_campaign AS utm_campaign, metrics.string.utm_content AS utm_content, metadata.user_agent, FROM events_unnested WHERE DATE(submission_timestamp) BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY) AND @submission_date AND event_name IN ( 'access_token_checked', 'access_token_created', -- registration and login events used when deriving the first_seen table 'reg_complete', 'login_complete' ) ), windowed AS ( SELECT submission_timestamp, user_id_sha256, service, udf.mode_last(ARRAY_AGG(country) OVER w1) AS country, udf_contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country, LOGICAL_OR(event_name = 'reg_complete') OVER w1 AS registered, -- we cannot count distinct here because the window is ordered by submission_timestamp ARRAY_AGG( CONCAT( COALESCE(user_agent.browser, ''), '_', COALESCE(user_agent.os, ''), '_', COALESCE(user_agent.version, '') ) ) OVER w1 AS user_agent_devices, FROM fxa_events WHERE DATE(submission_timestamp) = @submission_date QUALIFY ROW_NUMBER() OVER ( PARTITION BY user_id_sha256, service, DATE(submission_timestamp) ORDER BY submission_timestamp ) = 1 WINDOW w1 AS ( PARTITION BY user_id_sha256, service, DATE(submission_timestamp) ORDER BY submission_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) ) SELECT DATE(@submission_date) AS submission_date, windowed.user_id_sha256, windowed.service, windowed.country, windowed.seen_in_tier1_country, windowed.registered, count_distinct(windowed.user_agent_devices) AS user_agent_device_count, FROM windowed