models/business_models/product/newtab/newtab_visits_v1.sql (330 lines of code) (raw):
{{
config(
materialized='incremental',
partition_by = { 'field': 'submission_date', 'data_type': 'date' },
incremental_strategy = 'insert_overwrite',
require_partition_filter = true,
cluster_by = ["channel", "country_code"]
)
}}
WITH events_unnested AS (
SELECT
DATE(submission_timestamp) AS submission_date,
category AS event_category,
name AS event_name,
timestamp AS event_timestamp,
client_info,
metadata,
normalized_os,
normalized_os_version,
normalized_country_code,
normalized_channel,
ping_info,
extra AS event_details,
metrics
FROM
-- https://dictionary.telemetry.mozilla.org/apps/firefox_desktop/pings/newtab
`moz-fx-data-shared-prod.firefox_desktop_stable.newtab_v1`,
UNNEST(events)
WHERE
normalized_channel = 'nightly'
AND category IN ('newtab', 'topsites', 'newtab.search', 'newtab.search.ad', 'pocket')
AND name IN (
'closed',
'opened',
'impression',
'issued',
'click',
'save',
'topic_click',
'dismiss'
)
AND DATE(submission_timestamp) > DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY) -- limit backfill
{% if is_incremental() %}
AND DATE(submission_timestamp) > (SELECT max(submission_date) FROM {{ this }} WHERE submission_date > '2020-01-01')
{% endif %}
),
visit_metadata AS (
SELECT
mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id,
submission_date,
ANY_VALUE(client_info.client_id) AS client_id,
ANY_VALUE(metrics.uuid.legacy_telemetry_client_id) AS legacy_telemetry_client_id,
ANY_VALUE(normalized_os) AS normalized_os,
ANY_VALUE(normalized_os_version) AS normalized_os_version,
ANY_VALUE(normalized_country_code) AS country_code,
ANY_VALUE(normalized_channel) AS channel,
ANY_VALUE(client_info.locale) AS locale,
ANY_VALUE(client_info.app_display_version) AS browser_version,
"Firefox Desktop" AS browser_name,
ANY_VALUE(metrics.string.search_engine_default_engine_id) AS default_search_engine,
ANY_VALUE(metrics.string.search_engine_private_engine_id) AS default_private_search_engine,
ANY_VALUE(metrics.boolean.pocket_is_signed_in) AS pocket_is_signed_in,
ANY_VALUE(metrics.boolean.pocket_enabled) AS pocket_enabled,
ANY_VALUE(metrics.boolean.pocket_sponsored_stories_enabled) AS pocket_sponsored_stories_enabled,
ANY_VALUE(metrics.boolean.topsites_enabled) AS topsites_enabled,
ANY_VALUE(metrics.string.newtab_homepage_category) AS newtab_homepage_category,
ANY_VALUE(metrics.string.newtab_newtab_category) AS newtab_newtab_category,
ANY_VALUE(metrics.boolean.newtab_search_enabled) AS newtab_search_enabled,
ANY_VALUE(metrics.quantity.topsites_rows) AS topsites_rows,
ANY_VALUE(metrics.string_list.newtab_blocked_sponsors) AS newtab_blocked_sponsors,
ANY_VALUE(ping_info.experiments) AS experiments,
MIN(IF(event_name = "opened", event_timestamp, NULL)) AS newtab_visit_started_at,
MIN(IF(event_name = "closed", event_timestamp, NULL)) AS newtab_visit_ended_at,
ANY_VALUE(
IF(event_name = "opened", mozfun.map.get_key(event_details, "source"), NULL)
) AS newtab_open_source,
LOGICAL_OR(event_name IN ("click", "issued", "save")) AS had_non_impression_engagement,
LOGICAL_OR(event_name IN ("click", "save")) AS had_non_search_engagement
FROM
events_unnested
GROUP BY
newtab_visit_id,
submission_date
HAVING
newtab_visit_id IS NOT NULL
),
search_events AS (
SELECT
mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id,
`moz-fx-data-shared-prod`.udf.normalize_search_engine(
mozfun.map.get_key(event_details, "telemetry_id")
) AS search_engine,
mozfun.map.get_key(event_details, "search_access_point") AS search_access_point,
COUNTIF(event_name = "issued") AS searches,
COUNTIF(
event_name = "impression"
AND mozfun.map.get_key(event_details, "is_tagged") = "true"
) AS tagged_search_ad_impressions,
COUNTIF(
event_name = "impression"
AND mozfun.map.get_key(event_details, "is_follow_on") = "true"
) AS follow_on_search_ad_impressions,
COUNTIF(
event_name = "impression"
AND mozfun.map.get_key(event_details, "is_follow_on") = "true"
AND mozfun.map.get_key(event_details, "is_tagged") = "true"
) AS tagged_follow_on_search_ad_impressions,
COUNTIF(
event_name = "click"
AND mozfun.map.get_key(event_details, "is_tagged") = "true"
) AS tagged_search_ad_clicks,
COUNTIF(
event_name = "click"
AND mozfun.map.get_key(event_details, "is_follow_on") = "true"
) AS follow_on_search_ad_clicks,
COUNTIF(
event_name = "click"
AND mozfun.map.get_key(event_details, "is_follow_on") = "true"
AND mozfun.map.get_key(event_details, "is_tagged") = "true"
) AS tagged_follow_on_search_ad_clicks
FROM
events_unnested
WHERE
event_category IN ('newtab.search', 'newtab.search.ad')
GROUP BY
newtab_visit_id,
search_engine,
search_access_point
),
search_summary AS (
SELECT
newtab_visit_id,
ARRAY_AGG(
STRUCT(
search_engine,
search_access_point,
searches,
tagged_search_ad_clicks,
tagged_search_ad_impressions,
follow_on_search_ad_clicks,
follow_on_search_ad_impressions,
tagged_follow_on_search_ad_clicks,
tagged_follow_on_search_ad_impressions
)
) AS search_interactions
FROM
search_events
GROUP BY
newtab_visit_id
),
topsites_events AS (
SELECT
mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id,
SAFE_CAST(mozfun.map.get_key(event_details, "position") AS INT64) AS topsite_tile_position,
mozfun.map.get_key(event_details, "advertiser_name") AS topsite_tile_advertiser_name,
mozfun.map.get_key(event_details, "tile_id") AS topsite_tile_id,
JSON_EXTRACT(sov, "$.assigned") AS topsite_tile_assigned_sov_branch,
JSON_EXTRACT(sov, "$.chosen") AS topsite_tile_displayed_sov_branch,
COUNTIF(event_name = 'impression') AS topsite_tile_impressions,
COUNTIF(event_name = 'click') AS topsite_tile_clicks,
COUNTIF(
event_name = 'impression'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_topsite_tile_impressions,
COUNTIF(
event_name = 'impression'
AND mozfun.map.get_key(event_details, "is_sponsored") = "false"
) AS organic_topsite_tile_impressions,
COUNTIF(
event_name = 'click'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_topsite_tile_clicks,
COUNTIF(
event_name = 'click'
AND mozfun.map.get_key(event_details, "is_sponsored") = "false"
) AS organic_topsite_tile_clicks,
COUNTIF(event_name = 'dismiss') AS topsite_tile_dismissals,
COUNTIF(
event_name = 'dismiss'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_topsite_tile_dismissals,
COUNTIF(
event_name = 'dismiss'
AND mozfun.map.get_key(event_details, "is_sponsored") = "false"
) AS organic_topsite_tile_dismissals,
FROM
events_unnested
LEFT JOIN
UNNEST(metrics.string_list.newtab_sov_allocation) sov
ON SAFE_CAST(mozfun.map.get_key(event_details, "position") AS INT64) = SAFE_CAST(
JSON_EXTRACT(sov, "$.pos") AS INT64
)
WHERE
event_category = 'topsites'
GROUP BY
newtab_visit_id,
topsite_tile_position,
topsite_tile_advertiser_name,
topsite_tile_id,
topsite_tile_assigned_sov_branch,
topsite_tile_displayed_sov_branch
),
topsites_summary AS (
SELECT
newtab_visit_id,
ARRAY_AGG(
STRUCT(
topsite_tile_advertiser_name,
topsite_tile_position,
topsite_tile_id,
topsite_tile_assigned_sov_branch,
topsite_tile_displayed_sov_branch,
topsite_tile_clicks,
sponsored_topsite_tile_clicks,
organic_topsite_tile_clicks,
topsite_tile_impressions,
sponsored_topsite_tile_impressions,
organic_topsite_tile_impressions,
topsite_tile_dismissals,
sponsored_topsite_tile_dismissals,
organic_topsite_tile_dismissals
)
) AS topsite_tile_interactions
FROM
topsites_events
GROUP BY
newtab_visit_id
),
pocket_events AS (
SELECT
mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id,
SAFE_CAST(mozfun.map.get_key(event_details, "position") AS INT64) AS pocket_story_position,
COUNTIF(event_name = 'save') AS pocket_saves,
COUNTIF(event_name = 'click') AS pocket_clicks,
COUNTIF(event_name = 'impression') AS pocket_impressions,
COUNTIF(
event_name = 'click'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_pocket_clicks,
COUNTIF(
event_name = 'click'
AND mozfun.map.get_key(event_details, "is_sponsored") != "true"
) AS organic_pocket_clicks,
COUNTIF(
event_name = 'impression'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_pocket_impressions,
COUNTIF(
event_name = 'impression'
AND mozfun.map.get_key(event_details, "is_sponsored") != "true"
) AS organic_pocket_impressions,
COUNTIF(
event_name = 'save'
AND mozfun.map.get_key(event_details, "is_sponsored") = "true"
) AS sponsored_pocket_saves,
COUNTIF(
event_name = 'save'
AND mozfun.map.get_key(event_details, "is_sponsored") != "true"
) AS organic_pocket_saves,
FROM
events_unnested
WHERE
event_category = 'pocket'
GROUP BY
newtab_visit_id,
pocket_story_position
),
pocket_summary AS (
SELECT
newtab_visit_id,
ARRAY_AGG(
STRUCT(
pocket_story_position,
pocket_impressions,
sponsored_pocket_impressions,
organic_pocket_impressions,
pocket_clicks,
sponsored_pocket_clicks,
organic_pocket_clicks,
pocket_saves,
sponsored_pocket_saves,
organic_pocket_saves
)
) AS pocket_interactions
FROM
pocket_events
GROUP BY
newtab_visit_id
),
combined_newtab_activity AS (
SELECT
*
FROM
visit_metadata
LEFT JOIN
search_summary
USING (newtab_visit_id)
LEFT JOIN
topsites_summary
USING (newtab_visit_id)
LEFT JOIN
pocket_summary
USING (newtab_visit_id)
WHERE
-- Keep only rows with interactions, unless we receive a valid newtab.opened event.
-- This is meant to drop only interactions that only have a newtab.closed event on the same partition
-- (these are suspected to be from pre-loaded tabs)
newtab_open_source IS NOT NULL
OR search_interactions IS NOT NULL
OR topsite_tile_interactions IS NOT NULL
OR pocket_interactions IS NOT NULL
),
client_profile_info AS (
SELECT
client_id AS legacy_telemetry_client_id,
ANY_VALUE(is_new_profile) AS is_new_profile,
ANY_VALUE(activity_segment) AS activity_segment
FROM
`moz-fx-data-shared-prod.telemetry_derived.unified_metrics_v1`
WHERE
submission_date > DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY) -- limit backfill
{% if is_incremental() %}
AND submission_date > (SELECT max(submission_date) FROM {{ this }} WHERE submission_date > '2020-01-01')
{% endif %}
GROUP BY
client_id
)
SELECT
*
FROM
combined_newtab_activity
LEFT JOIN
client_profile_info
USING (legacy_telemetry_client_id)