sql/moz-fx-data-shared-prod/contextual_services_derived/event_aggregates_v1/query.sql (390 lines of code) (raw):
WITH blocks AS (
SELECT
b.id,
b.queryType AS query_type,
FROM
`moz-fx-ads-prod.adm.blocks` b
WHERE
b.date <= @submission_date
QUALIFY
1 = ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY b.date DESC)
),
combined AS (
SELECT
metrics.uuid.quick_suggest_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'suggest' AS source,
IF(
metrics.string.quick_suggest_ping_type = "quicksuggest-click",
"click",
"impression"
) AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.quick_suggest_advertiser AS advertiser,
client_info.app_channel AS release_channel,
metrics.quantity.quick_suggest_position AS position,
CASE
WHEN NULLIF(metrics.string.quick_suggest_request_id, "") IS NULL
THEN 'remote settings'
ELSE 'merino'
END AS provider,
metrics.string.quick_suggest_match_type AS match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
(metrics.boolean.quick_suggest_improve_suggest_experience) AS suggest_data_sharing_enabled,
blocks.query_type,
FROM
`moz-fx-data-shared-prod.firefox_desktop.quick_suggest` qs
LEFT JOIN
blocks
ON SAFE_CAST(qs.metrics.string.quick_suggest_block_id AS INT) = blocks.id
WHERE
metrics.string.quick_suggest_ping_type IN ("quicksuggest-click", "quicksuggest-impression")
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
'suggest' AS source,
'impression' AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
advertiser,
release_channel,
position,
CASE
WHEN request_id IS NULL
THEN 'remote settings'
ELSE 'merino'
END AS provider,
match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
(
-- The first check is for Fx 103+, the last two checks are for Fx 102 and prior.
improve_suggest_experience_checked
OR request_id IS NOT NULL
OR scenario = 'online'
) AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.contextual_services.quicksuggest_impression`
WHERE
-- For firefox 116+ use `moz-fx-data-shared-prod.firefox_desktop.quick_suggest` instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
'suggest' AS source,
'click' AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
advertiser,
release_channel,
position,
CASE
WHEN request_id IS NULL
THEN 'remote settings'
ELSE 'merino'
END AS provider,
match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
(
-- The first check is for Fx 103+, the last two checks are for Fx 102 and prior.
improve_suggest_experience_checked
OR request_id IS NOT NULL
OR scenario = 'online'
) AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.contextual_services.quicksuggest_click`
WHERE
-- For firefox 116+ use `moz-fx-data-shared-prod.firefox_desktop.quick_suggest` instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
-- Suggest Android
SELECT
metrics.uuid.fx_suggest_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'suggest' AS source,
IF(
metrics.string.fx_suggest_ping_type = "fxsuggest-click",
"click",
"impression"
) AS event_type,
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.fx_suggest_advertiser AS advertiser,
client_info.app_channel AS release_channel,
metrics.quantity.fx_suggest_position AS position,
-- Only remote settings is in use on mobile
'remote settings' AS provider,
-- Only standard suggestions are in use on mobile
'firefox-suggest' AS match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
-- This is the opt-in for Merino, not in use on mobile
CAST(NULL AS BOOLEAN) AS suggest_data_sharing_enabled,
blocks.query_type,
FROM
`moz-fx-data-shared-prod.fenix.fx_suggest` fs
LEFT JOIN
blocks
ON fs.metrics.quantity.fx_suggest_block_id = blocks.id
WHERE
metrics.string.fx_suggest_ping_type IN ("fxsuggest-click", "fxsuggest-impression")
UNION ALL
-- Suggest iOS
SELECT
metrics.uuid.fx_suggest_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'suggest' AS source,
IF(
metrics.string.fx_suggest_ping_type = "fxsuggest-click",
"click",
"impression"
) AS event_type,
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.fx_suggest_advertiser AS advertiser,
client_info.app_channel AS release_channel,
metrics.quantity.fx_suggest_position AS position,
-- Only remote settings is in use on mobile
'remote settings' AS provider,
-- Only standard suggestions are in use on mobile
'firefox-suggest' AS match_type,
-- This is now hardcoded, we can use the derived `normalized_os` once
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1773722 is fixed
'iOS' AS normalized_os,
-- This is the opt-in for Merino, not in use on mobile
CAST(NULL AS BOOLEAN) AS suggest_data_sharing_enabled,
blocks.query_type,
FROM
`moz-fx-data-shared-prod.firefox_ios.fx_suggest` fs
LEFT JOIN
blocks
ON fs.metrics.quantity.fx_suggest_block_id = blocks.id
WHERE
metrics.string.fx_suggest_ping_type IN ("fxsuggest-click", "fxsuggest-impression")
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(metrics.string.top_sites_ping_type = "topsites-click", "click", "impression") AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.top_sites_advertiser AS advertiser,
client_info.app_channel AS release_channel,
metrics.quantity.top_sites_position AS position,
CASE
WHEN metrics.url.top_sites_reporting_url IS NULL
THEN 'remote settings'
ELSE 'contile'
END AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
CAST(NULL AS BOOLEAN) AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.firefox_desktop.top_sites`
WHERE
metrics.string.top_sites_ping_type IN ("topsites-click", "topsites-impression")
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
'impression' AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
advertiser,
release_channel,
position,
CASE
WHEN reporting_url IS NULL
THEN 'remote settings'
ELSE 'contile'
END AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.contextual_services.topsites_impression`
WHERE
-- For firefox 116+ use `moz-fx-data-shared-prod.firefox_desktop.top_sites` instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
'click' AS event_type,
'desktop' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
advertiser,
release_channel,
position,
CASE
WHEN reporting_url IS NULL
THEN 'remote settings'
ELSE 'contile'
END AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
SPLIT(metadata.user_agent.os, ' ')[SAFE_OFFSET(0)] AS normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.contextual_services.topsites_click`
WHERE
-- For firefox 116+ use `moz-fx-data-shared-prod.firefox_desktop.top_sites` instead
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1836283
SAFE_CAST(metadata.user_agent.version AS INT64) < 116
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(events[SAFE_OFFSET(0)].name = 'contile_click', 'click', 'impression') AS event_type,
-- The adMarketplace APIs accept form factors out of "desktop", "phone", or "tablet";
-- we are currently always using "phone" for Fenix, so stay consistent with that here.
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.top_sites_contile_advertiser AS advertiser,
'release' AS release_channel,
SAFE_CAST(
(SELECT value FROM UNNEST(events[SAFE_OFFSET(0)].extra) WHERE key = 'position') AS INT64
) AS position,
-- Only Contile is available for mobile tiles.
'contile' AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox.topsites_impression`
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(events[SAFE_OFFSET(0)].name = 'contile_click', 'click', 'impression') AS event_type,
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.top_sites_contile_advertiser AS advertiser,
'beta' AS release_channel,
SAFE_CAST(
(SELECT value FROM UNNEST(events[SAFE_OFFSET(0)].extra) WHERE key = 'position') AS INT64
) AS position,
-- Only Contile is available for mobile tiles.
'contile' AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_beta.topsites_impression`
UNION ALL
SELECT
metrics.uuid.top_sites_context_id AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(events[SAFE_OFFSET(0)].name = 'contile_click', 'click', 'impression') AS event_type,
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
metrics.string.top_sites_contile_advertiser AS advertiser,
'nightly' AS release_channel,
SAFE_CAST(
(SELECT value FROM UNNEST(events[SAFE_OFFSET(0)].extra) WHERE key = 'position') AS INT64
) AS position,
-- Only Contile is available for mobile tiles.
'contile' AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix.topsites_impression`
UNION ALL
SELECT
-- Due to the renaming (from 'topsite' to 'topsites'), some legacy Firefox
-- versions are still using the `topsite` key in the telemetry
IFNULL(metrics.uuid.top_sites_context_id, metrics.uuid.top_site_context_id) AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(events[SAFE_OFFSET(0)].name = 'contile_click', 'click', 'impression') AS event_type,
-- The adMarketplace APIs accept form factors out of "desktop", "phone", or "tablet";
-- we are currently always using "phone" for Fenix, so stay consistent with that here.
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
IFNULL(
metrics.string.top_sites_contile_advertiser,
metrics.string.top_site_contile_advertiser
) AS advertiser,
'release' AS release_channel,
SAFE_CAST(
(SELECT value FROM UNNEST(events[SAFE_OFFSET(0)].extra) WHERE key = 'position') AS INT64
) AS position,
-- Only Contile is available for mobile tiles.
'contile' AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
-- This is now hardcoded, we can use the derived `normalized_os` once
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1773722 is fixed
'iOS' AS normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.org_mozilla_ios_firefox.topsites_impression`
UNION ALL
SELECT
-- Due to the renaming (from 'topsite' to 'topsites'), some legacy Firefox
-- versions are still using the `topsite` key in the telemetry
IFNULL(metrics.uuid.top_sites_context_id, metrics.uuid.top_site_context_id) AS context_id,
DATE(submission_timestamp) AS submission_date,
'topsites' AS source,
IF(events[SAFE_OFFSET(0)].name = 'contile_click', 'click', 'impression') AS event_type,
-- The adMarketplace APIs accept form factors out of "desktop", "phone", or "tablet";
-- we are currently always using "phone" for Fenix, so stay consistent with that here.
'phone' AS form_factor,
normalized_country_code AS country,
metadata.geo.subdivision1 AS subdivision1,
IFNULL(
metrics.string.top_sites_contile_advertiser,
metrics.string.top_site_contile_advertiser
) AS advertiser,
'beta' AS release_channel,
SAFE_CAST(
(SELECT value FROM UNNEST(events[SAFE_OFFSET(0)].extra) WHERE key = 'position') AS INT64
) AS position,
-- Only Contile is available for mobile tiles.
'contile' AS provider,
-- `match_type` is only available for `quicksuggest_*` tables
NULL AS match_type,
-- This is now hardcoded, we can use the derived `normalized_os` once
-- https://bugzilla.mozilla.org/show_bug.cgi?id=1773722 is fixed
'iOS' AS normalized_os,
-- 'suggest_data_sharing_enabled' is only available for `quicksuggest_*` tables
NULL AS suggest_data_sharing_enabled,
CAST(NULL AS STRING) AS query_type,
FROM
`moz-fx-data-shared-prod.org_mozilla_ios_firefoxbeta.topsites_impression`
),
with_event_count AS (
SELECT
*,
COUNT(*) OVER (
PARTITION BY
submission_date,
context_id,
source,
event_type,
form_factor
) AS user_event_count,
FROM
combined
ORDER BY
context_id
)
SELECT
* EXCEPT (context_id, user_event_count, query_type),
COUNT(*) AS event_count,
COUNT(DISTINCT(context_id)) AS user_count,
query_type,
FROM
with_event_count
WHERE
submission_date = @submission_date
-- Filter out events associated with suspiciously active clients.
AND NOT (user_event_count > 50 AND event_type = 'click')
AND IF(
DATE_DIFF(CURRENT_DATE(), @submission_date, DAY) > 30,
ERROR("Data older than 30 days has been removed"),
NULL
) IS NULL
GROUP BY
submission_date,
source,
event_type,
form_factor,
country,
subdivision1,
advertiser,
release_channel,
position,
provider,
match_type,
normalized_os,
suggest_data_sharing_enabled,
query_type