jetstream/yelp-suggestions-pilot-v3.toml (547 lines of code) (raw):
[experiment]
enrollment_period = 8
segments = ["exposed", "not_exposed"]
[metrics]
overall = [
# Yelp suggestion
"yelp_impressions",
"yelp_clicks",
"yelp_impression_rate",
"yelp_ctr",
"yelp_annoyances",
"saw_yelp",
"clicked_yelp",
# SERP
"serp_impressions",
"serp_impressions_with_ads",
"serp_has_ad_click",
"serp_ad_clicks",
"serp_ad_impression_rate",
"serp_ad_ctr",
"serp_ad_click_rate",
# Exposed urlbar
"urlbar_search_combined_sessions",
"exposed_urlbar_sessions",
"exposed_urlbar_engagements",
"exposure_rate",
"exposed_urlbar_engagement_rate",
"exposed_search_engine_clicks",
"exposed_search_engine_rate",
# Exposed SERP
"exposed_serp_impressions",
"exposed_serp_impressions_with_ads",
"exposed_serp_has_ad_click",
"exposed_serp_ad_clicks",
"exposed_tagged_google_ad_clicks",
"exposed_serp_ad_impression_rate",
"exposed_serp_ad_ctr",
"exposed_serp_ad_click_rate",
]
weekly = [
# Yelp suggestion
"yelp_impressions",
"yelp_clicks",
"yelp_impression_rate",
"yelp_ctr",
"yelp_annoyances",
"saw_yelp",
"clicked_yelp",
# SERP
"serp_impressions",
"serp_impressions_with_ads",
"serp_has_ad_click",
"serp_ad_clicks",
"serp_ad_impression_rate",
"serp_ad_ctr",
"serp_ad_click_rate",
]
#----------------------------------------------
# Yelp suggestion metrics
# These are for exposed sessions, by definition
[metrics.yelp_impressions]
select_expression = """COUNTIF(
is_terminal AND
EXISTS(SELECT * FROM UNNEST(results) r WHERE r.product_result_type = 'yelp_suggestion')
)"""
data_source = "urlbar_events"
friendly_name = "Yelp impressions"
description = "Number of Yelp impressions at engagement time"
[metrics.yelp_impressions.statistics.bootstrap_mean]
[metrics.yelp_impressions.statistics.deciles]
[metrics.yelp_clicks]
select_expression = """COUNTIF(
is_terminal
AND event_action = 'engaged'
AND product_selected_result = 'yelp_suggestion'
)"""
data_source = "urlbar_events"
friendly_name = "Yelp clicks"
description = "Number of clicks on Yelp results"
[metrics.yelp_clicks.statistics.bootstrap_mean]
[metrics.yelp_clicks.statistics.deciles]
[metrics.yelp_impression_rate]
depends_on = ["yelp_impressions", "urlbar_impressions"]
friendly_name = "Yelp impression rate"
description = "Proportion of urlbar sessions with a Yelp impression"
[metrics.yelp_impression_rate.statistics.population_ratio]
numerator = "yelp_impressions"
denominator = "urlbar_impressions"
[metrics.yelp_ctr]
depends_on = ["yelp_clicks", "yelp_impressions"]
friendly_name = "Yelp CTR"
description = "Proportion of urlbar sessions with a Yelp impression where a Yelp was clicked"
[metrics.yelp_ctr.statistics.population_ratio]
numerator = "yelp_clicks"
denominator = "yelp_impressions"
[metrics.yelp_annoyances]
select_expression = """COUNTIF(
event_action = 'annoyance'
AND product_engaged_result_type = 'yelp_suggestion'
)"""
data_source = "urlbar_events"
friendly_name = "Yelp annoyances"
description = "Number of clicks on annoyance signals for Yelp results"
[metrics.yelp_annoyances.statistics.bootstrap_mean]
[metrics.yelp_annoyances.statistics.deciles]
[metrics.saw_yelp]
select_expression = """COUNTIF(
is_terminal AND
EXISTS(SELECT * FROM UNNEST(results) r WHERE r.product_result_type = 'yelp_suggestion')
) > 0"""
data_source = "urlbar_events"
friendly_name = "Proportion had a Yelp impression"
description = "Proportion of clients that had a Yelp impression at the end of a urlbar session (not available on control)"
[metrics.saw_yelp.statistics.binomial]
[metrics.clicked_yelp]
select_expression = """COUNTIF(
is_terminal
AND event_action = 'engaged'
AND product_selected_result = 'yelp_suggestion'
) > 0"""
data_source = "urlbar_events"
friendly_name = "Proportion clicked a Yelp result"
description = "Proportion of clients that clicked a Yelp suggestion (not available on control)"
[metrics.clicked_yelp.statistics.binomial]
#----------------------------------------------
# SERP metrics
# serp_impressions metric definition is in firefox_desktop definitions
[metrics.serp_impressions.statistics.bootstrap_mean]
[metrics.serp_impressions.statistics.deciles]
[metrics.serp_impressions_with_ads]
select_expression = "COUNTIF(num_ads_loaded > 0)"
data_source = "serp_events"
friendly_name = "SERPs with ads"
description = "Number of SERP page loads with ads loaded (all engines). Ads may or may not be visible to the user."
[metrics.serp_impressions_with_ads.statistics.bootstrap_mean]
[metrics.serp_impressions_with_ads.statistics.deciles]
[metrics.serp_has_ad_click]
select_expression = "COUNTIF(num_ad_clicks > 0)"
data_source = "serp_events"
friendly_name = "SERPs with ad clicks"
description = "Number of SERP page loads where an ad was clicked (all engines)"
[metrics.serp_has_ad_click.statistics.bootstrap_mean]
[metrics.serp_has_ad_click.statistics.deciles]
[metrics.serp_ad_clicks]
select_expression = "COALESCE(SUM(num_ad_clicks), 0)"
data_source = "serp_events"
friendly_name = "SERP ad clicks"
description = "Number of ad clicks across SERP page loads (all engines)"
[metrics.serp_ad_clicks.statistics.bootstrap_mean]
[metrics.serp_ad_clicks.statistics.deciles]
[metrics.serp_ad_impression_rate]
depends_on = ["serp_impressions_with_ads", "serp_impressions"]
friendly_name = "SERP ad impression rate"
description = "Proportion of SERP page loads that had ads loaded"
[metrics.serp_ad_impression_rate.statistics.population_ratio]
numerator = "serp_impressions_with_ads"
denominator = "serp_impressions"
[metrics.serp_ad_ctr]
depends_on = ["serp_has_ad_click", "serp_impressions_with_ads"]
friendly_name = "SERP ad CTR"
description = "Proportion of SERP page loads with ads loaded that had an ad click"
[metrics.serp_ad_ctr.statistics.population_ratio]
numerator = "serp_has_ad_click"
denominator = "serp_impressions_with_ads"
[metrics.serp_ad_click_rate]
depends_on = ["serp_ad_clicks", "serp_impressions"]
friendly_name = "SERP ad click rate"
description = "Number of ad clicks per SERP page load"
[metrics.serp_ad_click_rate.statistics.population_ratio]
numerator = "serp_ad_clicks"
denominator = "serp_impressions"
#----------------------------------------------
# Exposed session urlbar metrics
[metrics.urlbar_search_combined_sessions]
select_expression = "COALESCE(SUM(n_sessions), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Urlbar search sessions"
description = "Number of urlbar search sessions combining urlbar and serp events"
[metrics.urlbar_search_combined_sessions.statistics.bootstrap_mean]
[metrics.urlbar_search_combined_sessions.statistics.deciles]
[metrics.exposed_urlbar_sessions]
select_expression = "COALESCE(SUM(n_exposed_sessions), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed urlbar sessions"
description = "Number of urlbar sessions exposed to Yelp"
[metrics.exposed_urlbar_sessions.statistics.bootstrap_mean]
[metrics.exposed_urlbar_sessions.statistics.deciles]
[metrics.exposed_urlbar_engagements]
select_expression = "COALESCE(SUM(n_exposed_engaged), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed urlbar engagements"
description = "Number of urlbar sessions exposed to Yelp with an engagement"
[metrics.exposed_urlbar_engagements.statistics.bootstrap_mean]
[metrics.exposed_urlbar_engagements.statistics.deciles]
[metrics.exposure_rate]
depends_on = ["exposed_urlbar_sessions", "urlbar_search_combined_sessions"]
friendly_name = "Urlbar session exposure rate"
description = "Proportion of urlbar search sessions exposed to Yelp"
[metrics.exposure_rate.statistics.population_ratio]
numerator = "exposed_urlbar_sessions"
denominator = "urlbar_search_combined_sessions"
[metrics.exposed_urlbar_engagement_rate]
depends_on = ["exposed_urlbar_engagements", "exposed_urlbar_sessions"]
friendly_name = "Exposed urlbar engagement rate"
description = "Proportion of urlbar search sessions exposed to Yelp with an engagement"
[metrics.exposed_urlbar_engagement_rate.statistics.population_ratio]
numerator = "exposed_urlbar_engagements"
denominator = "exposed_urlbar_sessions"
[metrics.exposed_search_engine_clicks]
select_expression = "COALESCE(SUM(n_search_clicks), 0)"
data_source = "urlbar_search_session_counts"
description = "Number of exposed urlbar sessions ending with a click leading to a SERP"
friendly_name = "Exposed urlbar sessions ending on a SERP"
[metrics.exposed_search_engine_clicks.statistics.bootstrap_mean]
[metrics.exposed_search_engine_clicks.statistics.deciles]
[metrics.exposed_search_engine_rate]
depends_on = ["exposed_search_engine_clicks", "exposed_urlbar_sessions"]
friendly_name = "Exposed SERP engagement rate"
description = "Proportion of exposed urlbar sessions ending with a click leading to a SERP"
[metrics.exposed_search_engine_rate.statistics.population_ratio]
numerator = "exposed_search_engine_clicks"
denominator = "exposed_urlbar_sessions"
#----------------------------------------------
# Exposed session SERP metrics
[metrics.exposed_serp_impressions]
select_expression = "COALESCE(SUM(n_exposed_serp), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed SERP impressions"
description = "Number of SERP page loads following an exposure (all engines)"
[metrics.exposed_serp_impressions.statistics.bootstrap_mean]
[metrics.exposed_serp_impressions.statistics.deciles]
[metrics.exposed_serp_impressions_with_ads]
select_expression = "COALESCE(SUM(n_exposed_serp_with_ads), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed SERPs with ads"
description = "Number of SERP page loads with ads loaded following an exposure (all engines)"
[metrics.exposed_serp_impressions_with_ads.statistics.bootstrap_mean]
[metrics.exposed_serp_impressions_with_ads.statistics.deciles]
[metrics.exposed_serp_has_ad_click]
select_expression = "COALESCE(SUM(n_exposed_serp_has_ad_click), 0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed SERPs with ad clicks"
description = "Number of SERP page loads where an ad was clicked following an exposure (all enginges)"
[metrics.exposed_serp_has_ad_click.statistics.bootstrap_mean]
[metrics.exposed_serp_has_ad_click.statistics.deciles]
[metrics.exposed_serp_ad_clicks]
select_expression = "COALESCE(SUM(n_exposed_ad_clicks),0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed SERP ad clicks"
description = "Number of ad clicks on SERPs following an exposure (all engines)"
[metrics.exposed_serp_ad_clicks.statistics.bootstrap_mean]
[metrics.exposed_serp_ad_clicks.statistics.deciles]
[metrics.exposed_tagged_google_ad_clicks]
select_expression = "COALESCE(SUM(n_exposed_tagged_google_ad_clicks),0)"
data_source = "urlbar_search_session_counts"
friendly_name = "Exposed Google ad clicks"
description = "Number of ad clicks on tagged Google SERPs following an exposure"
[metrics.exposed_tagged_google_ad_clicks.statistics.bootstrap_mean]
[metrics.exposed_tagged_google_ad_clicks.statistics.deciles]
[metrics.exposed_serp_ad_impression_rate]
depends_on = ["exposed_serp_impressions_with_ads", "exposed_serp_impressions"]
friendly_name = "Exposed SERP ad impression rate"
description = "Proportion of SERP page loads that had ads loaded following an exposure"
[metrics.exposed_serp_ad_impression_rate.statistics.population_ratio]
numerator = "exposed_serp_impressions_with_ads"
denominator = "exposed_serp_impressions"
[metrics.exposed_serp_ad_ctr]
depends_on = ["exposed_serp_has_ad_click", "exposed_serp_impressions_with_ads"]
friendly_name = "Exposed SERP ad CTR"
description = "Proportion of SERP page loads with ads loaded that had an ad click following an exposure"
[metrics.exposed_serp_ad_ctr.statistics.population_ratio]
numerator = "exposed_serp_has_ad_click"
denominator = "exposed_serp_impressions_with_ads"
[metrics.exposed_serp_ad_click_rate]
depends_on = ["exposed_serp_ad_clicks", "exposed_serp_impressions"]
friendly_name = "Exposed SERP ad click rate"
description = "Number of ad clicks per exposed SERP page load"
[metrics.exposed_serp_ad_click_rate.statistics.population_ratio]
numerator = "exposed_serp_ad_clicks"
denominator = "exposed_serp_impressions"
#----------------------------------------------
[segments]
[segments.exposed]
select_expression = '{{agg_any("is_exposure")}}'
data_source = "urlbar_exposures"
friendly_name = "Exposed clients"
description = "Clients that saw Yelp"
[segments.not_exposed]
select_expression = 'NOT {{agg_any("is_exposure")}}'
data_source = "urlbar_exposures"
friendly_name = "Non-exposed clients"
description = "Clients that never saw Yelp"
[segments.data_sources.urlbar_exposures]
from_expression = """(
SELECT
metrics.uuid.legacy_telemetry_client_id AS client_id,
DATE(submission_timestamp) AS submission_date,
event.name = 'exposure' AS is_exposure,
FROM `mozdata.firefox_desktop.events`, UNNEST(events) AS event
WHERE event.category = 'urlbar'
)"""
friendly_name = "Urlbar exposures"
description = "Glean urlbar exposure events"
window_start = 0
window_end = 34
[data_sources]
[data_sources.urlbar_search_session_counts]
friendly_name = "Urlbar search session counts"
from_expression = "mozdata.analysis.dzeber_yelp_urlbar_serp_session_counts"
description = "Counts related to search sessions initiated in the urlbar including urlbar and related serp Glean events"
client_id_column = "legacy_telemetry_client_id"
submission_date_column = "submission_date"
experiments_column_type = "none"
#------------
# This table was created on a custom basis to simplify experiment analysis using the following query:
#------------
#
# DECLARE min_date DATE DEFAULT '2024-05-07';
#
# DECLARE max_date DATE DEFAULT '2024-06-13';
#
# CREATE TEMP FUNCTION in_experiment(experiment ANY TYPE, sample_id INT) AS (
# mozfun.map.get_key(experiment, 'yelp-suggestions-pilot-v3') IS NOT NULL
# );
#
# CREATE OR REPLACE TABLE
# mozdata.analysis.dzeber_yelp_urlbar_serp_session_counts
# AS
# WITH exposures AS (
# -- urlbar.exposure events sequence info
# SELECT DISTINCT
# client_info.client_id AS glean_client_id,
# DATE(submission_timestamp) AS submission_date,
# ping_info.seq,
# event_timestamp,
# FROM
# `mozdata.firefox_desktop.events_unnested`
# WHERE
# event_category = 'urlbar'
# AND event_name = 'exposure'
# AND DATE(submission_timestamp)
# BETWEEN min_date
# AND max_date
# AND in_experiment(ping_info.experiments, sample_id)
# ),
# urlbar AS (
# -- terminal urlbar events (urlbar sessions)
# SELECT
# *
# FROM
# `mozdata.firefox_desktop.urlbar_events`
# WHERE
# is_terminal
# AND submission_date
# BETWEEN min_date
# AND max_date
# AND in_experiment(experiments, sample_id)
# ),
# combined AS (
# -- take union of events tables in order to annotate exposure from sequencing
# SELECT
# *,
# TRUE AS is_exposure,
# NULL AS urlbar_info
# FROM
# exposures
# UNION ALL
# (
# SELECT
# glean_client_id,
# submission_date,
# seq,
# event_timestamp,
# FALSE AS is_exposure,
# (SELECT AS STRUCT urlbar.*) AS urlbar_info
# FROM
# urlbar
# )
# ),
# combined_with_exp AS (
# -- urlbar event is exposed if it is preceded by an exposure event
# SELECT
# *,
# LAG(is_exposure) OVER (
# -- exposure events have a (seq, event_timestamp) which is either equal or less than
# -- the (seq, event_timestamp) for the corresponding urlbar event
# -- order by is_exposure descending to break ties in case they are equal
# PARTITION BY
# glean_client_id
# ORDER BY
# seq,
# event_timestamp,
# is_exposure DESC
# ) AS exposed
# FROM
# combined
# ),
# urlbar_exp AS (
# -- restrict to urlbar events with exposure indicator
# SELECT
# urlbar_info.*,
# COALESCE(exposed, FALSE) AS exposed
# FROM
# combined_with_exp
# WHERE
# NOT is_exposure
# ),
# serp AS (
# -- serp events
# SELECT
# *
# FROM
# `mozdata.firefox_desktop.serp_events`
# WHERE
# submission_date
# BETWEEN min_date
# AND max_date
# AND in_experiment(experiments, sample_id)
# ),
# urlbar_serp AS (
# -- combine urlbar and serp events into single table
# -- keep only relevant fields from each
# SELECT
# glean_client_id,
# seq,
# event_timestamp,
# legacy_telemetry_client_id,
# submission_date,
# FALSE AS is_serp,
# STRUCT(exposed, event_action, product_selected_result) AS urlbar_fields,
# NULL AS serp_fields,
# FROM
# urlbar_exp
# UNION ALL
# (
# SELECT
# glean_client_id,
# ping_seq AS seq,
# event_timestamp,
# legacy_telemetry_client_id,
# submission_date,
# TRUE AS is_serp,
# NULL AS urlbar_fields,
# STRUCT(
# search_engine,
# is_tagged,
# sap_source,
# num_ads_loaded > 0 AS has_ads_loaded,
# num_ad_clicks
# ) AS serp_fields
# FROM
# serp
# )
# ),
# combined_sess_start AS (
# -- sessions start with either a urlbar event or a SAP-sourced serp load
# SELECT
# *,
# NOT is_serp
# OR serp_fields.sap_source IN (
# 'searchbar',
# 'contextmenu',
# 'webextension',
# 'system'
# ) AS new_session_start
# FROM
# urlbar_serp
# ),
# sessions AS (
# -- group events into 'search sessions' starting from a SAP
# -- and including subsequent follow-on-type serp loads
# SELECT
# *,
# -- use number of sessions seen so far for the client as a session ID
# COUNTIF(new_session_start) OVER (
# PARTITION BY
# glean_client_id
# ORDER BY
# seq,
# event_timestamp
# RANGE BETWEEN
# UNBOUNDED PRECEDING
# AND CURRENT ROW
# ) AS client_session_id,
# FROM
# combined_sess_start
# ),
# urlbar_sessions AS (
# -- restrict to sessions starting from the urlbar
# SELECT
# *,
# -- add session-level exposure indicator
# LOGICAL_OR(urlbar_fields.exposed) OVER (
# PARTITION BY
# glean_client_id,
# client_session_id
# ) AS exposed_session
# FROM
# sessions
# WHERE
# -- drop partial sessions at the start of a window
# client_session_id > 0
# QUALIFY
# LOGICAL_OR(new_session_start AND NOT is_serp) OVER (
# PARTITION BY
# glean_client_id,
# client_session_id
# )
# )
# SELECT
# legacy_telemetry_client_id,
# submission_date,
# COUNTIF(new_session_start) AS n_sessions,
# COUNTIF(exposed_session AND new_session_start) AS n_exposed_sessions,
# COUNTIF(exposed_session AND urlbar_fields.event_action = 'engaged') AS n_exposed_engaged,
# COUNTIF(
# exposed_session
# AND urlbar_fields.event_action = 'engaged'
# AND urlbar_fields.product_selected_result IN (
# 'default_partner_search_suggestion',
# 'search_engine',
# 'trending_suggestion'
# )
# ) AS n_search_clicks,
# COUNTIF(exposed_session AND is_serp) AS n_exposed_serp,
# COUNTIF(exposed_session AND serp_fields.has_ads_loaded) AS n_exposed_serp_with_ads,
# COUNTIF(exposed_session AND serp_fields.num_ad_clicks > 0) AS n_exposed_serp_has_ad_click,
# COALESCE(SUM(IF(exposed_session, serp_fields.num_ad_clicks, 0)), 0) AS n_exposed_ad_clicks,
# COALESCE(
# SUM(
# IF(
# exposed_session
# AND serp_fields.is_tagged
# AND serp_fields.search_engine = 'google',
# serp_fields.num_ad_clicks,
# 0
# )
# ),
# 0
# ) AS n_exposed_tagged_google_ad_clicks,
# FROM
# urlbar_sessions
# GROUP BY
# 1,
# 2
#
#------------