opmon/firefox-background-update.toml (136 lines of code) (raw):

# Monitoring of Firefox background update process.

[project]
name = "Firefox Background Update"
platform = "firefox_desktop"
xaxis = "submission_date"
start_date = "2022-01-01"
# We want continuous monitoring of this data:
# end_date =
skip_default_metrics = true
# Each name below refers to a `[metrics.<name>]` table defined in this file.
metrics = [
    "client_volume",
    "ping_volume",
    "success_rate",
    "exception_rate",
    # It turns out that the success rate is ~1.0 and the exception rate ~0.0, so
    # the exception volume is more useful.
    "exception_volume",
    "crash_volume",
    # TODO: "exit_state_rate",
]

[project.population]
# The population is read from the `background_update` data source and broken
# down by the two dimensions defined under `[dimensions.*]`.
data_source = "background_update"
monitor_entire_population = true
dimensions = ["normalized_channel", "windows_version"]

[dimensions.windows_version]
data_source = "background_update"
select_expression = "windows_version_info"
friendly_name = "Windows Version"
description = "Windows version, like 'Windows 11', 'Windows 10', etc. See https://github.com/mozilla/bigquery-etl/blob/generated-sql/sql/mozfun/norm/windows_version_info/udf.sql."

[dimensions.normalized_channel]
data_source = "background_update"
select_expression = "normalized_channel"
friendly_name = "Channel"
description = "Release channel, like 'release', 'beta', 'nightly', 'esr', 'devedition'."

[data_sources]

# Rows from `mozdata.firefox_desktop_background_update.background_update`.
# Rows whose Windows version normalizes to NULL, or to one of the listed
# ancient Windows releases, are dropped.
[data_sources.background_update]
from_expression = """
  (
    SELECT
      *
    FROM (
      SELECT
        DATE(submission_timestamp) AS submission_date,
        -- We don't care about builds and versions at this point.
        NULL AS build_id,
        client_info.client_id AS client_id,
        metrics.uuid.background_update_client_id AS legacy_client_id,
        mozfun.norm.windows_version_info(
          "Windows_NT",
          -- Only the first two dot-separated components of the OS version are passed.
          CONCAT(
            SPLIT(normalized_os_version, ".")[SAFE_OFFSET(0)],
            ".",
            SPLIT(normalized_os_version, ".")[SAFE_OFFSET(1)]
          ),
          -- A missing build number is treated as 0.
          -- Disabled alternative (third component of the OS version):
          --   SAFE_CAST(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(2)] AS INT64)
          COALESCE(client_info.windows_build_number, 0)
        ) AS windows_version_info,
        normalized_channel AS normalized_channel,
        -- It's more convenient to always have these defined, and always as integers (0 or 1).
        CAST(COALESCE(metrics.boolean.background_update_exit_code_success, FALSE) AS INT64) AS exit_code_success,
        CAST(COALESCE(metrics.boolean.background_update_exit_code_exception, FALSE) AS INT64) AS exit_code_exception,
      FROM
        mozdata.firefox_desktop_background_update.background_update
    )
    WHERE
      -- Some clients are badly configured.
      windows_version_info IS NOT NULL
      -- Ancient versions of Windows are not relevant.
      AND windows_version_info NOT IN (
        'Windows 95',
        'Windows 98',
        'Windows NT 4.0',
        'Windows 2000',
        'Windows XP',
        'Windows Vista'
      )
  )
"""
submission_date_column = "submission_date"
build_id_column = "build_id"
# Clients are keyed on `legacy_client_id` rather than the Glean `client_id`;
# the crash data source's query notes that its client ID must match this one.
client_id_column = "legacy_client_id"

# Rows from `mozdata.telemetry.crash` whose background task name is
# 'backgroundupdate', with the same Windows-version filtering as above.
# NOTE(review): unlike `background_update`, a NULL build number is not
# defaulted to 0 here -- confirm that the difference is intended.
[data_sources.background_update_crashes]
from_expression = """
  (
    SELECT
      *
    FROM (
      SELECT
        DATE(submission_timestamp) AS submission_date,
        -- We don't care about builds and versions at this point.
        NULL AS build_id,
        -- This must match the client ID of the `background_update` data source!
        client_id,
        mozfun.norm.windows_version_info(
          "Windows_NT",
          environment.system.os.version,
          SAFE_CAST(environment.system.os.windows_build_number AS INT64)
        ) AS windows_version_info,
        normalized_channel AS normalized_channel,
      FROM
        mozdata.telemetry.crash
      WHERE
        payload.metadata.background_task_name = 'backgroundupdate'
    )
    WHERE
      -- Some clients are badly configured.
      windows_version_info IS NOT NULL
      -- Ancient versions of Windows are not relevant.
      AND windows_version_info NOT IN (
        'Windows 95',
        'Windows 98',
        'Windows NT 4.0',
        'Windows 2000',
        'Windows XP',
        'Windows Vista'
      )
  )
"""
submission_date_column = "submission_date"
build_id_column = "build_id"
client_id_column = "client_id"

[metrics]

# Row count of the `background_update` data source.
[metrics.ping_volume]
data_source = "background_update"
select_expression = "COUNT(*)"
type = "scalar"

[metrics.ping_volume.statistics]
sum = {}
# TODO: one week and one month smoothed?
# Number of distinct legacy client IDs in the `background_update` data source.
[metrics.client_volume]
data_source = "background_update"
select_expression = "COUNT(DISTINCT legacy_client_id)"
type = "scalar"

[metrics.client_volume.statistics]
sum = {}
# TODO: one week and one month smoothed?

# Average of the 0/1 `exit_code_success` flag.
[metrics.success_rate]
data_source = "background_update"
select_expression = "AVG(exit_code_success)"
type = "scalar"

[metrics.success_rate.statistics]
mean = {}

# Average of the 0/1 `exit_code_exception` flag.
[metrics.exception_rate]
data_source = "background_update"
select_expression = "AVG(exit_code_exception)"
type = "scalar"

[metrics.exception_rate.statistics]
mean = {}

# Total of the 0/1 `exit_code_exception` flag, i.e. the number of rows that
# reported an exception exit code.
[metrics.exception_volume]
data_source = "background_update"
select_expression = "SUM(exit_code_exception)"
type = "scalar"

[metrics.exception_volume.statistics]
sum = {}

# Row count of the `background_update_crashes` data source.
[metrics.crash_volume]
data_source = "background_update_crashes"
select_expression = "COUNT(*)"
type = "scalar"

[metrics.crash_volume.statistics]
sum = {}