# opmon/telemetry-alerts-prototype.toml (68 lines of code) (raw):
# Top-level description of the monitoring project: identity, charting
# options, and the alerts and metrics that are switched on.
[project]
name = "Telemetry Alerts Prototype"
platform = "firefox_desktop"
# Kept as a quoted string (not a native TOML date) to match the original value.
start_date = "2022-06-01"

# Results are plotted per build rather than per calendar day.
xaxis = "build_id"
compact_visualization = true

# NOTE(review): presumably suppresses the platform's default metric set so
# that only the `metrics` list below is monitored -- confirm.
skip_default_metrics = true

# Each name matches an [alerts.<name>] table in this file.
alerts = ["hist_diff", "crash_thresholds"]

# main_crashes and oom_crashes are defined under [metrics.*] in this file.
# The other names have no definition here; presumably they are resolved
# from shared platform definitions -- TODO confirm.
metrics = [
    "checkerboard_severity",
    "gc_ms",
    "main_crashes",
    "oom_crashes",
    "js_pageload_execution_ms",
    "memory_total",
    "gc_ms_content",
    "gc_budget_overrun",
    "perf_first_contentful_paint_ms",
    "active_ticks",
    "subsession_length",
]

# Population being monitored. The "main" data source is declared under
# [data_sources.main] in this file.
[project.population]
data_source = "main"
channel = "nightly"
monitor_entire_population = true
# NOTE(review): the "os" dimension is not defined in this file; presumably
# it comes from shared platform definitions -- confirm.
dimensions = ["os"]

# Main-process crash count: adds 1 for every row whose process type is
# 'main' or NULL, and 0 otherwise.
# NOTE(review): the "crash" data source is not defined in this file;
# presumably it is provided by shared definitions -- confirm.
[metrics.main_crashes]
data_source = "crash"
select_expression = "SUM(IF(payload.process_type = 'main' OR payload.process_type IS NULL, 1, 0))"

# Only the "sum" statistic is requested, with no extra parameters.
[metrics.main_crashes.statistics]
sum = {}

# Out-of-memory crash count: adds 1 for every row that carries a non-NULL
# oom_allocation_size in its metadata, and 0 otherwise.
# NOTE(review): the "crash" data source is not defined in this file;
# presumably it is provided by shared definitions -- confirm.
[metrics.oom_crashes]
data_source = "crash"
select_expression = "SUM(IF(payload.metadata.oom_allocation_size IS NOT NULL, 1, 0))"

# Only the "sum" statistic is requested, with no extra parameters.
[metrics.oom_crashes.statistics]
sum = {}

# Definition of the "main" data source used by [project.population].
# The empty [data_sources] parent header is omitted; this header creates
# the parent table implicitly, so the parsed result is unchanged.
[data_sources.main]
from_expression = "mozdata.telemetry.main_nightly"
# The submission date is derived by truncating the timestamp column.
submission_date_column = "DATE(submission_timestamp)"

# Threshold alert covering both crash-count metrics defined in this file.
# The empty [alerts] parent header is omitted; this header creates the
# parent table implicitly, so the parsed result is unchanged.
[alerts.crash_thresholds]
type = "threshold"
metrics = ["main_crashes", "oom_crashes"]
# Lower and upper bounds, each given as a one-element list.
# NOTE(review): presumably applied to the summed crash counts of each
# metric -- confirm how list entries map onto metrics/statistics.
min = [0]
max = [500]

# Average-difference alert. Its metric list is the [project] metric list
# minus the two crash counters (those are handled by crash_thresholds).
[alerts.hist_diff]
type = "avg_diff"
metrics = [
    "checkerboard_severity",
    "gc_ms",
    "js_pageload_execution_ms",
    "memory_total",
    "gc_ms_content",
    "gc_budget_overrun",
    "perf_first_contentful_paint_ms",
    "active_ticks",
    "subsession_length",
]
# NOTE(review): presumably the number of consecutive x-axis points that
# are averaged before comparison -- confirm the unit.
window_size = 7
# NOTE(review): presumably a fraction (0.25 = 25 %) -- confirm.
max_relative_change = 0.25
percentiles = [25, 50, 75, 95]