bigquery_etl/glam/templates/clients_daily_scalar_aggregates_v1.sql (111 lines of code) (raw):

{{ header }} WITH extracted AS ( SELECT *, DATE(submission_timestamp) AS submission_date, client_info.client_id, "{{ ping_type }}" AS ping_type, COALESCE( SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64), 0 ) AS app_version, client_info.os AS os, client_info.app_build AS app_build_id, client_info.app_channel AS channel FROM `moz-fx-data-shared-prod.{{ source_table }}` WHERE DATE(submission_timestamp) = {{ submission_date }} AND client_info.client_id IS NOT NULL ), sampled_data AS ( SELECT * FROM extracted WHERE -- If you're changing this, then you'll also need to change probe_counts_v1, -- where sampling is taken into account for counting clients. channel IN ("nightly", "beta") OR (channel = "release" AND os != "Windows") OR ( channel = "release" AND os = "Windows" AND sample_id < 10) ), unlabeled_metrics AS ( SELECT {{ attributes }}, ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[ {{ unlabeled_metrics }} ] as scalar_aggregates FROM sampled_data GROUP BY {{ attributes }} ), grouped_labeled_metrics AS ( SELECT {{ attributes }}, ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[ {{ labeled_metrics }} ] as metrics FROM sampled_data ), flattened_labeled_metrics AS ( SELECT {{ attributes }}, metrics.name AS metric, metrics.type AS metric_type, value.key AS key, value.value AS value FROM grouped_labeled_metrics CROSS JOIN UNNEST(metrics) AS metrics, UNNEST(metrics.value) AS value ), aggregated_labeled_metrics AS ( SELECT {{ attributes }}, metric, metric_type, key, MAX(value) AS max, MIN(value) AS min, AVG(value) AS avg, SUM(value) AS sum, IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count FROM flattened_labeled_metrics GROUP BY {{ attributes }}, metric, metric_type, key ), labeled_metrics AS ( SELECT {{ attributes }}, ARRAY_CONCAT_AGG( ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[ (metric, metric_type, key, 'max', max), (metric, metric_type, key, 'min', min), (metric, metric_type, key, 'avg', avg), (metric, metric_type, key, 'sum', sum), (metric, metric_type, key, 'count', count) ] ) AS scalar_aggregates FROM aggregated_labeled_metrics GROUP BY {{ attributes }} ) SELECT * FROM unlabeled_metrics UNION ALL SELECT * FROM labeled_metrics