sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v1/query.sql (607 lines of code) (raw):

-- Query first observations for Firefox Android Clients. -- TODO: at the next refactor, should we consider replacing baseline_clients and first_seen CTEs -- with using `fenix.baseline_clients_first_seen` view which already gets all clients from -- the baseline_clients_daily tables whilst filtering for `is_new_profile`. -- This would also match up closer to the corresponding implementation of the firefox_ios_clients table. WITH baseline_clients AS ( SELECT client_id, sample_id, first_seen_date, submission_date, country, isp AS first_reported_isp, DATETIME(first_run_date) AS first_run_datetime, normalized_channel AS channel, device_manufacturer, device_model, normalized_os_version AS os_version, app_display_version AS app_version, locale, is_new_profile, distribution_id, FROM `moz-fx-data-shared-prod.fenix.baseline_clients_daily` WHERE {% if is_init() %} submission_date >= "2020-08-01" {% else %} submission_date = @submission_date {% endif %} AND client_id IS NOT NULL ), first_seen AS ( SELECT client_id, sample_id, first_seen_date, submission_date, country AS first_reported_country, first_reported_isp, first_run_datetime, channel, device_manufacturer, device_model, os_version, app_version, locale, FROM baseline_clients WHERE is_new_profile ), -- Find the most recent activation record per client_id. activations AS ( SELECT client_id, ARRAY_AGG(activated ORDER BY submission_date DESC)[SAFE_OFFSET(0)] > 0 AS activated FROM `moz-fx-data-shared-prod.fenix.new_profile_activation` WHERE {% if is_init() %} submission_date >= "2020-08-01" {% else %} submission_date = @submission_date {% endif %} GROUP BY client_id ), -- Find earliest data per client from the first_session ping. first_session_ping_min_seq AS ( SELECT client_id, sample_id, seq FROM ( SELECT client_info.client_id AS client_id, sample_id, ping_info.seq AS seq, submission_timestamp, ROW_NUMBER() OVER ( PARTITION BY client_info.client_id ORDER BY ping_info.seq, submission_timestamp ) AS RANK FROM `moz-fx-data-shared-prod.fenix.first_session` AS fenix_first_session WHERE ping_info.seq IS NOT NULL AND {% if is_init() %} DATE(submission_timestamp) >= "2020-08-01" {% else %} DATE(submission_timestamp) = @submission_date {% endif %} ) WHERE RANK = 1 -- Pings are sent in sequence, this guarantees that the first one is returned. GROUP BY client_id, sample_id, seq ), first_session_ping AS ( SELECT client_info.client_id AS client_id, MIN(fenix_first_session.sample_id) AS sample_id, DATETIME(MIN(submission_timestamp)) AS min_submission_datetime, MIN(SAFE.PARSE_DATETIME('%F', SUBSTR(client_info.first_run_date, 1, 10))) AS first_run_datetime, ARRAY_AGG(normalized_channel IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS channel, ARRAY_AGG(metrics.string.first_session_campaign IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS adjust_campaign, ARRAY_AGG(metrics.string.first_session_network IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS adjust_network, ARRAY_AGG(metrics.string.first_session_adgroup IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS adjust_ad_group, ARRAY_AGG(metrics.string.first_session_creative IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS adjust_creative, ARRAY_AGG( metrics.string.play_store_attribution_campaign IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_campaign, ARRAY_AGG( metrics.string.play_store_attribution_content IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_content, ARRAY_AGG( metrics.string.play_store_attribution_medium IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_medium, ARRAY_AGG( metrics.string.play_store_attribution_source IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_source, ARRAY_AGG( metrics.string.play_store_attribution_term IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_term, ARRAY_AGG( metrics.text2.play_store_attribution_install_referrer_response IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS play_store_attribution_install_referrer_response, ARRAY_AGG(metrics.string.meta_attribution_app IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS meta_attribution_app, FROM `moz-fx-data-shared-prod.fenix.first_session` AS fenix_first_session LEFT JOIN first_session_ping_min_seq ON ( client_info.client_id = first_session_ping_min_seq.client_id AND ping_info.seq = first_session_ping_min_seq.seq AND fenix_first_session.sample_id = first_session_ping_min_seq.sample_id ) WHERE {% if is_init() %} DATE(submission_timestamp) >= "2020-08-01" {% else %} DATE(submission_timestamp) = @submission_date {% endif %} AND (first_session_ping_min_seq.client_id IS NOT NULL OR ping_info.seq IS NULL) GROUP BY client_id ), -- Find earliest data per client from the metrics ping. metrics_ping AS ( SELECT client_info.client_id AS client_id, MIN(sample_id) AS sample_id, DATETIME(MIN(submission_timestamp)) AS min_submission_datetime, ARRAY_AGG(normalized_channel IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS channel, ARRAY_AGG( metrics.string.metrics_adjust_campaign IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS adjust_campaign, ARRAY_AGG(metrics.string.metrics_adjust_network IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS adjust_network, ARRAY_AGG( metrics.string.metrics_adjust_ad_group IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS adjust_ad_group, ARRAY_AGG( metrics.string.metrics_adjust_creative IGNORE NULLS ORDER BY submission_timestamp ASC )[SAFE_OFFSET(0)] AS adjust_creative, ARRAY_AGG(metrics.string.metrics_install_source IGNORE NULLS ORDER BY submission_timestamp ASC)[ SAFE_OFFSET(0) ] AS install_source, ARRAY_AGG( metrics.string.metrics_adjust_ad_group IGNORE NULLS ORDER BY submission_timestamp DESC )[SAFE_OFFSET(0)] AS last_reported_adjust_ad_group, ARRAY_AGG( metrics.string.metrics_adjust_creative IGNORE NULLS ORDER BY submission_timestamp DESC )[SAFE_OFFSET(0)] AS last_reported_adjust_creative, ARRAY_AGG( metrics.string.metrics_adjust_network IGNORE NULLS ORDER BY submission_timestamp DESC )[SAFE_OFFSET(0)] AS last_reported_adjust_network, ARRAY_AGG( metrics.string.metrics_adjust_campaign IGNORE NULLS ORDER BY submission_timestamp DESC )[SAFE_OFFSET(0)] AS last_reported_adjust_campaign, FROM `moz-fx-data-shared-prod.fenix.metrics` AS fenix_metrics WHERE {% if is_init() %} DATE(submission_timestamp) >= "2020-08-01" {% else %} DATE(submission_timestamp) = @submission_date {% endif %} GROUP BY client_id ), -- Find most recent client details from the baseline ping. baseline_ping AS ( SELECT client_id, MAX(submission_date) AS last_reported_date, ARRAY_AGG(channel IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS last_reported_channel, ARRAY_AGG(country IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS last_reported_country, ARRAY_AGG(device_model IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS last_reported_device_model, ARRAY_AGG(device_manufacturer IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS last_reported_device_manufacturer, ARRAY_AGG(locale IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS last_reported_locale, ARRAY_AGG(distribution_id IGNORE NULLS ORDER BY submission_date DESC)[ SAFE_OFFSET(0) ] AS distribution_id, FROM baseline_clients GROUP BY client_id ), _current AS ( SELECT client_id, COALESCE(first_seen.sample_id, first_session.sample_id, metrics.sample_id) AS sample_id, first_seen.first_seen_date AS first_seen_date, first_seen.submission_date AS submission_date, DATE(first_seen.first_run_datetime) AS first_run_date, first_seen.first_reported_country AS first_reported_country, first_seen.first_reported_isp AS first_reported_isp, COALESCE(first_seen.channel, first_session.channel, metrics.channel) AS channel, first_seen.device_manufacturer AS device_manufacturer, first_seen.device_model AS device_model, first_seen.os_version AS os_version, first_seen.app_version AS app_version, first_seen.locale AS locale, activated AS activated, COALESCE(first_session.adjust_campaign, metrics.adjust_campaign) AS adjust_campaign, COALESCE(first_session.adjust_ad_group, metrics.adjust_ad_group) AS adjust_ad_group, COALESCE(first_session.adjust_creative, metrics.adjust_creative) AS adjust_creative, COALESCE(first_session.adjust_network, metrics.adjust_network) AS adjust_network, metrics.install_source AS install_source, first_session.play_store_attribution_campaign, first_session.play_store_attribution_content, first_session.play_store_attribution_medium, first_session.play_store_attribution_source, first_session.play_store_attribution_term, first_session.play_store_attribution_install_referrer_response, baseline.distribution_id, first_session.meta_attribution_app AS meta_attribution_app, metrics.last_reported_adjust_campaign AS last_reported_adjust_campaign, metrics.last_reported_adjust_ad_group AS last_reported_adjust_ad_group, metrics.last_reported_adjust_creative AS last_reported_adjust_creative, metrics.last_reported_adjust_network AS last_reported_adjust_network, baseline.last_reported_date AS last_reported_date, baseline.last_reported_channel AS last_reported_channel, baseline.last_reported_country AS last_reported_country, baseline.last_reported_device_model AS last_reported_device_model, baseline.last_reported_device_manufacturer AS last_reported_device_manufacturer, baseline.last_reported_locale AS last_reported_locale, STRUCT( CASE WHEN first_session.client_id IS NULL THEN FALSE ELSE TRUE END AS reported_first_session_ping, CASE WHEN metrics.client_id IS NULL THEN FALSE ELSE TRUE END AS reported_metrics_ping, CASE WHEN first_seen.client_id IS NULL THEN FALSE ELSE TRUE END AS reported_baseline_ping, DATE(first_session.min_submission_datetime) AS min_first_session_ping_submission_date, DATE(first_session.first_run_datetime) AS min_first_session_ping_run_date, DATE(metrics.min_submission_datetime) AS min_metrics_ping_submission_date, mozfun.norm.get_earliest_value( [ ( STRUCT( CAST(first_session.adjust_network AS STRING), 'first_session', DATETIME(first_session.min_submission_datetime) ) ), ( STRUCT( CAST(metrics.adjust_network AS STRING), 'metrics', DATETIME(metrics.min_submission_datetime) ) ) ] ).earliest_value_source AS adjust_network__source_ping, CASE WHEN metrics.install_source IS NOT NULL THEN 'metrics' ELSE NULL END AS install_source__source_ping, mozfun.norm.get_earliest_value( [ ( STRUCT( CAST(first_session.adjust_network AS STRING), 'first_session', DATETIME(first_session.min_submission_datetime) ) ), ( STRUCT( CAST(metrics.adjust_network AS STRING), 'metrics', DATETIME(metrics.min_submission_datetime) ) ) ] ).earliest_date AS adjust_network__source_ping_datetime, CASE WHEN metrics.install_source IS NOT NULL THEN metrics.min_submission_datetime ELSE NULL END AS install_source__source_ping_datetime, IF( play_store_attribution_campaign IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_campaign__ping_datetime, IF( play_store_attribution_content IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_content__ping_datetime, IF( play_store_attribution_medium IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_medium__ping_datetime, IF( play_store_attribution_source IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_source__ping_datetime, IF( play_store_attribution_term IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_term__ping_datetime, IF( play_store_attribution_install_referrer_response IS NOT NULL, first_session.min_submission_datetime, NULL ) AS play_store_attribution_install_referrer_response__ping_datetime, IF( meta_attribution_app IS NOT NULL, first_session.min_submission_datetime, NULL ) AS meta_attribution_app__ping_datetime ) AS metadata FROM first_seen FULL OUTER JOIN first_session_ping AS first_session USING (client_id) FULL OUTER JOIN metrics_ping AS metrics USING (client_id) FULL OUTER JOIN baseline_ping AS baseline USING (client_id) FULL OUTER JOIN activations USING (client_id) WHERE client_id IS NOT NULL ), --existing clients in firefox_android_clients_v1 _previous AS ( SELECT * FROM `moz-fx-data-shared-prod.fenix_derived.firefox_android_clients_v1` WHERE {% if is_init() %} FALSE {% else %} first_seen_date < @submission_date {% endif %} ) SELECT client_id, COALESCE(_previous.sample_id, _current.sample_id) AS sample_id, COALESCE(_previous.first_seen_date, _current.first_seen_date) AS first_seen_date, COALESCE(_previous.submission_date, _current.submission_date) AS submission_date, COALESCE(_previous.first_run_date, _current.first_run_date) AS first_run_date, COALESCE( _previous.first_reported_country, _current.first_reported_country ) AS first_reported_country, COALESCE(_previous.first_reported_isp, _current.first_reported_isp) AS first_reported_isp, COALESCE(_previous.channel, _current.channel) AS channel, COALESCE(_previous.device_manufacturer, _current.device_manufacturer) AS device_manufacturer, COALESCE(_previous.device_model, _current.device_model) AS device_model, COALESCE(_previous.os_version, _current.os_version) AS os_version, COALESCE(_previous.app_version, _current.app_version) AS app_version, COALESCE(_previous.locale, _current.locale) AS locale, COALESCE(_previous.activated, _current.activated) AS activated, COALESCE(_previous.adjust_campaign, _current.adjust_campaign) AS adjust_campaign, COALESCE(_previous.adjust_ad_group, _current.adjust_ad_group) AS adjust_ad_group, COALESCE(_previous.adjust_creative, _current.adjust_creative) AS adjust_creative, COALESCE(_previous.adjust_network, _current.adjust_network) AS adjust_network, COALESCE(_previous.install_source, _current.install_source) AS install_source, COALESCE(_previous.distribution_id, _current.distribution_id) AS distribution_id, COALESCE(_previous.meta_attribution_app, _current.meta_attribution_app) AS meta_attribution_app, COALESCE( _previous.play_store_attribution_campaign, _current.play_store_attribution_campaign ) AS play_store_attribution_campaign, COALESCE( _previous.play_store_attribution_content, _current.play_store_attribution_content ) AS play_store_attribution_content, COALESCE( _previous.play_store_attribution_medium, _current.play_store_attribution_medium ) AS play_store_attribution_medium, COALESCE( _previous.play_store_attribution_source, _current.play_store_attribution_source ) AS play_store_attribution_source, COALESCE( _previous.play_store_attribution_term, _current.play_store_attribution_term ) AS play_store_attribution_term, COALESCE( _previous.play_store_attribution_install_referrer_response, _current.play_store_attribution_install_referrer_response ) AS play_store_attribution_install_referrer_response, COALESCE( _current.last_reported_adjust_campaign, _previous.last_reported_adjust_campaign ) AS last_reported_adjust_campaign, COALESCE( _current.last_reported_adjust_ad_group, _previous.last_reported_adjust_ad_group ) AS last_reported_adjust_ad_group, COALESCE( _current.last_reported_adjust_creative, _previous.last_reported_adjust_creative ) AS last_reported_adjust_creative, COALESCE( _current.last_reported_adjust_network, _previous.adjust_network ) AS last_reported_adjust_network, COALESCE(_current.last_reported_date, _previous.last_reported_date) AS last_reported_date, COALESCE( _current.last_reported_channel, _previous.last_reported_channel ) AS last_reported_channel, COALESCE( _current.last_reported_country, _previous.last_reported_country ) AS last_reported_country, COALESCE( _current.last_reported_device_model, _previous.last_reported_device_model ) AS last_reported_device_model, COALESCE( _current.last_reported_device_manufacturer, _previous.device_manufacturer ) AS last_reported_device_manufacturer, COALESCE(_current.last_reported_locale, _previous.locale) AS last_reported_locale, STRUCT( COALESCE(_previous.metadata.reported_first_session_ping, FALSE) OR COALESCE( _current.metadata.reported_first_session_ping, FALSE ) AS reported_first_session_ping, COALESCE(_previous.metadata.reported_metrics_ping, FALSE) OR COALESCE(_current.metadata.reported_metrics_ping, FALSE) AS reported_metrics_ping, COALESCE(_previous.metadata.reported_baseline_ping, FALSE) OR COALESCE(_current.metadata.reported_baseline_ping, FALSE) AS reported_baseline_ping, CASE WHEN _previous.metadata.min_first_session_ping_submission_date IS NOT NULL AND _current.metadata.min_first_session_ping_submission_date IS NOT NULL THEN LEAST( _previous.metadata.min_first_session_ping_submission_date, _current.metadata.min_first_session_ping_submission_date ) ELSE COALESCE( _previous.metadata.min_first_session_ping_submission_date, _current.metadata.min_first_session_ping_submission_date ) END AS min_first_session_ping_submission_date, CASE WHEN _previous.metadata.min_first_session_ping_run_date IS NOT NULL AND _current.metadata.min_first_session_ping_run_date IS NOT NULL THEN LEAST( _previous.metadata.min_first_session_ping_run_date, _current.metadata.min_first_session_ping_run_date ) ELSE COALESCE( _previous.metadata.min_first_session_ping_run_date, _current.metadata.min_first_session_ping_run_date ) END AS min_first_session_ping_run_date, CASE WHEN _previous.metadata.min_metrics_ping_submission_date IS NOT NULL AND _current.metadata.min_metrics_ping_submission_date IS NOT NULL THEN LEAST( _previous.metadata.min_metrics_ping_submission_date, _current.metadata.min_metrics_ping_submission_date ) ELSE COALESCE( _previous.metadata.min_metrics_ping_submission_date, _current.metadata.min_metrics_ping_submission_date ) END AS min_metrics_ping_submission_date, COALESCE( _previous.metadata.adjust_network__source_ping, _current.metadata.adjust_network__source_ping ) AS adjust_network__source_ping, COALESCE( _previous.metadata.install_source__source_ping, _current.metadata.install_source__source_ping ) AS install_source__source_ping, COALESCE( _previous.metadata.adjust_network__source_ping_datetime, _current.metadata.adjust_network__source_ping_datetime ) AS adjust_network__source_ping_datetime, COALESCE( _previous.metadata.install_source__source_ping_datetime, _current.metadata.install_source__source_ping_datetime ) AS install_source__source_ping_datetime, COALESCE( _previous.metadata.play_store_attribution_campaign__ping_datetime, _current.metadata.play_store_attribution_campaign__ping_datetime ) AS play_store_attribution_campaign__ping_datetime, COALESCE( _previous.metadata.play_store_attribution_content__ping_datetime, _current.metadata.play_store_attribution_content__ping_datetime ) AS play_store_attribution_content__ping_datetime, COALESCE( _previous.metadata.play_store_attribution_medium__ping_datetime, _current.metadata.play_store_attribution_medium__ping_datetime ) AS play_store_attribution_medium__ping_datetime, COALESCE( _previous.metadata.play_store_attribution_source__ping_datetime, _current.metadata.play_store_attribution_source__ping_datetime ) AS play_store_attribution_source__ping_datetime, COALESCE( _previous.metadata.play_store_attribution_term__ping_datetime, _current.metadata.play_store_attribution_term__ping_datetime ) AS play_store_attribution_term__ping_datetime, COALESCE( _previous.metadata.play_store_attribution_install_referrer_response__ping_datetime, _current.metadata.play_store_attribution_install_referrer_response__ping_datetime ) AS play_store_attribution_install_referrer_response__ping_datetime, COALESCE( _previous.metadata.meta_attribution_app__ping_datetime, _current.metadata.meta_attribution_app__ping_datetime ) AS meta_attribution_app__ping_datetime ) AS metadata FROM _current FULL OUTER JOIN _previous USING (client_id)