sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v3/query.sql (723 lines of code) (raw):

-- Query for telemetry_derived.clients_first_seen_v3 -- Each ping type subquery retrieves all attributes as reported on the first -- ping received and respecting NULLS. -- Once the first_seen_date is identified after comparing all pings, attributes -- are retrieved for each client_id from the ping type that reported it. WITH error_check AS ( SELECT IF( DATE_ADD(MAX(first_seen_date), INTERVAL 1 day) != @submission_date, ERROR("Need to run sequentially, day after max day only"), 0 ) AS result FROM `moz-fx-data-shared-prod.telemetry_derived.clients_first_seen_v3` ), new_profile_ping_agg AS ( SELECT client_id AS client_id, sample_id AS sample_id, MIN(submission_timestamp) AS first_seen_timestamp, ARRAY_AGG(DATE(submission_timestamp) ORDER BY submission_timestamp ASC) AS all_dates, ARRAY_AGG(application.architecture RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS architecture, ARRAY_AGG(environment.build.build_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_build_id, ARRAY_AGG(normalized_app_name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_name, ARRAY_AGG(environment.settings.locale RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS locale, ARRAY_AGG(application.platform_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS platform_version, ARRAY_AGG(application.vendor RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS vendor, ARRAY_AGG(application.version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_version, ARRAY_AGG(application.xpcom_abi RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS xpcom_abi, ARRAY_AGG(document_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS document_id, ARRAY_AGG(environment.partner.distribution_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS distribution_id, ARRAY_AGG(environment.partner.distribution_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distribution_version, ARRAY_AGG(environment.partner.distributor RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distributor, ARRAY_AGG(environment.partner.distributor_channel RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distributor_channel, ARRAY_AGG(environment.partner.partner_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_id, ARRAY_AGG( environment.settings.attribution.campaign RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_campaign, ARRAY_AGG(environment.settings.attribution.content RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_content, ARRAY_AGG(environment.settings.attribution.dltoken RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_dltoken, ARRAY_AGG( environment.settings.attribution.dlsource RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_dlsource, ARRAY_AGG( environment.settings.attribution.experiment RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_experiment, ARRAY_AGG(environment.settings.attribution.medium RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_medium, ARRAY_AGG(environment.settings.attribution.source RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_source, ARRAY_AGG(environment.settings.attribution.ua RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_ua, ARRAY_AGG( environment.settings.attribution.variation RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_variation, ARRAY_AGG( environment.settings.default_search_engine_data.load_path RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_load_path, ARRAY_AGG( environment.settings.default_search_engine_data.name RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_name, ARRAY_AGG( environment.settings.default_search_engine_data.origin RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_origin, ARRAY_AGG( environment.settings.default_search_engine_data.submission_url RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_submission_url, ARRAY_AGG(environment.system.apple_model_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS apple_model_id, ARRAY_AGG(metadata.geo.city RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS city, ARRAY_AGG(metadata.geo.db_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS db_version, ARRAY_AGG(metadata.geo.subdivision1 RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS subdivision1, ARRAY_AGG(metadata.isp.name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS isp_name, ARRAY_AGG(normalized_channel RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS normalized_channel, ARRAY_AGG(normalized_country_code RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS country, ARRAY_AGG( mozfun.norm.os(environment.system.os.name) RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS normalized_os, ARRAY_AGG(normalized_os_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS normalized_os_version, ARRAY_AGG( payload.processes.parent.scalars.startup_profile_selection_reason RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS startup_profile_selection_reason, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_admin_user RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_admin_user, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_default_path RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_default_path, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_failure_reason RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_failure_reason, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_from_msi RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_from_msi, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_install_existed RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_install_existed, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_installer_type RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_installer_type, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_other_inst RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_other_inst, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_other_msix_inst RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_other_msix_inst, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_profdir_existed RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_profdir_existed, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_silent RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_silent, ARRAY_AGG( payload.processes.parent.scalars.installation_first_seen_version RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS installation_first_seen_version, ARRAY_AGG(environment.system.os.name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS os, ARRAY_AGG(environment.system.os.version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS os_version, ARRAY_AGG( environment.system.os.windows_build_number RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS windows_build_number, ARRAY_AGG(profile_group_id ORDER BY submission_timestamp)[SAFE_OFFSET(0)] AS profile_group_id, FROM `moz-fx-data-shared-prod.telemetry.new_profile` WHERE {% if is_init() %} DATE(submission_timestamp) >= '2010-01-01' AND sample_id = @sample_id {% else %} DATE(submission_timestamp) = @submission_date {% endif %} GROUP BY client_id, sample_id ), new_profile_ping AS ( SELECT *, mozfun.norm.windows_version_info( os, os_version, CAST(windows_build_number AS INT64) ) AS windows_version FROM new_profile_ping_agg ), shutdown_ping_agg AS ( SELECT client_id AS client_id, sample_id AS sample_id, MIN(submission_timestamp) AS first_seen_timestamp, ARRAY_AGG(DATE(submission_timestamp) ORDER BY submission_timestamp ASC) AS all_dates, ARRAY_AGG(application.architecture RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS architecture, ARRAY_AGG(environment.build.build_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_build_id, ARRAY_AGG(normalized_app_name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_name, ARRAY_AGG(environment.settings.locale RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS locale, ARRAY_AGG(application.platform_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS platform_version, ARRAY_AGG(application.vendor RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS vendor, ARRAY_AGG(application.version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS app_version, ARRAY_AGG(application.xpcom_abi RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS xpcom_abi, ARRAY_AGG(document_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS document_id, ARRAY_AGG(environment.partner.distribution_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS distribution_id, ARRAY_AGG(environment.partner.distribution_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distribution_version, ARRAY_AGG(environment.partner.distributor RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distributor, ARRAY_AGG(environment.partner.distributor_channel RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_distributor_channel, ARRAY_AGG(environment.partner.partner_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS partner_id, ARRAY_AGG( environment.settings.attribution.campaign RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_campaign, ARRAY_AGG(environment.settings.attribution.content RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_content, ARRAY_AGG(environment.settings.attribution.dltoken RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_dltoken, ARRAY_AGG( environment.settings.attribution.dlsource RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_dlsource, ARRAY_AGG( environment.settings.attribution.experiment RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_experiment, ARRAY_AGG(environment.settings.attribution.medium RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_medium, ARRAY_AGG(environment.settings.attribution.source RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_source, ARRAY_AGG(environment.settings.attribution.ua RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS attribution_ua, ARRAY_AGG( environment.settings.attribution.variation RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS attribution_variation, ARRAY_AGG( environment.settings.default_search_engine_data.load_path RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_load_path, ARRAY_AGG( environment.settings.default_search_engine_data.name RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_name, ARRAY_AGG( environment.settings.default_search_engine_data.origin RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_origin, ARRAY_AGG( environment.settings.default_search_engine_data.submission_url RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS engine_data_submission_url, ARRAY_AGG(environment.system.apple_model_id RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS apple_model_id, ARRAY_AGG(metadata.geo.city RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS city, ARRAY_AGG(metadata.geo.db_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS db_version, ARRAY_AGG(metadata.geo.subdivision1 RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS subdivision1, ARRAY_AGG(metadata.isp.name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS isp_name, ARRAY_AGG(normalized_channel RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS normalized_channel, ARRAY_AGG(normalized_country_code RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS country, ARRAY_AGG( mozfun.norm.os(environment.system.os.name) RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS normalized_os, ARRAY_AGG(normalized_os_version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS normalized_os_version, ARRAY_AGG( payload.processes.parent.scalars.startup_profile_selection_reason RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS startup_profile_selection_reason, CAST(NULL AS BOOL) AS installation_first_seen_admin_user, CAST(NULL AS BOOL) AS installation_first_seen_default_path, CAST(NULL AS STRING) AS installation_first_seen_failure_reason, CAST(NULL AS BOOL) AS installation_first_seen_from_msi, CAST(NULL AS BOOL) AS installation_first_seen_install_existed, CAST(NULL AS STRING) AS installation_first_seen_installer_type, CAST(NULL AS BOOL) AS installation_first_seen_other_inst, CAST(NULL AS BOOL) AS installation_first_seen_other_msix_inst, CAST(NULL AS BOOL) AS installation_first_seen_profdir_existed, CAST(NULL AS BOOL) AS installation_first_seen_silent, CAST(NULL AS STRING) AS installation_first_seen_version, ARRAY_AGG(environment.system.os.name RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS os, ARRAY_AGG(environment.system.os.version RESPECT NULLS ORDER BY submission_timestamp)[ SAFE_OFFSET(0) ] AS os_version, ARRAY_AGG( environment.system.os.windows_build_number RESPECT NULLS ORDER BY submission_timestamp )[SAFE_OFFSET(0)] AS windows_build_number, ARRAY_AGG(profile_group_id ORDER BY submission_timestamp)[SAFE_OFFSET(0)] AS profile_group_id, FROM `moz-fx-data-shared-prod.telemetry.first_shutdown` WHERE {% if is_init() %} DATE(submission_timestamp) >= '2010-01-01' AND sample_id = @sample_id {% else %} DATE(submission_timestamp) = @submission_date {% endif %} GROUP BY client_id, sample_id ), shutdown_ping AS ( SELECT *, mozfun.norm.windows_version_info( os, os_version, CAST(windows_build_number AS INT64) ) AS windows_version FROM shutdown_ping_agg ), main_ping_agg AS ( -- The columns set as NULL are not available in clients_daily_v6 and need to be -- retrieved in the ETL from telemetry_stable.main_v5:<column>. SELECT client_id AS client_id, sample_id AS sample_id, -- The submission_timestamp_min is used to compare with the TIMESTAMP of -- the new_profile and first shutdown pings. -- It was implemented on Dec 16, 2019 and has data from 2018-10-30. IF( MIN(submission_date) >= '2018-10-30', MIN(submission_timestamp_min), TIMESTAMP(MIN(submission_date)) ) AS first_seen_timestamp, ARRAY_AGG(DATE(submission_date) ORDER BY submission_date ASC) AS all_dates, ARRAY_AGG(env_build_arch RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS architecture, ARRAY_AGG(env_build_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_build_id, ARRAY_AGG(app_name RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_name, ARRAY_AGG(locale RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS locale, ARRAY_AGG(env_build_platform_version RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS platform_version, ARRAY_AGG(vendor RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS vendor, ARRAY_AGG(app_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_version, ARRAY_AGG(env_build_xpcom_abi RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS xpcom_abi, ARRAY_AGG(first_document_id RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS document_id, ARRAY_AGG(distribution_id RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS distribution_id, ARRAY_AGG(distribution_version RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS partner_distribution_version, ARRAY_AGG(distributor RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS partner_distributor, ARRAY_AGG(distributor_channel RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS partner_distributor_channel, ARRAY_AGG(partner_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS partner_id, ARRAY_AGG(attribution.campaign RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_campaign, ARRAY_AGG(attribution.content RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_content, ARRAY_AGG(attribution.dltoken RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_dltoken, ARRAY_AGG(attribution.dlsource RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_dlsource, ARRAY_AGG(attribution.experiment RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_experiment, ARRAY_AGG(attribution.medium RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_medium, ARRAY_AGG(attribution.source RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_source, ARRAY_AGG(attribution.ua RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_ua, ARRAY_AGG(attribution.variation RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_variation, ARRAY_AGG(default_search_engine_data_load_path RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_load_path, ARRAY_AGG(default_search_engine_data_name RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_name, ARRAY_AGG(default_search_engine_data_origin RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_origin, ARRAY_AGG(default_search_engine_data_submission_url RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_submission_url, ARRAY_AGG(apple_model_id RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS apple_model_id, ARRAY_AGG(city RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS city, ARRAY_AGG(geo_db_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS db_version, ARRAY_AGG(geo_subdivision1 RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS subdivision1, ARRAY_AGG(isp_name RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS isp_name, ARRAY_AGG(normalized_channel RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS normalized_channel, ARRAY_AGG(country RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS country, ARRAY_AGG(mozfun.norm.os(os) RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS normalized_os, ARRAY_AGG(normalized_os_version RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS normalized_os_version, ARRAY_AGG(startup_profile_selection_first_ping_only RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS startup_profile_selection_reason, CAST(NULL AS BOOL) AS installation_first_seen_admin_user, CAST(NULL AS BOOL) AS installation_first_seen_default_path, CAST(NULL AS STRING) AS installation_first_seen_failure_reason, CAST(NULL AS BOOL) AS installation_first_seen_from_msi, CAST(NULL AS BOOL) AS installation_first_seen_install_existed, CAST(NULL AS STRING) AS installation_first_seen_installer_type, CAST(NULL AS BOOL) AS installation_first_seen_other_inst, CAST(NULL AS BOOL) AS installation_first_seen_other_msix_inst, CAST(NULL AS BOOL) AS installation_first_seen_profdir_existed, CAST(NULL AS BOOL) AS installation_first_seen_silent, CAST(NULL AS STRING) AS installation_first_seen_version, ARRAY_AGG(os RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS os, ARRAY_AGG(os_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS os_version, -- windows_build_number is an INT64 in the main ping but FLOAT65 in n_p_ping and shutdown_ping, will convert to FLOAT64 in next step ARRAY_AGG(windows_build_number RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS windows_build_number_raw, ARRAY_AGG(profile_group_id ORDER BY submission_date)[SAFE_OFFSET(0)] AS profile_group_id, FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE {% if is_init() %} submission_date >= '2010-01-01' AND sample_id = @sample_id {% else %} submission_date = @submission_date {% endif %} GROUP BY client_id, sample_id ), main_ping AS ( SELECT * EXCEPT (windows_build_number_raw, profile_group_id), CAST(windows_build_number_raw AS FLOAT64) AS windows_build_number, profile_group_id, mozfun.norm.windows_version_info( os, os_version, CAST(windows_build_number_raw AS INT64) ) AS windows_version FROM main_ping_agg ), -- The ping priority is required when different ping types have the exact same timestamp unioned AS ( SELECT *, 'new_profile' AS source_ping, 1 AS source_ping_priority FROM new_profile_ping UNION ALL SELECT *, 'shutdown' AS source_ping, 2 AS source_ping_priority FROM shutdown_ping UNION ALL SELECT *, 'main' AS source_ping, 3 AS source_ping_priority FROM main_ping ), -- The next CTE returns the first_seen_date and reporting ping. -- The source_ping_priority and first_seen_timestamp are used to prioritize which ping to select on the first_seen_date -- The timestamp is retrieved to select the first_seen attributes. first_seen_date AS ( SELECT client_id, DATE(MIN(first_seen_timestamp)) AS first_seen_date, ARRAY_AGG(source_ping ORDER BY DATE(first_seen_timestamp), source_ping_priority)[ SAFE_OFFSET(0) ] AS first_seen_source_ping, ARRAY_AGG(first_seen_timestamp ORDER BY DATE(first_seen_timestamp), source_ping_priority)[ SAFE_OFFSET(0) ] AS first_seen_source_ping_timestamp FROM unioned GROUP BY client_id ), -- The next CTE returns the second_seen_date calculated as the next date reported by the -- main ping after first_seen_date or NULL. Dates reported by other pings are excluded. second_seen_date AS ( SELECT client_id, MIN(seen_dates) AS second_seen_date FROM main_ping LEFT JOIN UNNEST(all_dates) AS seen_dates LEFT JOIN first_seen_date fs USING (client_id) WHERE seen_dates > fs.first_seen_date GROUP BY client_id ), -- The next CTE returns the pings ever reported by each client -- Different from other attributes, this data is updated daily when it's NULL, -- so it's not limited to the first_seen_date. reported_pings AS ( SELECT client_id, 'main' IN UNNEST(ARRAY_AGG(source_ping)) AS reported_main_ping, 'new_profile' IN UNNEST(ARRAY_AGG(source_ping)) AS reported_new_profile_ping, 'shutdown' IN UNNEST(ARRAY_AGG(source_ping)) AS reported_shutdown_ping FROM unioned GROUP BY client_id ), _current AS ( -- Get first value when the same ping type returns more than one record with the exact same TIMESTAMP SELECT unioned.client_id AS client_id, unioned.sample_id AS sample_id, fsd.first_seen_date AS first_seen_date, ssd.second_seen_date AS second_seen_date, unioned.* EXCEPT ( client_id, sample_id, first_seen_timestamp, all_dates, source_ping, source_ping_priority, profile_group_id ), STRUCT( fsd.first_seen_source_ping AS first_seen_date_source_ping, pings.reported_main_ping AS reported_main_ping, pings.reported_new_profile_ping AS reported_new_profile_ping, pings.reported_shutdown_ping AS reported_shutdown_ping ) AS metadata, unioned.profile_group_id FROM unioned INNER JOIN first_seen_date AS fsd ON ( unioned.client_id = fsd.client_id AND unioned.first_seen_timestamp = fsd.first_seen_source_ping_timestamp AND unioned.source_ping = fsd.first_seen_source_ping ) LEFT JOIN second_seen_date AS ssd ON unioned.client_id = ssd.client_id LEFT JOIN reported_pings AS pings ON unioned.client_id = pings.client_id ), _previous AS ( SELECT * FROM `moz-fx-data-shared-prod.telemetry_derived.clients_first_seen_v3` ) {% if is_init() %} SELECT * FROM _current {% else %} -- For the daily update: -- The reported ping status in the metadata is updated when it's NULL. -- The second_seen_date is updated when it's NULL and only if there is a -- main ping reported on the submission_date. -- Every other attribute remains as reported on the first_seen_date. SELECT IF(_previous.client_id IS NULL, _current, _previous).* REPLACE ( IF( _previous.first_seen_date IS NOT NULL AND _previous.second_seen_date IS NULL AND _current.client_id IS NOT NULL AND _current.metadata.reported_main_ping, @submission_date, _previous.second_seen_date ) AS second_seen_date, ( SELECT AS STRUCT IF(_previous.client_id IS NULL, _current, _previous).metadata.* REPLACE ( IF( _previous.client_id IS NULL OR _previous.metadata.reported_main_ping IS FALSE AND _current.metadata.reported_main_ping IS TRUE, _current.metadata.reported_main_ping, _previous.metadata.reported_main_ping ) AS reported_main_ping, IF( _previous.client_id IS NULL OR _previous.metadata.reported_new_profile_ping IS FALSE AND _current.metadata.reported_new_profile_ping IS TRUE, _current.metadata.reported_new_profile_ping, _previous.metadata.reported_new_profile_ping ) AS reported_new_profile_ping, IF( _previous.client_id IS NULL OR _previous.metadata.reported_shutdown_ping IS FALSE AND _current.metadata.reported_shutdown_ping IS TRUE, _current.metadata.reported_shutdown_ping, _previous.metadata.reported_shutdown_ping ) AS reported_shutdown_ping ) ) AS metadata ) FROM _previous FULL JOIN _current USING (client_id) {% endif %}