sql/moz-fx-data-shared-prod/search_derived/search_clients_daily_v8/query.sql (353 lines of code) (raw):

-- Return the version of the search addon if it exists, null otherwise CREATE TEMP FUNCTION get_search_addon_version(active_addons ANY type) AS ( ( SELECT mozfun.stats.mode_last(ARRAY_AGG(version)) FROM UNNEST(active_addons) WHERE addon_id = 'followonsearch@mozilla.com' GROUP BY version ) ); -- For newer search probes that are based on access point CREATE TEMP FUNCTION add_access_point( entries ARRAY<STRUCT<key STRING, value INT64>>, access_point STRING ) AS ( ARRAY(SELECT AS STRUCT CONCAT(key, '.', access_point) AS key, value, FROM UNNEST(entries)) ); -- List of Ad Blocking Addons produced using this logic: https://github.com/mozilla/search-adhoc-analysis/tree/master/monetization-blocking-addons WITH adblocker_addons AS ( SELECT addon_id, addon_name FROM `moz-fx-data-shared-prod.revenue.monetization_blocking_addons` WHERE blocks_monetization ), clients_with_adblocker_addons AS ( SELECT client_id, submission_date, TRUE AS has_adblocker_addon FROM `moz-fx-data-shared-prod.telemetry.clients_daily` CROSS JOIN UNNEST(active_addons) a INNER JOIN adblocker_addons USING (addon_id) WHERE submission_date = @submission_date AND NOT a.user_disabled AND NOT a.app_disabled AND NOT a.blocklisted GROUP BY client_id, submission_date ), profile_group_id AS ( SELECT client_id, profile_group_id FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE submission_date = @submission_date ), is_enterprise_policies AS ( SELECT client_id, DATE(submission_timestamp) AS submission_date, mozfun.stats.mode_last( ARRAY_AGG( payload.processes.parent.scalars.policies_is_enterprise ORDER BY submission_timestamp ) ) AS policies_is_enterprise FROM `moz-fx-data-shared-prod`.telemetry_stable.main_v5 WHERE normalized_app_name = 'Firefox' AND document_id IS NOT NULL GROUP BY client_id, submission_date ), combined_access_point AS ( SELECT * EXCEPT (has_adblocker_addon), COALESCE(has_adblocker_addon, FALSE) AS has_adblocker_addon, ARRAY_CONCAT( add_access_point(search_content_urlbar_sum, 'urlbar'), add_access_point(search_content_urlbar_persisted_sum, 'urlbar_persisted'), add_access_point(search_content_urlbar_handoff_sum, 'urlbar_handoff'), add_access_point(search_content_urlbar_searchmode_sum, 'urlbar_searchmode'), add_access_point(search_content_contextmenu_sum, 'contextmenu'), add_access_point(search_content_about_home_sum, 'about_home'), add_access_point(search_content_about_newtab_sum, 'about_newtab'), add_access_point(search_content_searchbar_sum, 'searchbar'), add_access_point(search_content_system_sum, 'system'), add_access_point(search_content_webextension_sum, 'webextension'), add_access_point(search_content_tabhistory_sum, 'tabhistory'), add_access_point(search_content_reload_sum, 'reload'), add_access_point(search_content_unknown_sum, 'unknown') ) AS in_content_with_sap, ARRAY_CONCAT( add_access_point(search_withads_urlbar_sum, 'urlbar'), add_access_point(search_withads_urlbar_persisted_sum, 'urlbar_persisted'), add_access_point(search_withads_urlbar_handoff_sum, 'urlbar_handoff'), add_access_point(search_withads_urlbar_searchmode_sum, 'urlbar_searchmode'), add_access_point(search_withads_contextmenu_sum, 'contextmenu'), add_access_point(search_withads_about_home_sum, 'about_home'), add_access_point(search_withads_about_newtab_sum, 'about_newtab'), add_access_point(search_withads_searchbar_sum, 'searchbar'), add_access_point(search_withads_system_sum, 'system'), add_access_point(search_withads_webextension_sum, 'webextension'), add_access_point(search_withads_tabhistory_sum, 'tabhistory'), add_access_point(search_withads_reload_sum, 'reload'), add_access_point(search_withads_unknown_sum, 'unknown') ) AS search_with_ads_with_sap, ARRAY_CONCAT( add_access_point(search_adclicks_urlbar_sum, 'urlbar'), add_access_point(search_adclicks_urlbar_persisted_sum, 'urlbar_persisted'), add_access_point(search_adclicks_urlbar_handoff_sum, 'urlbar_handoff'), add_access_point(search_adclicks_urlbar_searchmode_sum, 'urlbar_searchmode'), add_access_point(search_adclicks_contextmenu_sum, 'contextmenu'), add_access_point(search_adclicks_about_home_sum, 'about_home'), add_access_point(search_adclicks_about_newtab_sum, 'about_newtab'), add_access_point(search_adclicks_searchbar_sum, 'searchbar'), add_access_point(search_adclicks_system_sum, 'system'), add_access_point(search_adclicks_webextension_sum, 'webextension'), add_access_point(search_adclicks_tabhistory_sum, 'tabhistory'), add_access_point(search_adclicks_reload_sum, 'reload'), add_access_point(search_adclicks_unknown_sum, 'unknown') ) AS ad_clicks_with_sap, FROM `moz-fx-data-shared-prod.telemetry.clients_daily` LEFT JOIN clients_with_adblocker_addons USING (client_id, submission_date) LEFT JOIN is_enterprise_policies USING (client_id, submission_date) ), augmented AS ( SELECT *, ARRAY_CONCAT( ARRAY( SELECT AS STRUCT element.source AS source, element.engine AS engine, element.count AS count, CASE WHEN ( element.source IN ( 'searchbar', 'urlbar', 'abouthome', 'newtab', 'contextmenu', 'system', 'activitystream', 'webextension', 'alias', 'urlbar-searchmode', 'urlbar-handoff', 'urlbar-persisted' ) OR element.source IS NULL ) THEN 'sap' WHEN STARTS_WITH(element.source, 'in-content:sap:') OR STARTS_WITH(element.source, 'sap:') THEN 'tagged-sap' WHEN STARTS_WITH(element.source, 'in-content:sap-follow-on:') OR STARTS_WITH(element.source, 'follow-on:') THEN 'tagged-follow-on' WHEN STARTS_WITH(element.source, 'in-content:organic:') THEN 'organic' WHEN STARTS_WITH(element.source, 'ad-click:') THEN 'ad-click' WHEN STARTS_WITH(element.source, 'search-with-ads:') THEN 'search-with-ads' ELSE 'unknown' END AS type FROM UNNEST(search_counts) AS element WHERE -- only use these in-content counts if access point probes are not available ARRAY_LENGTH(in_content_with_sap) = 0 OR NOT STARTS_WITH(element.source, 'in-content:') ), ARRAY( SELECT AS STRUCT CONCAT('in-content:', SUBSTR(key, STRPOS(key, ':') + 1)) AS source, SPLIT(key, ':')[OFFSET(0)] AS engine, value AS count, CASE WHEN REGEXP_CONTAINS(key, ':tagged:') THEN 'tagged-sap' WHEN REGEXP_CONTAINS(key, ':tagged-follow-on:') THEN 'tagged-follow-on' WHEN REGEXP_CONTAINS(key, ':organic:') THEN 'organic' ELSE 'unknown' END AS type FROM UNNEST(in_content_with_sap) ), ARRAY( SELECT AS STRUCT CONCAT('ad-click:', COALESCE(SPLIT(key, ':')[SAFE_OFFSET(1)], '')) AS source, SPLIT(key, ':')[OFFSET(0)] AS engine, value AS count, CONCAT('ad-click', IF(REGEXP_CONTAINS(key, ':organic'), ':organic', '')) AS type FROM UNNEST(IF(ARRAY_LENGTH(ad_clicks_with_sap) = 0, ad_clicks, ad_clicks_with_sap)) ), ARRAY( SELECT AS STRUCT CONCAT('search-with-ads:', COALESCE(SPLIT(key, ':')[SAFE_OFFSET(1)], '')) AS source, SPLIT(key, ':')[OFFSET(0)] AS engine, value AS count, CONCAT('search-with-ads', IF(REGEXP_CONTAINS(key, ':organic'), ':organic', '')) AS type FROM UNNEST( IF( ARRAY_LENGTH(search_with_ads_with_sap) = 0, search_with_ads, search_with_ads_with_sap ) ) ) ) AS _searches, FROM combined_access_point ), flattened AS ( SELECT * FROM augmented CROSS JOIN UNNEST( IF -- Provide replacement empty _searches with one null search, to ensure all -- clients are included in results ( ARRAY_LENGTH(_searches) > 0, _searches, [(CAST(NULL AS STRING), CAST(NULL AS STRING), NULL, CAST(NULL AS STRING))] ) ) ), -- Get count based on search type counted AS ( SELECT -- use row number to dedupe over window ROW_NUMBER() OVER w1 AS _n, submission_date, client_id, engine, source, country, get_search_addon_version(active_addons) AS addon_version, has_adblocker_addon, policies_is_enterprise, app_version, distribution_id, locale, user_pref_browser_search_region, search_cohort, os, os_version, CASE WHEN mozfun.norm.os(os) = "Windows" THEN mozfun.norm.windows_version_info(os, os_version, windows_build_number) ELSE CAST(mozfun.norm.truncate_version(os_version, "major") AS STRING) END AS os_version_major, CASE WHEN mozfun.norm.os(os) = "Windows" THEN mozfun.norm.windows_version_info(os, os_version, windows_build_number) ELSE CAST(mozfun.norm.truncate_version(os_version, "minor") AS STRING) END AS os_version_minor, channel, is_default_browser, UNIX_DATE(DATE(profile_creation_date)) AS profile_creation_date, default_search_engine, default_search_engine_data_load_path, default_search_engine_data_submission_url, default_private_search_engine, default_private_search_engine_data_load_path, default_private_search_engine_data_submission_url, sample_id, SAFE_CAST(subsession_hours_sum AS FLOAT64) AS subsession_hours_sum, sessions_started_on_this_day, active_addons_count_mean, scalar_parent_browser_engagement_max_concurrent_tab_count_max AS max_concurrent_tab_count_max, scalar_parent_browser_engagement_tab_open_event_count_sum AS tab_open_event_count_sum, active_hours_sum, scalar_parent_browser_engagement_total_uri_count_sum AS total_uri_count, experiments, scalar_parent_urlbar_searchmode_bookmarkmenu_sum, scalar_parent_urlbar_searchmode_handoff_sum, scalar_parent_urlbar_searchmode_keywordoffer_sum, scalar_parent_urlbar_searchmode_oneoff_sum, scalar_parent_urlbar_searchmode_other_sum, scalar_parent_urlbar_searchmode_shortcut_sum, scalar_parent_urlbar_searchmode_tabmenu_sum, scalar_parent_urlbar_searchmode_tabtosearch_sum, scalar_parent_urlbar_searchmode_tabtosearch_onboard_sum, scalar_parent_urlbar_searchmode_topsites_newtab_sum, scalar_parent_urlbar_searchmode_topsites_urlbar_sum, scalar_parent_urlbar_searchmode_touchbar_sum, scalar_parent_urlbar_searchmode_typed_sum, profile_age_in_days, CAST( NULL AS STRING ) AS normalized_engine, -- https://github.com/mozilla/bigquery-etl/issues/2462 SUM(IF(type = 'organic', count, 0)) OVER w1 AS organic, SUM(IF(type = 'tagged-sap', count, 0)) OVER w1 AS tagged_sap, SUM(IF(type = 'tagged-follow-on', count, 0)) OVER w1 AS tagged_follow_on, SUM(IF(type = 'sap', count, 0)) OVER w1 AS sap, SUM(IF(type = 'ad-click', count, 0)) OVER w1 AS ad_click, SUM(IF(type = 'ad-click:organic', count, 0)) OVER w1 AS ad_click_organic, SUM(IF(type = 'search-with-ads', count, 0)) OVER w1 AS search_with_ads, SUM(IF(type = 'search-with-ads:organic', count, 0)) OVER w1 AS search_with_ads_organic, SUM(IF(type = 'unknown', count, 0)) OVER w1 AS unknown, FROM flattened WHERE submission_date = @submission_date AND client_id IS NOT NULL AND (count < 10000 OR count IS NULL) WINDOW w1 AS ( PARTITION BY client_id, submission_date, engine, source, type ) ), staging AS ( SELECT * EXCEPT (_n), `moz-fx-data-shared-prod.udf.monetized_search`( engine, country, distribution_id, submission_date ) AS is_sap_monetizable FROM counted WHERE _n = 1 ) SELECT stg.*, prfl_gp_id.profile_group_id FROM staging stg LEFT JOIN profile_group_id prfl_gp_id ON stg.client_id = prfl_gp_id.client_id