backfill/2022-01-27-sanitize-search-engines/desktop_query.sql (51 lines of code) (raw):

CREATE TEMP FUNCTION sanitize_search_counts(input ANY TYPE) AS (( WITH base AS ( SELECT key, value, REGEXP_EXTRACT(key, "([^.]+\\.in-content[:.][^:]+:).*") AS prefix, REGEXP_EXTRACT(key, "[^.]+\\.in-content[:.][^:]+:(.*)") AS code, FROM UNNEST(input) ) SELECT ARRAY_AGG(STRUCT( IF(prefix IS NULL OR code IN ("none", "other", "hz", "h_", "MOZ2", "MOZ4", "MOZ5", "MOZA", "MOZB", "MOZD", "MOZE", "MOZI", "MOZM", "MOZO", "MOZT", "MOZW", "MOZSL01", "MOZSL02", "MOZSL03", "firefox-a", "firefox-b", "firefox-b-1", "firefox-b-ab", "firefox-b-1-ab", "firefox-b-d", "firefox-b-1-d", "firefox-b-e", "firefox-b-1-e", "firefox-b-m", "firefox-b-1-m", "firefox-b-o", "firefox-b-1-o", "firefox-b-lm", "firefox-b-1-lm", "firefox-b-lg", "firefox-b-huawei-h1611", "firefox-b-is-oem1", "firefox-b-oem1", "firefox-b-oem2", "firefox-b-tinno", "firefox-b-pn-wt", "firefox-b-pn-wt-us", "ubuntu", "ffab", "ffcm", "ffhp", "ffip", "ffit", "ffnt", "ffocus", "ffos", "ffsb", "fpas", "fpsa", "ftas", "ftsa", "newext", "monline_dg", "monline_3_dg", "monline_4_dg", "monline_7_dg"), key, CONCAT(prefix, "other.scrubbed")) AS key, --key AS old_key, value)) FROM base )); CREATE TEMP FUNCTION sanitize_scalar(input ANY TYPE) AS (( WITH base AS ( SELECT key, value, REGEXP_EXTRACT(key, "([^:]+:[^:]+:).*") AS prefix, REGEXP_EXTRACT(key, "[^:]+:[^:]+:(.*)") AS code, FROM UNNEST(input) ) SELECT ARRAY_AGG(STRUCT( IF(prefix IS NULL OR code IN ("none", "other", "hz", "h_", "MOZ2", "MOZ4", "MOZ5", "MOZA", "MOZB", "MOZD", "MOZE", "MOZI", "MOZM", "MOZO", "MOZT", "MOZW", "MOZSL01", "MOZSL02", "MOZSL03", "firefox-a", "firefox-b", "firefox-b-1", "firefox-b-ab", "firefox-b-1-ab", "firefox-b-d", "firefox-b-1-d", "firefox-b-e", "firefox-b-1-e", "firefox-b-m", "firefox-b-1-m", "firefox-b-o", "firefox-b-1-o", "firefox-b-lm", "firefox-b-1-lm", "firefox-b-lg", "firefox-b-huawei-h1611", "firefox-b-is-oem1", "firefox-b-oem1", "firefox-b-oem2", "firefox-b-tinno", "firefox-b-pn-wt", "firefox-b-pn-wt-us", "ubuntu", "ffab", "ffcm", "ffhp", "ffip", "ffit", "ffnt", "ffocus", "ffos", "ffsb", "fpas", "fpsa", "ftas", "ftsa", "newext", "monline_dg", "monline_3_dg", "monline_4_dg", "monline_7_dg"), key, CONCAT(prefix, "other.scrubbed")) AS key, --key AS old_key, value)) FROM base )); WITH unioned AS ( SELECT * FROM --`moz-fx-data-shared-prod.telemetry_live.main_v4` `moz-fx-data-shared-prod.telemetry.main_1pct` WHERE submission_timestamp BETWEEN "2022-01-10 12:00:00 UTC" AND "2022-01-10 12:10:00 UTC" ), base AS ( SELECT payload.keyed_histograms.* REPLACE(sanitize_search_counts(payload.keyed_histograms.search_counts) AS search_counts), payload.processes.parent.keyed_scalars.* REPLACE(sanitize_scalar(payload.processes.parent.keyed_scalars.browser_search_content_urlbar) AS browser_search_content_urlbar), /* payload.processes.parent.keyed_scalars.browser_search_content_urlbar_handoff, payload.processes.parent.keyed_scalars.browser_search_content_urlbar_searchmode, payload.processes.parent.keyed_scalars.browser_search_content_searchbar, payload.processes.parent.keyed_scalars.browser_search_content_about_home, payload.processes.parent.keyed_scalars.browser_search_content_about_newtab, payload.processes.parent.keyed_scalars.browser_search_content_contextmenu, payload.processes.parent.keyed_scalars.browser_search_content_webextension, payload.processes.parent.keyed_scalars.browser_search_content_system, payload.processes.parent.keyed_scalars.browser_search_content_tabhistory, payload.processes.parent.keyed_scalars.browser_search_content_reload, payload.processes.parent.keyed_scalars.browser_search_content_unknown payload.processes.parent.keyed_scalars.browser_search_content_searchbar */ FROM unioned --WHERE normalized_channel = 'nightly' ) SELECT browser_search_content_urlbar FROM base where ARRAY_LENGTH(browser_search_content_urlbar) > 0 --AND to_json_string(browser_search_content_urlbar) like '%scrubbed%' limit 100