backfill/2024-05-07-telemetry-pings-os-distro/dedupe_pings.sql (372 lines of code) (raw):

-- bhr_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.bhr_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.bhr_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.bhr_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.bhr_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- crash_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.crash_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.crash_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.crash_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.crash_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- event_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.event_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.event_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.event_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.event_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- first_shutdown_use_counter_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.first_shutdown_use_counter_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.first_shutdown_use_counter_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.first_shutdown_use_counter_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.first_shutdown_use_counter_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- first_shutdown_v5 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.first_shutdown_v5` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.first_shutdown_v5` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.first_shutdown_v5` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.first_shutdown_v5` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- heartbeat_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.heartbeat_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.heartbeat_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.heartbeat_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.heartbeat_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- main_use_counter_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.main_use_counter_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.main_use_counter_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.main_use_counter_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.main_use_counter_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- main_v5 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.main_v5` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.main_v5` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.main_v5` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.main_v5` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- modules_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.modules_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.modules_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.modules_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.modules_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- new_profile_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.new_profile_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.new_profile_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.new_profile_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.new_profile_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- sync_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.sync_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.sync_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.sync_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.sync_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL ); -- update_v4 CREATE TABLE `moz-fx-data-backfill-1.telemetry_os_distro_deduped.update_v4` LIKE `moz-fx-data-backfill-1.telemetry_os_distro_output.update_v4` AS ( WITH existing_doc_ids AS ( SELECT document_id FROM `moz-fx-data-shared-prod.telemetry_stable.update_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' ), new_rows AS ( SELECT * FROM `moz-fx-data-backfill-1.telemetry_os_distro_output.update_v4` WHERE DATE(submission_timestamp) BETWEEN '2024-01-16' AND '2024-05-02' QUALIFY ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY submission_timestamp) = 1 ) SELECT new_rows.* FROM new_rows LEFT JOIN existing_doc_ids USING (document_id) WHERE existing_doc_ids.document_id IS NULL );