backfill/2020-04-14-copy-dedupe-failure/query.sql (36 lines of code) (raw):
WITH distinct_document_ids AS (
SELECT
document_id,
MIN(submission_timestamp) AS submission_timestamp
FROM
`moz-fx-data-shared-prod.payload_bytes_decoded.telemetry_telemetry__main_v4`
WHERE
DATE(submission_timestamp) = '2020-04-14'
GROUP BY
document_id
),
base AS (
SELECT
*
FROM
`moz-fx-data-shared-prod.payload_bytes_decoded.telemetry_telemetry__main_v4`
JOIN
distinct_document_ids
USING
(document_id, submission_timestamp)
WHERE
DATE(submission_timestamp) = '2020-04-14'
),
numbered_duplicates AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY document_id) AS _n
FROM
base
)
SELECT
* EXCEPT (_n)
FROM
numbered_duplicates
WHERE
_n = 1