backfill/2020-03-19-replay-payload-bytes-decoded/query.sql (36 lines of code) (raw):

WITH distinct_document_ids AS ( SELECT document_id, MIN(submission_timestamp) AS submission_timestamp FROM `moz-fx-data-shared-prod.payload_bytes_decoded.telemetry_telemetry__*` WHERE DATE(submission_timestamp) = '2020-03-17' GROUP BY document_id ), base AS ( SELECT * FROM `moz-fx-data-shared-prod.payload_bytes_decoded.telemetry_telemetry__*` JOIN distinct_document_ids USING (document_id, submission_timestamp) WHERE DATE(submission_timestamp) = '2020-03-17' ), numbered_duplicates AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY document_id) AS _n FROM base ) SELECT * EXCEPT (_n) FROM numbered_duplicates WHERE _n = 1