views/audit/bigquery_audit_logs_v1.sql (166 lines of code) (raw):

/* * Copyright 2020 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * View: bigquery_audit_logs_v1 * Author: ryanmcdowell, freedomofnet, mihirborkar * Description: * This is a user-friendly view over BigQuery job events based on * the legacy BigQuery audit data: https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/AuditData. */ CREATE OR REPLACE VIEW `project_id.dataset_id.bigquery_audit_logs_v1` AS WITH BQAudit AS ( SELECT protopayload_auditlog.authenticationInfo.principalEmail, protopayload_auditlog.requestMetadata.callerIp, protopayload_auditlog.serviceName, protopayload_auditlog.methodName, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.eventName, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.projectId, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.jobId, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.createTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatus.error.code AS errorCode, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatus.error.message AS errorMessage, TIMESTAMP_DIFF( protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime, MILLISECOND) AS runtimeMs, /* This following code extracts the column specific to the Copy operation in BQ */ protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobConfiguration.tableCopy, /* This following code extracts the column specific to the Extract operation in BQ */ protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobConfiguration.extract, /* The following code extracts the columns specific to the Load operation in BQ */ protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalLoadOutputBytes, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobConfiguration.load, /* The following code extracts columns specific to Query operation in BQ */ TIMESTAMP_DIFF( protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime, SECOND) AS runtimeSecs, CAST(CEILING((TIMESTAMP_DIFF( protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime, SECOND)) / 60) AS INT64) AS executionMinuteBuckets, IF(COALESCE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalProcessedBytes, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalSlotMs, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatus.error.code) IS NULL, TRUE, FALSE ) AS isCached, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalSlotMs, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalTablesProcessed, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalViewsProcessed, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalProcessedBytes, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.billingTier, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobConfiguration.query, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.referencedTables, protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.referencedViews FROM `project_id.dataset_id.cloudaudit_googleapis_com_data_access_*` WHERE protopayload_auditlog.serviceName = 'bigquery.googleapis.com' AND protopayload_auditlog.methodName = 'jobservice.jobcompleted' AND protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.eventName IN ( 'table_copy_job_completed', 'query_job_completed', 'extract_job_completed', 'load_job_completed' ) ) /* The following builds a user-friendly projection of the audit data. */ SELECT principalEmail, callerIp, serviceName, methodName, eventName, projectId, jobId, errorCode, errorMessage, STRUCT( EXTRACT(MINUTE FROM startTime) AS minuteOfDay, EXTRACT(HOUR FROM startTime) AS hourOfDay, EXTRACT(DAYOFWEEK FROM startTime) - 1 AS dayOfWeek, EXTRACT(DAYOFYEAR FROM startTime) AS dayOfYear, EXTRACT(WEEK FROM startTime) AS week, EXTRACT(MONTH FROM startTime) AS month, EXTRACT(QUARTER FROM startTime) AS quarter, EXTRACT(YEAR FROM startTime) AS year ) AS date, createTime, startTime, endTime, runtimeMs, runtimeSecs, /* This code queries data specific to the Copy operation */ STRUCT( tableCopy.sourceTables, STRUCT( tableCopy.destinationTable.projectId, tableCopy.destinationTable.datasetId, tableCopy.destinationTable.tableId, CONCAT(tableCopy.destinationTable.datasetId, '.', tableCopy.destinationTable.tableId) AS relativePath, CONCAT(tableCopy.destinationTable.projectId, '.', tableCopy.destinationTable.datasetId, '.', tableCopy.destinationTable.tableId) AS absolutePath ) AS destinationTable, tableCopy.createDisposition, tableCopy.writeDisposition ) AS tableCopy, IF(eventName = 'table_copy_job_completed', 1, 0) AS numCopies, /* The following code queries data specific to the Load operation in BQ */ totalLoadOutputBytes, (totalLoadOutputBytes / pow(2,30)) AS totalLoadOutputGigabytes, (totalLoadOutputBytes / pow(2,40)) AS totalLoadOutputTerabytes, STRUCT( load.sourceUris, STRUCT( load.destinationTable.projectId, load.destinationTable.datasetId, load.destinationTable.tableId, CONCAT(load.destinationTable.datasetId, '.', load.destinationTable.tableId) AS relativePath, CONCAT(load.destinationTable.projectId, '.', load.destinationTable.datasetId, '.', load.destinationTable.tableId) AS absolutePath ) AS destinationTable, load.createDisposition, load.writeDisposition, load.schemaJson ) AS load, IF(eventName = 'load_job_completed', 1, 0) AS numLoads, /* The following code queries data specific to the Extract operation in BQ */ REGEXP_CONTAINS(jobId, 'beam') AS isBeamJob, STRUCT( `extract`.destinationUris, STRUCT( `extract`.sourceTable.projectId, `extract`.sourceTable.datasetId, `extract`.sourceTable.tableId, CONCAT(`extract`.sourceTable.datasetId, '.', `extract`.sourceTable.tableId) AS relativeTableRef, CONCAT(`extract`.sourceTable.projectId, '.', `extract`.sourceTable.datasetId, '.', `extract`.sourceTable.tableId) AS absoluteTableRef ) AS sourceTable ) AS `extract`, IF(eventName = 'extract_job_completed', 1, 0) AS numExtracts, /* The following code queries data specific to the Query operation in BQ */ REGEXP_CONTAINS(query.query, 'cloudaudit_googleapis_com_data_access_') AS isAuditDashboardQuery, errorCode IS NOT NULL AS isError, REGEXP_CONTAINS(errorMessage, 'timeout') AS isTimeout, isCached, IF(isCached, 1, 0) AS numCached, totalSlotMs, totalSlotMs / runtimeMs AS avgSlots, /* The following statement breaks down the query into minute buckets * and provides the average slot usage within that minute. This is a * crude way of making it so you can retrieve the average slot utilization * for a particular minute across multiple queries. */ ARRAY( SELECT STRUCT( TIMESTAMP_TRUNC(TIMESTAMP_ADD(startTime, INTERVAL bucket_num MINUTE), MINUTE) AS time, totalSlotMs / runtimeMs AS avgSlotUsage ) FROM UNNEST(GENERATE_ARRAY(1, executionMinuteBuckets)) AS bucket_num ) AS executionTimeline, totalTablesProcessed, totalViewsProcessed, totalProcessedBytes, (totalProcessedBytes / pow(2,30)) AS totalProcessedGigabytes, (totalProcessedBytes / pow(2,40)) AS totalProcessedTerabytes, totalBilledBytes, (totalBilledBytes / pow(2,30)) AS totalBilledGigabytes, (totalBilledBytes / pow(2,40)) AS totalBilledTerabytes, (totalBilledBytes / pow(2,40)) * 5 AS estimatedCostUsd, billingTier, query, CONCAT(query.destinationTable.datasetId, '.', query.destinationTable.tableId) AS queryDestinationTableRelativePath, CONCAT(query.destinationTable.projectId, '.', query.destinationTable.datasetId, '.', query.destinationTable.tableId) AS queryDestinationTableAbsolutePath, referencedTables, referencedViews, IF(eventName = 'query_job_completed', 1, 0) AS queries FROM BQAudit