dashboards/system_tables/sql/job_analyzer_slow.sql (121 lines of code) (raw):
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* It is assumed that the following query will be run in the administration
* project where the reservations were created. If this is not the case,
* prepend the project id to the table name as follows:
* `{project_id}`.`region-{region_name}`.INFORMATION_SCHEMA.{table}
*/
/*
* Job Comparison Report: Returns information about jobs to compare performance
* and troubleshoot.
*/
SELECT
job_id,
creation_time,
ROUND(TIMESTAMP_DIFF(start_time, creation_time, MILLISECOND) / 1000, 2) AS creation_duration_s,
ROUND(TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) / 1000, 2) AS execution_duration_s,
project_id,
user_email,
job_type,
statement_type,
total_bytes_processed,
total_slot_ms,
total_slot_ms / TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS avg_slots,
cache_hit,
ARRAY(
SELECT
STRUCT(
snap.elapsed_ms,
snap.total_slot_ms,
snap.pending_units,
snap.completed_units,
snap.active_units,
snap.elapsed_ms - IFNULL(LAG(snap.elapsed_ms) OVER (ORDER BY snap.elapsed_ms ASC), 0) AS incremental_elapsed_ms,
snap.total_slot_ms - IFNULL(LAG(snap.total_slot_ms) OVER (ORDER BY snap.elapsed_ms ASC), 0) AS incremental_slot_ms
)
FROM
UNNEST(timeline) snap
) AS timeline,
# Rebuild the job_stages array so we can add additional attributes
ARRAY(
SELECT
STRUCT(
stage.id,
stage.name,
TIMESTAMP_MILLIS(stage.start_ms) AS start_time,
stage.start_ms,
TIMESTAMP_MILLIS(stage.end_ms) AS end_time,
stage.end_ms,
stage.end_ms - stage.start_ms AS duration_ms,
stage.slot_ms,
stage.slot_ms / (stage.end_ms - stage.start_ms) AS avg_slots,
stage.input_stages,
stage.status,
stage.parallel_inputs,
stage.completed_parallel_inputs,
stage.records_read,
stage.records_written,
stage.shuffle_output_bytes,
stage.shuffle_output_bytes_spilled,
stage.wait_ratio_avg,
stage.wait_ms_avg,
stage.wait_ratio_max,
stage.wait_ms_max,
stage.read_ratio_avg,
stage.read_ms_avg,
stage.read_ratio_max,
stage.read_ms_max,
stage.compute_ratio_avg,
stage.compute_ms_avg,
stage.compute_ratio_max,
stage.compute_ms_max,
stage.write_ratio_avg,
stage.write_ms_avg,
stage.write_ratio_max,
stage.write_ms_max,
ARRAY_TO_STRING(
ARRAY(
SELECT
step.kind
FROM
UNNEST(stage.steps) step
WITH OFFSET AS step_offset
ORDER BY step_offset ASC
),
", "
) AS steps
)
FROM
UNNEST(job_stages) stage
) AS job_stages,
ARRAY(
SELECT
STRUCT(
ROUND(all_timeline_events.event_time / 1000, 2) AS event_time_seconds,
job_stage_entry.id as job_stage_id,
job_stage_entry.name as job_stage_name
)
FROM
UNNEST(job_stages) job_stage_entry
CROSS JOIN
(
(
SELECT DISTINCT
jse1.start_ms - UNIX_MILLIS(start_time) event_time
FROM
UNNEST(job_stages) jse1
)
UNION DISTINCT
(
SELECT DISTINCT
jse2.end_ms - UNIX_MILLIS(start_time) event_time
FROM
UNNEST(job_stages) jse2
)
UNION DISTINCT
(
SELECT DISTINCT
timeline_entry.elapsed_ms event_time
FROM
UNNEST(timeline) as timeline_entry
)
) all_timeline_events
WHERE
all_timeline_events.event_time >= job_stage_entry.start_ms - UNIX_MILLIS(start_time) AND
all_timeline_events.event_time <= job_stage_entry.end_ms - UNIX_MILLIS(start_time) AND
job_stage_entry.id <= 19
) AS stages_gantt
FROM
-- PUBLIC DASHBOARD USE ONLY
-- Modify this to use your project's INFORMATION_SCHEMA table as follows:
-- `region-{region_name}`.INFORMATION_SCHEMA.{table}
`region-us`.INFORMATION_SCHEMA.JOBS_BY_ORGANIZATION
-- If making a copy of this query, update `@job_param` for the Job Comparison Report.
-- Depending on if this is for the slow or fast job, use `@job_param` or `job_param_2`.
-- When creating the side-by-side comparison view, you will need to duplicate
-- this data source and update parameter to @job_param_2, or similar.
WHERE job_id = @job_param