in plugin/tensorboard_plugin_profile/convert/input_pipeline_proto_to_gviz.py [0:0]
def get_step_breakdown_table_args(ipa):
  """Builds the step-time breakdown table pieces from an analysis proto.

  Args:
    ipa: An input_pipeline_pb2.InputPipelineAnalysisResult.

  Returns:
    A (table_description, data, custom_properties) tuple, where:
      table_description: list of column descriptors for the breakdown table.
      data: list with one row per entry in ipa.step_details; each row holds
        the values in the same order as table_description.
      custom_properties: dict carrying ipa.hardware_type, the step-time
        summary and per-category breakdown statistics (formatted as strings
        with one decimal place), and the bottleneck-analysis fields.
    NOTE(review): these are presumably the arguments later used to build a
    gviz_api.DataTable; this function does not construct one itself.
  """
  table_description = [
      ("stepnum", "string", "Step number"),
      ("deviceComputeTimeMs", "number", "Device compute"),
      ("deviceToDeviceTimeMs", "number", "Device to device"),
      ("deviceCollectivesTimeMs", "number", "Device collectives"),
      ("hostComputeTimeMs", "number", "Host compute"),
      ("kernelLaunchTimeMs", "number", "Kernel launch"),
      ("infeedTimeMs", "number", "Input"),
      ("outfeedTimeMs", "number", "Output"),
      ("compileTimeMs", "number", "Compilation"),
      ("otherTimeMs", "number", "All others"),
      ("tooltip", "string", "tooltip", {
          "role": "tooltip"
      }),
  ]

  data = []
  for step_details in ipa.step_details:
    details = input_pipeline_pb2.PerGenericStepDetails()
    step_details.Unpack(details)
    # The "Input" column is the sum of the host-wait-input and the
    # host-to-device times; compute it once for both the tooltip and the row.
    input_ms = details.host_wait_input_ms + details.host_to_device_ms
    # The tooltip lists the categories in the reverse of the column order.
    # "Kernel launch" is backed by host_prepare_ms and "All others" by
    # unknown_time_ms.
    tooltip = ("Step {}, duration: {:.2f} ms\n"
               "-All others: {:.2f} ms\n"
               "-Compilation: {:.2f} ms\n"
               "-Output: {:.2f} ms\n"
               "-Input: {:.2f} ms\n"
               "-Kernel launch: {:.2f} ms\n"
               "-Host compute: {:.2f} ms\n"
               "-Device collectives: {:.2f} ms\n"
               "-Device to device: {:.2f} ms\n"
               "-Device compute: {:.2f} ms").format(
                   details.step_name, details.step_time_ms,
                   details.unknown_time_ms, details.host_compile_ms,
                   details.output_ms, input_ms, details.host_prepare_ms,
                   details.host_compute_ms, details.device_collectives_ms,
                   details.device_to_device_ms, details.device_compute_ms)
    # Row values follow the column order of table_description.
    data.append([
        details.step_name,
        details.device_compute_ms,
        details.device_to_device_ms,
        details.device_collectives_ms,
        details.host_compute_ms,
        details.host_prepare_ms,
        input_ms,
        details.output_ms,
        details.host_compile_ms,
        details.unknown_time_ms,
        tooltip,
    ])

  bottleneck_analysis = input_pipeline_pb2.BottleneckAnalysis()
  ipa.recommendation.bottleneck_analysis.Unpack(bottleneck_analysis)

  breakdown = input_pipeline_pb2.GenericStepTimeBreakdown()
  ipa.step_time_breakdown.Unpack(breakdown)

  step_time = ipa.step_time_summary
  # All summary statistics are emitted as strings with one decimal place.
  fmt = "{:.1f}".format

  custom_properties = {
      "hardware_type": ipa.hardware_type,
      # Step time summary
      "steptime_ms_average": fmt(step_time.average),
      "steptime_ms_standard_deviation": fmt(step_time.standard_deviation),
      "steptime_ms_minimum": fmt(step_time.minimum),
      "steptime_ms_maximum": fmt(step_time.maximum),
      # Step time breakdown
      "device_compute_time_ms_avg": fmt(
          breakdown.device_compute_ms_summary.average),
      "device_compute_time_ms_sdv": fmt(
          breakdown.device_compute_ms_summary.standard_deviation),
      "device_to_device_time_ms_avg": fmt(
          breakdown.device_to_device_ms_summary.average),
      "device_to_device_time_ms_sdv": fmt(
          breakdown.device_to_device_ms_summary.standard_deviation),
      "device_collectives_time_ms_avg": fmt(
          breakdown.device_collectives_ms_summary.average),
      "device_collectives_time_ms_sdv": fmt(
          breakdown.device_collectives_ms_summary.standard_deviation),
      "infeed_time_ms_avg": fmt(breakdown.input_ms_summary.average),
      "infeed_time_ms_sdv": fmt(
          breakdown.input_ms_summary.standard_deviation),
      "outfeed_time_ms_avg": fmt(breakdown.output_ms_summary.average),
      "outfeed_time_ms_sdv": fmt(
          breakdown.output_ms_summary.standard_deviation),
      "host_compute_time_ms_avg": fmt(
          breakdown.host_compute_ms_summary.average),
      "host_compute_time_ms_sdv": fmt(
          breakdown.host_compute_ms_summary.standard_deviation),
      # Kernel launch statistics come from the host_prepare summary.
      "kernel_launch_time_ms_avg": fmt(
          breakdown.host_prepare_ms_summary.average),
      "kernel_launch_time_ms_sdv": fmt(
          breakdown.host_prepare_ms_summary.standard_deviation),
      "compile_time_ms_avg": fmt(breakdown.host_compile_ms_summary.average),
      "compile_time_ms_sdv": fmt(
          breakdown.host_compile_ms_summary.standard_deviation),
      # "Other" statistics come from the unknown_time summary.
      "other_time_ms_avg": fmt(breakdown.unknown_time_ms_summary.average),
      "other_time_ms_sdv": fmt(
          breakdown.unknown_time_ms_summary.standard_deviation),
      # Input analysis summary
      "input_conclusion": bottleneck_analysis.input_statement,
      "summary_nextstep": ipa.recommendation.summary_next_step,
      # Generic recommendation
      "device_collectives_bottleneck":
          bottleneck_analysis.device_collectives_classification,
      "device_collectives_statement":
          bottleneck_analysis.device_collectives_statement,
      "kernel_launch_bottleneck":
          bottleneck_analysis.kernel_launch_classification,
      "kernel_launch_statement": bottleneck_analysis.kernel_launch_statement,
      "all_other_bottleneck": bottleneck_analysis.all_other_classification,
      "all_other_statement": bottleneck_analysis.all_other_statement,
  }
  return (table_description, data, custom_properties)