def get_step_breakdown_table_args()

in plugin/tensorboard_plugin_profile/convert/input_pipeline_proto_to_gviz.py [0:0]


def get_step_breakdown_table_args(ipa):
  """Builds step breakdown table args from an Input Pipeline Analyzer proto.

  Each entry of `ipa.step_details` is unpacked into a PerGenericStepDetails
  message and turned into one table row. Summary statistics and bottleneck
  statements from `ipa` are collected into a flat dict of custom properties.

  Args:
    ipa: An input_pipeline_pb2.InputPipelineAnalysisResult.

  Returns:
    A (table_description, data, custom_properties) tuple:
      table_description: list of (id, type, label[, options]) column tuples.
      data: one list per step, with values in table_description column order.
      custom_properties: dict of hardware type, step time summary, per-category
        average / standard deviation strings formatted to one decimal place,
        and the bottleneck classification / statement fields.
    NOTE(review): presumably these are the arguments used to construct a
    gviz_api.DataTable -- confirm against the caller.
  """

  table_description = [
      ("stepnum", "string", "Step number"),
      ("deviceComputeTimeMs", "number", "Device compute"),
      ("deviceToDeviceTimeMs", "number", "Device to device"),
      ("deviceCollectivesTimeMs", "number", "Device collectives"),
      ("hostComputeTimeMs", "number", "Host compute"),
      ("kernelLaunchTimeMs", "number", "Kernel launch"),
      ("infeedTimeMs", "number", "Input"),
      ("outfeedTimeMs", "number", "Output"),
      ("compileTimeMs", "number", "Compilation"),
      ("otherTimeMs", "number", "All others"),
      ("tooltip", "string", "tooltip", {
          "role": "tooltip"
      }),
  ]

  # The tooltip lists the categories in the reverse of the column order.
  tooltip_template = ("Step {}, duration: {:.2f} ms\n"
                      "-All others: {:.2f} ms\n"
                      "-Compilation: {:.2f} ms\n"
                      "-Output: {:.2f} ms\n"
                      "-Input: {:.2f} ms\n"
                      "-Kernel launch: {:.2f} ms\n"
                      "-Host compute: {:.2f} ms\n"
                      "-Device collectives: {:.2f} ms\n"
                      "-Device to device: {:.2f} ms\n"
                      "-Device compute: {:.2f} ms")

  data = []
  for step_details in ipa.step_details:
    details = input_pipeline_pb2.PerGenericStepDetails()
    # The result of Unpack is not checked (unchanged from the original code).
    step_details.Unpack(details)

    # "Input" is the host wait-for-input time plus the host-to-device time.
    input_ms = details.host_wait_input_ms + details.host_to_device_ms

    tooltip = tooltip_template.format(
        details.step_name, details.step_time_ms, details.unknown_time_ms,
        details.host_compile_ms, details.output_ms, input_ms,
        details.host_prepare_ms, details.host_compute_ms,
        details.device_collectives_ms, details.device_to_device_ms,
        details.device_compute_ms)

    # Values follow table_description order. Note the field-to-column mapping:
    # "Kernel launch" <- host_prepare_ms, "Compilation" <- host_compile_ms,
    # "All others" <- unknown_time_ms.
    data.append([
        details.step_name,
        details.device_compute_ms,
        details.device_to_device_ms,
        details.device_collectives_ms,
        details.host_compute_ms,
        details.host_prepare_ms,
        input_ms,
        details.output_ms,
        details.host_compile_ms,
        details.unknown_time_ms,
        tooltip,
    ])

  bottleneck_analysis = input_pipeline_pb2.BottleneckAnalysis()
  ipa.recommendation.bottleneck_analysis.Unpack(bottleneck_analysis)

  one_decimal = "{:.1f}".format

  step_time_summary = ipa.step_time_summary
  custom_properties = {
      "hardware_type": ipa.hardware_type,
      # Step time summary
      "steptime_ms_average": one_decimal(step_time_summary.average),
      "steptime_ms_standard_deviation": one_decimal(
          step_time_summary.standard_deviation),
      "steptime_ms_minimum": one_decimal(step_time_summary.minimum),
      "steptime_ms_maximum": one_decimal(step_time_summary.maximum),
  }

  # Step time breakdown: each summary contributes "<prefix>_avg" and
  # "<prefix>_sdv". The tuple order fixes the key order of the resulting dict.
  breakdown = input_pipeline_pb2.GenericStepTimeBreakdown()
  ipa.step_time_breakdown.Unpack(breakdown)
  breakdown_summaries = (
      ("device_compute_time_ms", breakdown.device_compute_ms_summary),
      ("device_to_device_time_ms", breakdown.device_to_device_ms_summary),
      ("device_collectives_time_ms", breakdown.device_collectives_ms_summary),
      ("infeed_time_ms", breakdown.input_ms_summary),
      ("outfeed_time_ms", breakdown.output_ms_summary),
      ("host_compute_time_ms", breakdown.host_compute_ms_summary),
      ("kernel_launch_time_ms", breakdown.host_prepare_ms_summary),
      ("compile_time_ms", breakdown.host_compile_ms_summary),
      ("other_time_ms", breakdown.unknown_time_ms_summary),
  )
  for key_prefix, summary in breakdown_summaries:
    custom_properties[key_prefix + "_avg"] = one_decimal(summary.average)
    custom_properties[key_prefix + "_sdv"] = one_decimal(
        summary.standard_deviation)

  custom_properties.update({
      # Input analysis summary
      "input_conclusion": bottleneck_analysis.input_statement,
      "summary_nextstep": ipa.recommendation.summary_next_step,
      # Generic recommendation
      "device_collectives_bottleneck":
          bottleneck_analysis.device_collectives_classification,
      "device_collectives_statement":
          bottleneck_analysis.device_collectives_statement,
      "kernel_launch_bottleneck":
          bottleneck_analysis.kernel_launch_classification,
      "kernel_launch_statement": bottleneck_analysis.kernel_launch_statement,
      "all_other_bottleneck": bottleneck_analysis.all_other_classification,
      "all_other_statement": bottleneck_analysis.all_other_statement,
  })

  return (table_description, data, custom_properties)