in tb_plugin/torch_tb_profiler/profiler/data.py [0:0]
def process(self):
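    """Parse the loaded trace events and aggregate them into the statistics
    consumed downstream: operator/kernel/step costs, GPU metrics, Tensor Core
    usage, and (when present) memory events.
    """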
    # Single parsing pass over the trace: build the per-thread operator call
    # trees and pick up the trace-level properties discovered along the way.
    parser = EventParser()
    self.tid2tree = parser.parse(self.events)

    self.has_runtime = parser.has_runtime
    self.has_kernel = parser.has_kernel
    self.has_communication = parser.has_communication
    self.has_memcpy_or_memset = parser.has_memcpy_or_memset
    self.steps_names = parser.steps_names
    self.used_devices = sorted(list(parser.used_devices))
    self.use_dp = parser.use_dp
    self.use_ddp = parser.use_ddp
    self.comm_lib = parser.comm_lib

    # Parse communications.
    self.comm_node_list = parser.generate_communication_nodes()
    # Start aggregation: group operators by name, by name plus input shape,
    # and by call stack, and group kernels by kernel name and launching op.
    logger.debug("ModuleAggregator")
    module_aggregator = ModuleAggregator()
    module_aggregator.aggregate(self.tid2tree)
    self.op_list_groupby_name = module_aggregator.op_list_groupby_name
    self.op_list_groupby_name_input = module_aggregator.op_list_groupby_name_input
    self.stack_lists_group_by_name = module_aggregator.stack_lists_group_by_name
    self.stack_lists_group_by_name_input = module_aggregator.stack_lists_group_by_name_input
    self.kernel_list_groupby_name_op = module_aggregator.kernel_list_groupby_name_op
logger.debug("OverallParser")
overall_parser = OverallParser()
overall_parser.aggregate(parser.steps, parser.role_ranges)
self.avg_costs = overall_parser.avg_costs
self.steps_costs = overall_parser.steps_costs
self.comm_overlap_costs = overall_parser.communication_overlap
logger.debug("GPUMetricsParser")
self.runtime_node_list = parser.runtime_node_list
gpu_metrics_parser = GPUMetricsParser()
gpu_metrics_parser.parse_events(self.events, parser.global_start_ts, parser.global_end_ts,
parser.steps[0][0], parser.steps[-1][1])
self.gpu_ids = gpu_metrics_parser.gpu_ids
self.gpu_utilization = gpu_metrics_parser.gpu_utilization
self.sm_efficiency = gpu_metrics_parser.avg_approximated_sm_efficiency_per_device
self.occupancy = gpu_metrics_parser.avg_occupancy_per_device
self.gpu_util_buckets = gpu_metrics_parser.gpu_util_buckets
self.approximated_sm_efficiency_ranges = gpu_metrics_parser.approximated_sm_efficiency_ranges
self.blocks_per_sm_count = gpu_metrics_parser.blocks_per_sm_count
self.occupancy_count = gpu_metrics_parser.occupancy_count
logger.debug("TensorCoresParser")
tensorcores_parser = TensorCoresParser()
tensorcores_parser.parse_events(self.tid2tree, module_aggregator.ops, gpu_metrics_parser.gpu_ids)
self.tc_eligible_ops_kernel_ratio = tensorcores_parser.tc_eligible_ops_kernel_ratio
self.tc_ratio = tensorcores_parser.tc_ratio
    if self.has_kernel:
        # Kernel-level statistics only apply when the trace contains GPU kernels.
        logger.debug("KernelParser")
        kernel_parser = KernelParser()
        kernel_parser.parse_events(self.events)
        self.kernel_stat = kernel_parser.kernel_stat
        self.tc_used_ratio = kernel_parser.tc_used_ratio
    memory_events = self._memory_events()
    if len(memory_events):
        # Build the memory parser only when the trace records memory events.
        self.memory_parser = MemoryParser(self.tid2tree, memory_events)
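The method follows a parse-once, aggregate-many shape: EventParser makes a single pass over the events, and every later component (ModuleAggregator, OverallParser, GPUMetricsParser, TensorCoresParser, KernelParser, MemoryParser) consumes either the raw events or that parser's outputs. Below is a minimal, self-contained sketch of the same shape; the classes are hypothetical stand-ins for illustration, not the tb_plugin implementations.

# Schematic only: hypothetical stand-ins, not the classes used in data.py.
class ToyEventParser:
    def parse(self, events):
        # Group event names by thread id as a stand-in for the real call trees.
        tid2tree = {}
        for tid, name in events:
            tid2tree.setdefault(tid, []).append(name)
        return tid2tree

class ToyAggregator:
    def aggregate(self, tid2tree):
        # Downstream components consume the parser's output, not the raw events.
        return {tid: len(names) for tid, names in tid2tree.items()}

if __name__ == '__main__':
    events = [(0, 'aten::mm'), (0, 'aten::relu'), (1, 'cudaLaunchKernel')]
    tid2tree = ToyEventParser().parse(events)     # one parsing pass
    print(ToyAggregator().aggregate(tid2tree))    # {0: 2, 1: 1}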