in tb_plugin/torch_tb_profiler/profiler/event_parser.py [0:0]
def parse_nodes(self, events):
    """Turn raw trace events into node collections and link runtimes to operators.

    Correlation strategy:

    * OperatorNode and ProfilerStepNode: the parent/child relationship is
      derived from time-interval containment, which is consistent with the
      autograd profiler.
    * RuntimeNode: the parent OperatorNode or ProfilerStepNode is found via
      ``external_id``. Interval containment alone is not enough here: a
      RuntimeNode with zero duration that starts at the same time as an
      OperatorNode could be either a child or a sibling of that node.

    Args:
        events: Iterable of trace events. It is walked once for node
            parsing and a second time when NCCL is among ``self.comm_lib``.
            Events whose ``type`` equals ``EventTypes.MEMORY`` are skipped.

    Returns:
        A tuple ``(tid2list, tid2zero_rt_list, corrid_to_device)``, each a
        ``defaultdict(list)`` populated by ``self._parse_node``:

        * ``tid2list``: lists of OperatorNode and ProfilerStepNode
          (RuntimeNode is not included).
        * ``tid2zero_rt_list``: lists of RuntimeNode with ``external_id`` 0;
          they are meant to be attached to root nodes.
        * ``corrid_to_device``: lists of DeviceNode.

    Side effects:
        Extends ``op.runtimes`` for every operator whose ``external_id`` has
        matching runtimes, and logs a warning for each non-zero external id
        whose runtimes matched no operator.
    """
    ops_by_tid = defaultdict(list)        # OperatorNode / ProfilerStepNode only
    zero_id_runtimes_by_tid = defaultdict(list)  # RuntimeNode with external_id=0
    devices_by_corrid = defaultdict(list)  # lists of DeviceNode
    runtime_by_corrid = {}                 # a single RuntimeNode per key
    runtimes_by_extid = defaultdict(list)  # lists of RuntimeNode

    # First pass: hand every non-memory event to the node parser, which fills
    # the lookup tables created above.
    for evt in events:
        if event_is_memory := (evt.type == EventTypes.MEMORY):
            continue
        self._parse_node(
            evt,
            devices_by_corrid,
            runtime_by_corrid,
            runtimes_by_extid,
            ops_by_tid,
            zero_id_runtimes_by_tid,
        )

    # Second pass (NCCL only): let kernel events update communication nodes.
    nccl_in_use = CommLibTypes.Nccl in self.comm_lib
    if nccl_in_use:
        for evt in events:
            if evt.type == EventTypes.KERNEL:
                self._update_communication_node(evt)

    # Associate CUDA runtimes with CPU events. Each matched entry is removed
    # from the table so that only uncorrelated runtimes remain afterwards.
    for thread_ops in ops_by_tid.values():
        for op in thread_ops:
            matched = runtimes_by_extid.pop(op.external_id, None)
            if matched:
                op.runtimes.extend(matched)

    # Whatever is left matched no operator; external id 0 is expected to be
    # unmatched, so only the other ids are reported.
    for ext_id, orphans in runtimes_by_extid.items():
        if ext_id == 0:
            continue
        logger.warning("{} Runtime with external id {} don't correlate to any operator!".format(
            len(orphans), ext_id))

    return ops_by_tid, zero_id_runtimes_by_tid, devices_by_corrid