in tb_plugin/torch_tb_profiler/profiler/event_parser.py [0:0]
def _update_steps_duration(self, prev_step_end_time, steps_device, steps_matched_device_nodes,
                           is_remove_tail_steps=True):
    '''Update self.steps considering device side events launched by each host side step.
    Update self.steps_names if some tail steps are removed.

    Args:
        prev_step_end_time: Lower bound applied to the start time of the first step.
            When it is None, neither the device side adjustment nor the tail step
            removal is performed and nothing is modified.
        steps_device: Sequence indexed like self.steps. An entry whose element [0]
            equals sys.maxsize marks a step with no device event; otherwise its
            element [1] is compared against the host side end time of the step.
        steps_matched_device_nodes: Sequence indexed like self.steps holding, per step,
            the number of matched device nodes.
        is_remove_tail_steps: When True (the default, which is the formerly hard-coded
            behavior), trailing steps that look incompletely captured are dropped
            from both self.steps and self.steps_names.

    Side effects:
        Rewrites every entry of self.steps as a (start, end) tuple and may replace
        self.steps / self.steps_names with shortened copies.
    '''
    # Change step time to device side on the condition that any step have device time.
    is_use_gpu = prev_step_end_time is not None
    if is_use_gpu:
        # Entries are replaced in place; the list length never changes while iterating.
        for i_step, host_step in enumerate(self.steps):
            # A step can not start before the previous (device adjusted) step ended.
            step_start_time = max(prev_step_end_time, host_step[0])
            step_end_time = host_step[1]
            if steps_device[i_step][0] == sys.maxsize:  # When step i_step has no device event.
                # Assign to step_start_time when kernel is behind host step end.
                step_end_time = max(step_end_time, step_start_time)
            else:
                step_end_time = max(step_end_time, steps_device[i_step][1])
                if step_end_time < step_start_time:
                    logger.warning(
                        "Abnormal step_end_time of step {}: [{}, {}]".format(
                            i_step, step_start_time, step_end_time))
                    # Clamp so that the step never has a negative duration.
                    step_end_time = step_start_time
            self.steps[i_step] = (step_start_time, step_end_time)  # Update step time considering device side.
            prev_step_end_time = step_end_time

    if is_use_gpu and is_remove_tail_steps and len(self.steps) > 1:
        # Index of the last step that has at least one matched device node, or -1 if none has.
        i_step = next(
            (i for i in reversed(range(len(self.steps))) if steps_matched_device_nodes[i] > 0),
            -1)
        if i_step >= 0:
            keep_steps = i_step + 1
            # If that step matched fewer than 80% of the nodes of the step before it,
            # treat it as incompletely captured and drop it as well.
            tail_keep_ratio = 0.8
            if (i_step > 0
                    and steps_matched_device_nodes[i_step - 1] * tail_keep_ratio
                    > steps_matched_device_nodes[i_step]):
                keep_steps = i_step
            if keep_steps < len(self.steps):
                logger.warning(
                    "Remove the last {} steps from overview. "
                    "Because the profiler may fail to capture all the kernels launched by these steps.".format(
                        len(self.steps) - keep_steps
                    ))
                self.steps = self.steps[:keep_steps]
                self.steps_names = self.steps_names[:keep_steps]