in smdebug/pytorch/hook.py
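# NOTE: this excerpt assumes the usual module-level names from hook.py are in
# scope -- torch, the is_pt_1_7()/is_pt_1_8() version helpers, and the
# CONVERT_TO_MICROSECS constant (assumed to be 1e6, microseconds per second).
# The exact import paths inside smdebug are not shown here.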
def _collect_torch_profiling_data_if_profiler_enabled(self):
    if not self.autograd_profiler_enabled:
        return
    # Stop the autograd profiler and collect the raw event records. PyTorch 1.8
    # reworked the profiler and moved this path behind _disable_profiler_legacy().
    if is_pt_1_8():
        records = torch.autograd._disable_profiler_legacy()
    else:
        records = torch.autograd._disable_profiler()
    self.autograd_profiler_enabled = False
    # Parse the raw records into an EventList; the parser function was renamed
    # across PyTorch releases (1.7: parse_event_records, 1.8: parse_legacy_records,
    # earlier: parse_cpu_trace).
    if is_pt_1_7():
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_event_records(records), use_cuda=self.use_cuda
        )
    elif is_pt_1_8():
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_legacy_records(records), use_cuda=self.use_cuda
        )
    else:
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_cpu_trace(records), use_cuda=self.use_cuda
        )
    for event in function_events:
        # Event start times are relative to profiler start and are in
        # microseconds; offset them by the wall-clock start time. PyTorch 1.8
        # renamed cpu_interval to time_range.
        if is_pt_1_8():
            cpu_time = event.time_range.start + self.start_profiler_time_us
            duration = event.time_range.elapsed_us() / float(CONVERT_TO_MICROSECS)
        else:
            cpu_time = event.cpu_interval.start + self.start_profiler_time_us
            duration = event.cpu_interval.elapsed_us() / float(CONVERT_TO_MICROSECS)
        # record_trace_events expects the timestamp in seconds.
        timestamp = cpu_time / float(CONVERT_TO_MICROSECS)
        self.record_trace_events(
            training_phase="cpu_functions",
            op_name=event.name,
            phase="X",
            timestamp=timestamp,
            duration=duration,
            tid=event.thread,
            step_num=self.step,
            device="cpu",
        )
        # Emit a trace event for every GPU kernel launched by this op, linked
        # back to the launching CPU thread and op.
        for k in event.kernels:
            self.record_trace_events(
                training_phase="gpu_functions-dev:" + str(k.device),
                op_name=k.name,
                phase="X",
                # record_trace_events expects the timestamp in seconds
                timestamp=(k.interval.start + self.start_profiler_time_us)
                / float(CONVERT_TO_MICROSECS),
                duration=k.interval.elapsed_us() / float(CONVERT_TO_MICROSECS),
                tid=k.device,
                step_num=self.step,
                event_name=event.name,
                device=k.device,
                start_cpu_thread=event.thread,
                cpu_thread_start_time=cpu_time,
            )
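
# ---------------------------------------------------------------------------
# For context only (not part of hook.py): the is_pt_1_7()/is_pt_1_8() gates
# above are smdebug's PyTorch version helpers. Below is a minimal sketch of
# how such a check can be written against torch.__version__; the helper names
# and parsing here are illustrative assumptions, not smdebug's actual
# implementation.
import torch

def _torch_major_minor():
    # "1.8.1+cu111" -> (1, 8); strip any local build suffix before parsing.
    major, minor = torch.__version__.split("+")[0].split(".")[:2]
    return int(major), int(minor)

def is_pt_1_8_sketch():
    return _torch_major_minor() == (1, 8)

# Unit sanity check for the conversions above: profiler intervals are in
# microseconds and record_trace_events expects seconds, so CONVERT_TO_MICROSECS
# is assumed to be the microseconds-per-second factor.
CONVERT_TO_MICROSECS = 1_000_000
assert 1_500_000 / float(CONVERT_TO_MICROSECS) == 1.5  # 1.5e6 us == 1.5 s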