def _collect_torch_profiling_data_if_profiler_enabled()

in smdebug/pytorch/hook.py [0:0]


    def _collect_torch_profiling_data_if_profiler_enabled(self):
        """Stop the autograd profiler and record its events as trace events.

        Does nothing when ``self.autograd_profiler_enabled`` is ``False``.
        Otherwise the profiler is disabled, the flag is cleared, and the
        returned records are parsed into an ``EventList`` using the parser
        that matches the running PyTorch version (as reported by
        ``is_pt_1_7()`` / ``is_pt_1_8()``).

        For every parsed event one ``"cpu_functions"`` trace event is
        recorded, followed by one ``"gpu_functions-dev:<device>"`` trace
        event for each entry in ``event.kernels``.

        Start offsets reported by the profiler are added to
        ``self.start_profiler_time_us`` and then divided by
        ``CONVERT_TO_MICROSECS``, because ``record_trace_events`` expects
        its ``timestamp`` in seconds.
        """
        # Identity check kept on purpose: only an explicit ``False`` skips
        # collection, exactly as before.
        if self.autograd_profiler_enabled is False:
            return

        # Evaluate the version check once; it decides the disable call, the
        # record parser and which interval attribute each event exposes.
        legacy_profiler = is_pt_1_8()

        if legacy_profiler:
            records = torch.autograd._disable_profiler_legacy()
        else:
            records = torch.autograd._disable_profiler()
        self.autograd_profiler_enabled = False

        # Only look up the parser for the branch that is taken, so parser
        # names belonging to the other branches are never touched.
        profiler = torch.autograd.profiler
        if is_pt_1_7():
            parsed_records = profiler.parse_event_records(records)
        elif legacy_profiler:
            parsed_records = profiler.parse_legacy_records(records)
        else:
            parsed_records = profiler.parse_cpu_trace(records)
        function_events = profiler.EventList(parsed_records, use_cuda=self.use_cuda)

        # Hoisted so the conversion factor is not rebuilt for every division.
        divisor = float(CONVERT_TO_MICROSECS)

        for event in function_events:
            # The CPU interval lives under a different attribute name on the
            # legacy (is_pt_1_8) path than on the other paths.
            cpu_interval = event.time_range if legacy_profiler else event.cpu_interval
            # The interval start is in microseconds.
            cpu_time = cpu_interval.start + self.start_profiler_time_us
            duration = cpu_interval.elapsed_us() / divisor
            # timestamp is expected in seconds for record_trace_events
            timestamp = cpu_time / divisor
            self.record_trace_events(
                training_phase="cpu_functions",
                op_name=event.name,
                phase="X",
                timestamp=timestamp,
                duration=duration,
                tid=event.thread,
                step_num=self.step,
                device="cpu",
            )
            for kernel in event.kernels:
                kernel_start = kernel.interval.start + self.start_profiler_time_us
                self.record_trace_events(
                    training_phase="gpu_functions-dev:" + str(kernel.device),
                    op_name=kernel.name,
                    phase="X",
                    # timestamp expected is in seconds for record_trace_events
                    timestamp=kernel_start / divisor,
                    duration=kernel.interval.elapsed_us() / divisor,
                    tid=kernel.device,
                    step_num=self.step,
                    event_name=event.name,
                    device=kernel.device,
                    start_cpu_thread=event.thread,
                    # NOTE(review): cpu_time is passed without the division
                    # applied to ``timestamp`` (unchanged from the original);
                    # confirm the consumer expects that unit.
                    cpu_thread_start_time=cpu_time,
                )