def apply_workload()

in workload_applyer.py [0:0]


    def apply_workload(self):
        """Dispatch every workload item once and return the elapsed time.

        The timed region is bracketed by ``torch.cuda.synchronize(self.device)``
        calls: one before the timer starts and one before it is read again.

        For each item the handler registered in ``self.comm_type_function``
        under ``item.comm_type`` is looked up. It is invoked with the item
        unless all of the following hold, in which case the item is skipped:

        * ``self.computation_aiob`` is truthy,
        * ``item.comm_type == CommType.all_reduce``,
        * ``"backward"`` is contained in ``item.stage``.

        Returns:
            float: Difference between two ``time.perf_counter()`` readings,
            i.e. the elapsed time in fractional seconds.
        """
        torch.cuda.synchronize(self.device)
        started_at = time.perf_counter()

        for entry in self.workload.workload:
            # Evaluate the skip condition before the handler lookup so that
            # attribute access and lookup happen in the original order.
            skip_dispatch = (
                self.computation_aiob
                and entry.comm_type == CommType.all_reduce
                and "backward" in entry.stage
            )
            handler = self.comm_type_function[entry.comm_type]
            if skip_dispatch:
                # NOTE(review): the handler is resolved but deliberately not
                # called here; an overlap path via ``self._overlap()`` appears
                # to have been planned and is currently disabled -- confirm
                # that skipping these items is the intended behaviour.
                continue
            handler(entry)

        torch.cuda.synchronize(self.device)
        return time.perf_counter() - started_at