in workload_applyer.py [0:0]
def apply_workload(self):
    # Time the full replay of the communication workload on this rank.
    torch.cuda.synchronize(self.device)
    start = time.perf_counter()
    key = "backward"
    for item in self.workload.workload:
        if (
            self.computation_aiob
            and item.comm_type == CommType.all_reduce
            and key in item.stage
        ):
            # Backward-pass all-reduce with computation overlap (aiob) enabled:
            # the collective is not launched here; the commented-out _overlap
            # path suggests it is meant to be overlapped with backward compute.
            comm_func = self.comm_type_function[item.comm_type]
            # comm_func = self._overlap()
            # comm_func(item)
        else:
            # Dispatch and run the collective that matches this workload item.
            comm_func = self.comm_type_function[item.comm_type]
            comm_func(item)
    torch.cuda.synchronize(self.device)
    end = time.perf_counter()
    # Wall-clock time (seconds) to replay the whole workload.
    return end - start
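
The commented-out `comm_func = self._overlap()` call hints at an alternative path in which the backward-pass all-reduce is launched so that it can overlap with computation instead of being skipped. Below is a minimal sketch of what such a helper could look like; it assumes a dedicated side stream `self.comm_stream` (e.g. a `torch.cuda.Stream()`, not shown in this excerpt) and reuses the existing `comm_type_function` dispatch table. This is an illustrative assumption, not the file's actual implementation.

def _overlap(self):
    # Hypothetical sketch: return a callable that launches the collective on a
    # separate CUDA stream so it can overlap with backward computation.
    # `self.comm_stream` is an assumed attribute, not part of the original code.
    def run_on_side_stream(item):
        comm_func = self.comm_type_function[item.comm_type]
        with torch.cuda.stream(self.comm_stream):
            comm_func(item)
    return run_on_side_stream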