in workload_generator/generate_deepspeed_stage3_workload.py [0:0]
def step(self):
    """Record the communication issued during the optimizer step.

    Appends three data-parallel-group entries to ``self.workload``: a
    reduce-scatter sized from the current reduce bucket, followed by two
    small all-reduces labelled ``has_overflow`` and
    ``get_grad_norm_direct``.  Afterwards every model parameter is marked
    as not gathered, the live-parameter counter is zeroed, and each
    persistent parameter is gathered again.
    """
    self.stage = "step"

    def _log_dp_comm(kind, size, label):
        # All entries of this stage target the data-parallel group and
        # share the "<stage>.<label>" naming scheme.
        self.workload.append(
            LogItem(
                comm_type=kind,
                comm_group=CommGroup.dp_group,
                comm_group_size=self.dp_world_size,
                msg_size=size,
                stage=f"{self.stage}.{label}",
            )
        )

    # NOTE(review): the factor 2 presumably converts an element count to
    # bytes for 2-byte gradients -- confirm against where reduce_bucket
    # is accumulated.
    _log_dp_comm(
        CommType.reduce_scatter, self.reduce_bucket * 2, "reduce_scatter_fn"
    )
    # The bucket has been accounted for; start the next iteration empty.
    self.reduce_bucket = 0

    # Two fixed-size all-reduces, recorded in this order.
    fixed_all_reduces = (
        (1, "has_overflow"),
        (8, "get_grad_norm_direct"),
    )
    for size, label in fixed_all_reduces:
        _log_dp_comm(CommType.all_reduce, size, label)

    # Reset per-parameter gather bookkeeping before re-gathering the
    # persistent parameters.
    for module_param in self.model.parameters():
        module_param.has_been_allgather = False
    self.current_live_parameters = 0

    for persistent in self.persistent_params:
        self._gather_param_directly(persistent)