In workload_generator/generate_deepspeed_stage3_workload.py:
def init(self):
    # When AMP is disabled, broadcast every model parameter from rank 0 across the DP group.
    if not self.amp_enabled:
        for param in self.model.parameters():
            self.workload.append(
                LogItem(
                    comm_type=CommType.broadcast,
                    comm_group=CommGroup.dp_group,
                    comm_group_size=self.dp_world_size,
                    msg_size=param.msg_size(),
                    stage="init._broadcast_model",
                    src=0,
                )
            )
    # Barrier emitted while creating the defragmented fp16 parameter partitions.
    self.workload.append(
        LogItem(
            comm_type=CommType.barrier,
            comm_group=CommGroup.all,
            comm_group_size=self.dp_world_size,
            msg_size=0,
            stage="init._create_fp16_partitions_with_defragmentation",
        )
    )
    # Two barriers emitted during real-optimizer setup.
    for _ in range(2):
        self.workload.append(
            LogItem(
                comm_type=CommType.barrier,
                comm_group=CommGroup.all,
                comm_group_size=self.dp_world_size,
                msg_size=0,
                stage="init._setup_for_real_optimizer",
            )
        )
    # All-gather every parameter across the DP group after partitioning.
    for param in self.model.parameters():
        self.workload.append(
            LogItem(
                comm_type=CommType.all_gather,
                comm_group=CommGroup.dp_group,
                comm_group_size=self.dp_world_size,
                msg_size=param.msg_size(),
                stage="init._allgather_params",
            )
        )
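For context, here is a minimal sketch of the helper types this method relies on (CommType, CommGroup, and LogItem). The member and field names mirror exactly what the snippet above uses; the actual definitions in the repository may differ, so treat this as an illustrative stand-in rather than the real API.

from dataclasses import dataclass
from enum import Enum
from typing import Optional


class CommType(Enum):
    # Only the communication kinds referenced by init() above.
    broadcast = "broadcast"
    barrier = "barrier"
    all_gather = "all_gather"


class CommGroup(Enum):
    # Only the process groups referenced by init() above.
    dp_group = "dp_group"
    all = "all"


@dataclass
class LogItem:
    # One logged communication operation in the generated workload.
    comm_type: CommType
    comm_group: CommGroup
    comm_group_size: int
    msg_size: int
    stage: str
    src: Optional[int] = None  # only set for broadcast-style ops

With stand-ins like these, a single call to init() appends one broadcast per parameter (when AMP is off), one barrier for the fp16 partitioning step, two barriers for real-optimizer setup, and one all_gather per parameter to self.workload.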