def forward()

in workload_generator/generate_megatron_workload.py


    def forward(self):
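        """Record the communication issued by one Megatron forward step."""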
        args = self.args
        if self.tp_is_enable:
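            # Broadcast the micro-batch metadata (modeled here as five
            # int64 values, 8 bytes each) from TP rank 0 to the rest of
            # the tensor-parallel group, mirroring Megatron's broadcast
            # of the batch.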
            self.workload.append(
                LogItem(
                    comm_type=CommType.broadcast,
                    comm_group=CommGroup.tp_group,
                    comm_group_size=self.args.tensor_model_parallel_size,
                    msg_size=5 * 8,
                    stage="forward_step",
                    src=0,
                )
            )
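            # Broadcast the batch data itself, counted at 8 bytes per
            # int64 element.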
            self.workload.append(
                LogItem(
                    comm_type=CommType.broadcast,
                    comm_group=CommGroup.tp_group,
                    comm_group_size=self.args.tensor_model_parallel_size,
                    msg_size=8 * (args.world_size + args.seq_length * args.micro_batch),
                    stage="forward_step",
                    src=0,
                )
            )
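        # Per-layer communication for the forward pass is supplied by the
        # model's own workload generator.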
        self.workload.extend(self.model.forward())
        # Vocab-parallel cross entropy issues three TP all-reduces
        # (logits max, predicted logits, and the sum of exp(logits)),
        # each moving one value per token. Even under bf16 training the
        # loss is communicated in float32, hence 4 bytes per element.
        for _ in range(3):
            self.workload.append(
                LogItem(
                    comm_type=CommType.all_reduce,
                    comm_group=CommGroup.tp_group,
                    comm_group_size=self.args.tensor_model_parallel_size,
                    msg_size=args.micro_batch * args.seq_length * 4,
                    stage="forward_step._VocabParallelCrossEntropy",
                )
            )
        # average_losses_across_data_parallel_group: all-reduce a single
        # fp32 scalar loss across the data-parallel group.
        self.workload.append(
            LogItem(
                comm_type=CommType.all_reduce,
                comm_group=CommGroup.dp_group,
                comm_group_size=self.args.dp_num,
                msg_size=1 * 4,
                stage="forward_step.average_losses_across_data_parallel_group",
            )
        )
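
For concreteness, here is a minimal standalone sketch of the message-size
arithmetic above. The config values (seq_length=2048, micro_batch=1,
world_size=8) are hypothetical; only the byte formulas are taken from
forward() itself.

    # Hypothetical config; only the formulas below mirror forward().
    seq_length, micro_batch, world_size = 2048, 1, 8

    # TP broadcast of batch metadata: five int64 values, 8 bytes each.
    meta_bytes = 5 * 8                                         # 40 B

    # TP broadcast of the batch itself, 8 bytes per int64 element.
    batch_bytes = 8 * (world_size + seq_length * micro_batch)  # 16,448 B

    # Each of the three cross-entropy all-reduces moves one fp32 per token.
    loss_bytes = micro_batch * seq_length * 4                  # 8,192 B

    # DP all-reduce of the averaged loss: a single fp32 scalar.
    avg_loss_bytes = 1 * 4                                     # 4 B

    print(meta_bytes, batch_bytes, loss_bytes, avg_loss_bytes)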