LayerData Layer::report()

in astra-sim-alibabacloud/astra-sim/workload/Layer.cc [335:537]


/**
 * Builds the per-layer statistics record and folds this layer's numbers into
 * the caller-owned running totals; optionally logs them.
 *
 * The function first calls take_stream_stats_average(), then divides every
 * raw counter by FREQ before storing or accumulating it.
 *
 * @param run_name        Label written into the CSV rows and the summary.
 * @param layer_num       Index of this layer; 0 triggers the CSV header (with
 *                        stat_row == 0) and workload->SIZE - 1 triggers the
 *                        end-of-workload summary rows.
 * @param total_rows      Not used by this overload.
 * @param stat_row        When 0, the layer id is written in the first column.
 * @param detailed        Not used by this overload.
 * @param EndToEnd        CSV sink; dereferenced only when seprate_log is true.
 * @param total_compute   In/out: accumulates fwd + wg + ig compute.
 * @param total_exposed   In/out: accumulates exposed waits; on the last layer
 *                        (with seprate_log) it is overwritten with
 *                        boostedTick()/FREQ - total_compute.
 * @param seprate_log     When true, print to stdout and write CSV rows.
 * @param total_fwd_time  In/out: [0] compute, [1] exposed wait, [2] total comm
 *                        for the forward pass. Must have at least 3 elements.
 * @param total_wg_time   Same layout, for weight-gradient.
 * @param total_ig_time   Same layout, for input-gradient.
 * @param pre_bubble_time In/out: accumulated for every layer except
 *                        "embedding_layer"; scaled by (PP_size - 1)/(GA * vpp)
 *                        on the last layer.
 * @param DP_comm         In/out: exposed wg wait when the wg group is not DP_EP.
 * @param DP_EP_comm      In/out: exposed wg wait when the wg group is DP_EP.
 * @param Expose_TP_comm  In/out: exposed fwd + ig wait when the fwd group is not EP.
 * @param Expose_EP_comm  In/out: exposed fwd + ig wait when the fwd group is EP.
 * @return The LayerData record populated for this layer.
 */
LayerData Layer::report(
    std::string run_name,
    int layer_num,
    int total_rows,
    int stat_row,
    CSVWriter* detailed,
    CSVWriter* EndToEnd,
    double& total_compute,
    double& total_exposed,
    bool seprate_log,
    vector<double>& total_fwd_time,
    vector<double>& total_wg_time,
    vector<double>& total_ig_time,
    double& pre_bubble_time,
    double& DP_comm,
    double& DP_EP_comm,
    double& Expose_TP_comm,
    double& Expose_EP_comm) {
  LayerData layerData;
  take_stream_stats_average();

  // Parallelism configuration read from the owning workload.
  const int TP_size = workload->model_parallel_npu_group;
  const int PP_size = workload->pipeline_model_parallelism;
  const int DP_size = workload->all_gpus / (TP_size * PP_size);
  const int EP_size = workload->expert_parallel_npu_group;
  const int vpp = workload->vpp;
  const uint32_t pp_commsize = workload->pp_commsize;
  const int GA = workload->GA;
  UserParam* param = UserParam::getInstance();

  // Group sizes handed to compute_busbw() for each communication phase.
  const int input_grad_group_size =
      input_grad_group_type == MockNccl::GroupType::EP ? EP_size : TP_size;
  const int fwd_pass_group_size =
      fwd_pass_group_type == MockNccl::GroupType::EP ? EP_size : TP_size;
  const int weight_grad_group_size =
      weight_grad_group_type == MockNccl::GroupType::DP_EP ? DP_size / EP_size
                                                           : DP_size;

  // The embedding layer is excluded from the bubble-time accumulation.
  if (id != "embedding_layer") {
    pre_bubble_time += ((total_waiting_for_fwd_comm + total_forward_pass_compute + total_weight_grad_compute + total_input_grad_compute + total_waiting_for_ig_comm) / FREQ);
  }

  // Attribute the exposed weight-gradient wait to DP or DP_EP.
  if (weight_grad_group_type == MockNccl::GroupType::DP_EP) {
    DP_EP_comm += (total_waiting_for_wg_comm / FREQ);
  } else {
    DP_comm += (total_waiting_for_wg_comm / FREQ);
  }

  // Attribute the exposed fwd + input-gradient wait to EP or TP.
  if (fwd_pass_group_type == MockNccl::GroupType::EP) {
    Expose_EP_comm += ((total_waiting_for_fwd_comm + total_waiting_for_ig_comm) / FREQ);
  } else {
    Expose_TP_comm += ((total_waiting_for_fwd_comm + total_waiting_for_ig_comm) / FREQ);
  }

  total_compute += (total_forward_pass_compute / FREQ);
  total_compute += (total_weight_grad_compute / FREQ);
  total_compute += (total_input_grad_compute / FREQ);
  total_exposed += (total_waiting_for_fwd_comm / FREQ);
  total_exposed += (total_waiting_for_wg_comm / FREQ);
  total_exposed += (total_waiting_for_ig_comm / FREQ);

  layerData.layer_name = id;
  layerData.total_forward_pass_compute = total_forward_pass_compute / FREQ;
  layerData.total_weight_grad_compute = total_weight_grad_compute / FREQ;
  layerData.total_input_grad_compute = total_input_grad_compute / FREQ;
  layerData.total_waiting_for_fwd_comm = total_waiting_for_fwd_comm / FREQ;
  layerData.total_waiting_for_wg_comm = total_waiting_for_wg_comm / FREQ;
  layerData.total_waiting_for_ig_comm = total_waiting_for_ig_comm / FREQ;
  layerData.total_fwd_comm = total_fwd_comm / FREQ;
  layerData.total_weight_grad_comm = total_weight_grad_comm / FREQ;
  layerData.total_input_grad_comm = total_input_grad_comm / FREQ;

  // Per-phase running totals: [0] compute, [1] exposed wait, [2] total comm.
  total_fwd_time[0] += total_forward_pass_compute / FREQ;
  total_fwd_time[1] += total_waiting_for_fwd_comm / FREQ;
  total_fwd_time[2] += total_fwd_comm / FREQ;
  total_wg_time[0] += total_weight_grad_compute / FREQ;
  total_wg_time[1] += total_waiting_for_wg_comm / FREQ;
  total_wg_time[2] += total_weight_grad_comm / FREQ;
  total_ig_time[0] += total_input_grad_compute / FREQ;
  total_ig_time[1] += total_waiting_for_ig_comm / FREQ;
  total_ig_time[2] += total_input_grad_comm / FREQ;

  // Tag every entry with its own running index. Previously the counter was
  // never advanced, so all queuing-delay entries were tagged 0 and all
  // network-latency entries were tagged 1.
  int i = 0;
  for (auto& qd : queuing_delay) {
    layerData.avg_queuing_delay.push_back(std::make_pair(i++, qd / FREQ));
  }
  // Network-latency indices start at 1 (start value kept from the original).
  i = 1;
  for (auto& ml : net_message_latency) {
    layerData.avg_network_message_dealy.push_back(std::make_pair(i++, ml / FREQ));
  }

  // Everything below is logging only.
  if (!seprate_log) {
    return layerData;
  }

  std::string data;
  std::pair<float, float> total_bw;
  std::cout << "*******************" << std::endl;
  std::cout << "Layer id: " << id << std::endl;
  std::cout << "Total collectives issued for this layer: "
            << collective_counter << std::endl;
  std::cout << "*************************  Workload stats  "
               "************************* "
            << id << std::endl;

  // The CSV header is emitted once, before the first layer's first row.
  if (stat_row == 0 && layer_num == 0) {
    data = "layer_name,"+run_name+",fwd compute,wg compute,ig compute,fwd exposed comm,wg exposed comm,ig exposed comm,fwd total comm,algbw,busbw,wg total comm,algbw,busbw,ig total comm,algbw,busbw,workload finished at";
    EndToEnd->write_line(data);
  }

  // Build this layer's CSV row; the first column is blank unless stat_row == 0.
  data = "";
  if (stat_row == 0) {
    data += id;
  }
  data += "," + run_name;

  std::cout << "id: " << id << " ,Total cycles spent on fwd pass compute: "
            << total_forward_pass_compute << std::endl;
  data += "," + std::to_string(total_forward_pass_compute / FREQ);

  std::cout << "id: " << id << " ,Total cycles spent on weight grad compute: "
            << total_weight_grad_compute << std::endl;
  data += "," + std::to_string(total_weight_grad_compute / FREQ);

  std::cout << "id: " << id << " ,Total cycles spent on input grad compute: "
            << total_input_grad_compute << std::endl;
  data += "," + std::to_string(total_input_grad_compute / FREQ);

  std::cout << "id: " << id
            << " ,Total cycles spent idle waiting for fwd finish: "
            << total_waiting_for_fwd_comm << std::endl;
  data += "," + std::to_string(total_waiting_for_fwd_comm / FREQ);

  std::cout << "id: " << id
            << " ,Total cycles spent idle waiting for weight grad finish: "
            << total_waiting_for_wg_comm << std::endl;
  data += "," + std::to_string(total_waiting_for_wg_comm / FREQ);

  std::cout << "id: " << id
            << " ,Total cycles spent idle waiting for input grad finish: "
            << total_waiting_for_ig_comm << std::endl;
  data += "," + std::to_string(total_waiting_for_ig_comm / FREQ);

  std::cout << "id: " << id
            << " ,Total cycles spent on fwd pass comm: " << total_fwd_comm
            << std::endl;
  total_bw = compute_busbw(fwd_pass_comm_type, fwd_pass_group_size, fwd_pass_comm_size, total_fwd_comm);
  data += "," + std::to_string(total_fwd_comm / FREQ);
  data += "," + std::to_string(total_bw.first);
  data += "," + std::to_string(total_bw.second);

  std::cout << "id: " << id << " ,Total cycles spent on weight grad comm: "
            << total_weight_grad_comm << std::endl;
  total_bw = compute_busbw(weight_grad_comm_type, weight_grad_group_size, weight_grad_comm_size, total_weight_grad_comm);
  data += "," + std::to_string(total_weight_grad_comm / FREQ);
  data += "," + std::to_string(total_bw.first);
  data += "," + std::to_string(total_bw.second);

  std::cout << "id: " << id << " ,Total cycles spent on input grad comm: "
            << total_input_grad_comm << std::endl;
  total_bw = compute_busbw(input_grad_comm_type, input_grad_group_size, input_grad_comm_size, total_input_grad_comm);
  data += "," + std::to_string(total_input_grad_comm / FREQ);
  data += "," + std::to_string(total_bw.first);
  data += "," + std::to_string(total_bw.second);
  data += "," + std::to_string(static_cast<double>(Sys::boostedTick()) / FREQ);
  EndToEnd->write_line(data);

  // The remaining rows are written only after the last layer.
  if (layer_num == workload->SIZE - 1) {
    // Exposed time is redefined as "everything that was not compute".
    total_exposed = (static_cast<double>(Sys::boostedTick()) / FREQ) - total_compute;
    data = "SUM," + run_name + "," + std::to_string(total_fwd_time[0]) + "," + std::to_string(total_wg_time[0]) + "," + std::to_string(total_ig_time[0]) + "," + std::to_string(total_fwd_time[1]) + "," + std::to_string(total_wg_time[1]) + "," + std::to_string(total_ig_time[1]) + "," + std::to_string(total_fwd_time[2]) + ",NONE,NONE," + std::to_string(total_wg_time[2]) + ",NONE,NONE," + std::to_string(total_ig_time[2]) + ",NONE,NONE";
    EndToEnd->write_line(data);
    double total_time = total_compute + total_exposed;
    data = "total exposed comm," + std::to_string(total_exposed) + ",total comp," + std::to_string(total_compute) + ",total time," + std::to_string(total_time);
    EndToEnd->write_line(data);

    // NOTE(review): if Tick is an integral type, both the initialisation and
    // the *= below truncate a floating-point value, and a zero
    // net_work_param.pp would make the conversion ill-defined — confirm
    // whether this should be a double.
    Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ );
    Expose_PP_time *= (1-param->net_work_param.pp_overlap_ratio) ;
    // pp bubble time
    pre_bubble_time *= static_cast<double>(PP_size - 1) / (GA * vpp);

    // Renders a value with no decimals, or a marker when it is not finite.
    auto format_value = [](double value) {
      std::ostringstream stream;
      if (std::isfinite(value)) {
        stream << std::fixed << std::setprecision(0) << value;
      } else {
        stream << "NaN or Inf";
      }
      return stream.str();
    };
    // Renders value as a percentage of total_time with two decimals.
    auto format_percentage = [&](double value) {
      double percentage = (value / total_time) * 100;
      std::ostringstream stream;
      stream << std::fixed << std::setprecision(2) << percentage;
      return stream.str() + "%";
    };

    std::string keys = "File name, Expose DP comm, Expose DP_EP comm, Expose TP comm, Expose_EP_comm, Expose_PP_comm, bubble time, total comp, total exposed comm, Total time";
    std::string values = run_name + ", " +
                        format_value(DP_comm) + " (" + format_percentage(DP_comm) + "), " +
                        format_value(DP_EP_comm) + " (" + format_percentage(DP_EP_comm) + "), " +
                        format_value(Expose_TP_comm) + " (" + format_percentage(Expose_TP_comm) + "), " +
                        format_value(Expose_EP_comm) + " (" + format_percentage(Expose_EP_comm) + "), " +
                        format_value(Expose_PP_time) + " (" + format_percentage(Expose_PP_time) + "), " +
                        format_value(pre_bubble_time) + " (" + format_percentage(pre_bubble_time) + "), " +
                        format_value(total_compute) + " (" + format_percentage(total_compute) + "), " +
                        format_value(total_exposed) + " (" + format_percentage(total_exposed) + "), " +
                        format_value(total_time);
    data = keys + "\n" + values;
    EndToEnd->write_res(data);
  }
  return layerData;
}