LayerData Layer::report()

in astra-sim-alibabacloud/astra-sim/workload/Layer.cc [546:831]
260 lines of code
20 McCabe index (conditional complexity)

/// @brief Collects this layer's compute/communication statistics, folds them
///        into the caller's running totals, and optionally logs them.
///
/// All reported figures are the layer's accumulated counters divided by FREQ.
/// In ANALYTICAL mode the communication totals are first recomputed through
/// compute_time(). Exposed communication is then scaled by
/// (1 - overlap ratio) for the matching parallelism group.
///
/// NOTE: this function mutates the layer's own total_waiting_for_*_comm
/// members (overlap scaling), so calling it twice on the same layer applies
/// the scaling twice.
///
/// @param run_name        Label written into every CSV row.
/// @param layer_num       Index of this layer; with stat_row == 0 and
///                        layer_num == 0 the CSV header is written, and when it
///                        equals workload->SIZE - 1 the end-to-end summary is
///                        emitted.
/// @param total_rows      Unused by this function.
/// @param stat_row        When 0 the layer id is written in the first column.
/// @param detailed        Unused by this function.
/// @param EndToEnd        CSV writer receiving per-layer rows and the summary;
///                        dereferenced only when seprate_log is true.
/// @param total_compute   In/out accumulator of compute time.
/// @param total_exposed   In/out accumulator of exposed communication time.
/// @param pre_bubble_time In/out accumulator used for the pipeline bubble.
/// @param DP_comm         In/out accumulator of exposed DP communication.
/// @param DP_EP_comm      In/out accumulator of exposed DP_EP communication.
/// @param Expose_TP_comm  In/out accumulator of exposed TP communication.
/// @param Expose_EP_comm  In/out accumulator of exposed EP communication.
/// @param seprate_log     When true, print to stdout and write CSV/HTML output.
/// @return LayerData filled with this layer's statistics.
LayerData Layer::report(
    std::string run_name,
    int layer_num,
    int total_rows,
    int stat_row,
    CSVWriter* detailed,
    CSVWriter* EndToEnd,
    double& total_compute,
    double& total_exposed,
    double& pre_bubble_time,
    double& DP_comm,
    double& DP_EP_comm,
    double& Expose_TP_comm,
    double& Expose_EP_comm,
    bool seprate_log) {
  LayerData layerData;
  take_stream_stats_average();

  const int TP_size = workload->model_parallel_npu_group;
  const int PP_size = workload->pipeline_model_parallelism;
  const int vpp = workload->vpp;
  const uint32_t pp_commsize = workload->pp_commsize;
  // NOTE(review): assumes TP_size * PP_size is non-zero - confirm with callers.
  const int DP_size = generator->all_gpus[0] / (TP_size * PP_size);
  const int GA = workload->GA;
  const int EP_size = workload->expert_parallel_npu_group;
  UserParam* param = UserParam::getInstance();

  // Group size used by each communication phase depends on its group type.
  const int input_grad_group_size =
      input_grad_group_type == MockNccl::GroupType::EP ? EP_size : TP_size;
  const int fwd_pass_group_size =
      fwd_pass_group_type == MockNccl::GroupType::EP ? EP_size : TP_size;
  const int weight_grad_group_size =
      weight_grad_group_type == MockNccl::GroupType::DP_EP ? DP_size / EP_size
                                                           : DP_size;

  if (param->mode == ModeType::ANALYTICAL) {
    // Analytical mode: derive the communication times from compute_time().
    total_fwd_comm = compute_time(fwd_pass_comm_type, TP_size, fwd_pass_group_size, fwd_pass_comm_size, fwd_pass_group_type, generator->all_gpus[0], EP_size);
    total_weight_grad_comm = compute_time(weight_grad_comm_type, TP_size, weight_grad_group_size, weight_grad_comm_size, weight_grad_group_type, generator->all_gpus[0], EP_size);
    total_input_grad_comm = compute_time(input_grad_comm_type, TP_size, input_grad_group_size, input_grad_comm_size, input_grad_group_type, generator->all_gpus[0], EP_size);
    total_waiting_for_fwd_comm = total_fwd_comm; // tp forward
    total_waiting_for_ig_comm = total_input_grad_comm; // tp backward
    total_waiting_for_wg_comm = total_weight_grad_comm;
  }

  // The embedding layer is excluded from the bubble estimate. This uses the
  // waiting times BEFORE the overlap scaling below is applied.
  if (id != "embedding_layer") {
    pre_bubble_time += ((total_waiting_for_fwd_comm + total_forward_pass_compute + total_weight_grad_compute + total_input_grad_compute + total_waiting_for_ig_comm) / FREQ);
  }

  // Weight-gradient communication: scale by the DP overlap ratio and credit
  // it to the DP_EP or DP bucket.
  total_waiting_for_wg_comm *= (1 - param->net_work_param.dp_overlap_ratio);
  if (weight_grad_group_type == MockNccl::GroupType::DP_EP) {
    DP_EP_comm += (total_waiting_for_wg_comm / FREQ);
  } else {
    DP_comm += (total_waiting_for_wg_comm / FREQ);
  }

  // Forward / input-gradient communication: scale by the EP or TP overlap
  // ratio, selected by the forward-pass group type.
  if (fwd_pass_group_type == MockNccl::GroupType::EP) {
    total_waiting_for_fwd_comm *= (1 - param->net_work_param.ep_overlap_ratio);
    total_waiting_for_ig_comm *= (1 - param->net_work_param.ep_overlap_ratio);
    Expose_EP_comm += ((total_waiting_for_fwd_comm + total_waiting_for_ig_comm) / FREQ);
  } else {
    total_waiting_for_fwd_comm *= (1 - param->net_work_param.tp_overlap_ratio);
    total_waiting_for_ig_comm *= (1 - param->net_work_param.tp_overlap_ratio);
    Expose_TP_comm += ((total_waiting_for_fwd_comm + total_waiting_for_ig_comm) / FREQ);
  }

  total_compute += (total_forward_pass_compute / FREQ);
  total_compute += (total_weight_grad_compute / FREQ);
  total_compute += (total_input_grad_compute / FREQ);
  total_exposed += (total_waiting_for_fwd_comm / FREQ);
  total_exposed += (total_waiting_for_wg_comm / FREQ);
  total_exposed += (total_waiting_for_ig_comm / FREQ);

  layerData.layer_name = id;
  layerData.total_forward_pass_compute = total_forward_pass_compute / FREQ;
  layerData.total_weight_grad_compute = total_weight_grad_compute / FREQ;
  layerData.total_input_grad_compute = total_input_grad_compute / FREQ;
  layerData.total_waiting_for_fwd_comm = total_waiting_for_fwd_comm / FREQ;
  layerData.total_waiting_for_wg_comm = total_waiting_for_wg_comm / FREQ;
  layerData.total_waiting_for_ig_comm = total_waiting_for_ig_comm / FREQ;
  layerData.total_fwd_comm = total_fwd_comm / FREQ;
  layerData.total_weight_grad_comm = total_weight_grad_comm / FREQ;
  layerData.total_input_grad_comm = total_input_grad_comm / FREQ;

  // Tag each entry with its own position. The index was previously never
  // advanced, so every entry carried the same tag. Queuing delays are numbered
  // from 0 and message latencies from 1, matching the original start values.
  int queuing_index = 0;
  for (const auto& qd : queuing_delay) {
    layerData.avg_queuing_delay.push_back(std::make_pair(queuing_index++, qd / FREQ));
  }
  int latency_index = 1;
  for (const auto& ml : net_message_latency) {
    layerData.avg_network_message_dealy.push_back(std::make_pair(latency_index++, ml / FREQ));
  }

  if (seprate_log) {
    std::string data;
    std::pair<float, float> total_bw;
    std::cout << "*******************" << std::endl;
    std::cout << "Layer id: " << id << std::endl;
    std::cout << "Total collectives issued for this layer: " << collective_counter << std::endl;
    std::cout << "*************************  Workload stats  ************************* " << id << std::endl;

    // The CSV header is written once, by the first layer of the first row.
    if (stat_row == 0 && layer_num == 0) {
      data = "layer_name," + run_name + ",fwd compute,wg compute,ig compute,fwd exposed comm,wg exposed comm,ig exposed comm,fwd total comm,algbw,busbw,wg total comm,algbw,busbw,ig total comm,algbw,busbw";
      EndToEnd->write_line(data);
    }
    data = "";
    if (stat_row == 0) {
      data += id;
    }
    data += "," + run_name;

    // Formats a value with no decimals, or a marker when it is not finite.
    auto format_value = [](double value) {
      std::ostringstream stream;
      if (std::isfinite(value)) {
        stream << std::fixed << std::setprecision(0) << value;
      } else {
        stream << "NaN or Inf";
      }
      return stream.str();
    };
    // Formats a bandwidth figure with two decimals.
    auto format_value_bs = [](double value) {
      std::ostringstream stream;
      stream << std::fixed << std::setprecision(2) << value;
      return stream.str();
    };

    std::cout << "id: " << id << " ,Total cycles spent on fwd pass compute: "
              << format_value(total_forward_pass_compute / FREQ) << std::endl;
    data += "," + format_value(total_forward_pass_compute / FREQ);

    std::cout << "id: " << id << " ,Total cycles spent on weight grad compute: "
              << format_value(total_weight_grad_compute / FREQ) << std::endl;
    data += "," + format_value(total_weight_grad_compute / FREQ);

    std::cout << "id: " << id << " ,Total cycles spent on input grad compute: "
              << format_value(total_input_grad_compute / FREQ) << std::endl;
    data += "," + format_value(total_input_grad_compute / FREQ);

    std::cout << "id: " << id
              << " ,Total cycles spent idle waiting for fwd finish: "
              << format_value(total_waiting_for_fwd_comm / FREQ) << std::endl;
    data += "," + format_value(total_waiting_for_fwd_comm / FREQ);

    std::cout << "id: " << id
              << " ,Total cycles spent idle waiting for weight grad finish: "
              << format_value(total_waiting_for_wg_comm / FREQ) << std::endl;
    data += "," + format_value(total_waiting_for_wg_comm / FREQ);

    std::cout << "id: " << id
              << " ,Total cycles spent idle waiting for input grad finish: "
              << format_value(total_waiting_for_ig_comm / FREQ) << std::endl;
    data += "," + format_value(total_waiting_for_ig_comm / FREQ);

    std::cout << "id: " << id
              << " ,Total cycles spent on fwd pass comm: " << format_value(total_fwd_comm / FREQ) << std::endl;
    total_bw = compute_busbw(fwd_pass_comm_type, fwd_pass_group_size, fwd_pass_comm_size, total_fwd_comm);
    data += "," + format_value(total_fwd_comm / FREQ);
    data += "," + format_value_bs(total_bw.first);
    data += "," + format_value_bs(total_bw.second);

    std::cout << "id: " << id << " ,Total cycles spent on weight grad comm: "
              << format_value(total_weight_grad_comm / FREQ) << std::endl;
    total_bw = compute_busbw(weight_grad_comm_type, weight_grad_group_size, weight_grad_comm_size, total_weight_grad_comm);
    data += "," + format_value(total_weight_grad_comm / FREQ);
    data += "," + format_value_bs(total_bw.first);
    data += "," + format_value_bs(total_bw.second);

    std::cout << "id: " << id << " ,Total cycles spent on input grad comm: "
              << format_value(total_input_grad_comm / FREQ) << std::endl;
    total_bw = compute_busbw(input_grad_comm_type, input_grad_group_size, input_grad_comm_size, total_input_grad_comm);
    data += "," + format_value(total_input_grad_comm / FREQ);
    data += "," + format_value_bs(total_bw.first);
    data += "," + format_value_bs(total_bw.second);

    // data = data + "," + format_value(((double)Sys::boostedTick()) / FREQ );
    EndToEnd->write_line(data);

    // The last layer also emits the end-to-end summary.
    if (layer_num == workload->SIZE - 1) {
      if (param->mode != ModeType::ANALYTICAL) {
        // Outside analytical mode, exposed time is everything that is not compute.
        total_exposed = (static_cast<double>(Sys::boostedTick()) / FREQ) - total_compute;
      }
      // pp comm time
      // NOTE(review): Tick is presumably an integral type, so this value and
      // the overlap scaling below are truncated - confirm this is intended.
      Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ );
      Expose_PP_time *= (1 - param->net_work_param.pp_overlap_ratio);
      // pp bubble time
      pre_bubble_time *= static_cast<double>(PP_size - 1) / (GA * vpp);
      // total time
      const double total_time = total_compute + total_exposed + pre_bubble_time + Expose_PP_time;

      // Formats a value as its share of total_time, with two decimals.
      auto format_percentage = [total_time](double value) {
        const double percentage = (value / total_time) * 100;
        std::ostringstream stream;
        stream << std::fixed << std::setprecision(2) << percentage;
        return stream.str() + "%";
      };

      std::string file_name = param->res;
      const size_t last_slash_pos = param->res.find_last_of('/');
      if (last_slash_pos != std::string::npos) {
        file_name = param->res.substr(last_slash_pos + 1); // keep the part after the last '/'
      }

      const std::string keys = "File name, Expose DP comm, Expose DP_EP comm, Expose TP comm, Expose_EP_comm, Expose_PP_comm, bubble time, total comp, total exposed comm, Total time";
      const std::string values = file_name + ", " +
                          format_value(DP_comm) + " (" + format_percentage(DP_comm) + "), " +
                          format_value(DP_EP_comm) + " (" + format_percentage(DP_EP_comm) + "), " +
                          format_value(Expose_TP_comm) + " (" + format_percentage(Expose_TP_comm) + "), " +
                          format_value(Expose_EP_comm) + " (" + format_percentage(Expose_EP_comm) + "), " +
                          format_value(Expose_PP_time) + " (" + format_percentage(Expose_PP_time) + "), " +
                          format_value(pre_bubble_time) + " (" + format_percentage(pre_bubble_time) + "), " +
                          format_value(total_compute) + " (" + format_percentage(total_compute) + "), " +
                          format_value(total_exposed) + " (" + format_percentage(total_exposed) + "), " +
                          format_value(total_time);

      data = keys + "\n" + values;
      EndToEnd->write_res(data);

      if (param->net_work_param.visual) {
        // Emit a standalone Chart.js pie chart next to the CSV output.
        std::string chart_path = EndToEnd->path;
        std::ofstream htmlFile(chart_path + "chart.html");
        if (!htmlFile.is_open()) {
          // Report the failure instead of claiming the file was created.
          std::cerr << "Failed to create HTML file: " << chart_path << "chart.html" << std::endl;
        } else {
          std::string chart_file_name = getFileName(chart_path);
          htmlFile << "<!DOCTYPE html>\n";
          htmlFile << "<html>\n<head>\n";
          htmlFile << "<script src=\"https://cdn.jsdelivr.net/npm/chart.js\"></script>\n";
          htmlFile << "<style>\n";
          htmlFile << "body { display: flex; flex-direction: column; justify-content: center; align-items: center; height: 50vh; margin: 0; padding-top: 10%; }\n";
          htmlFile << "canvas { width: 50%; max-width: 400px; height: auto; }\n";
          htmlFile << "h2 { margin: 5px 0; }\n";
          htmlFile << "</style>\n";
          htmlFile << "</head>\n<body>\n";
          htmlFile << "<canvas id=\"myPieChart\"></canvas>\n";
          htmlFile << "<h2>Total Time: " << std::to_string(total_time) << " ns</h2>\n";
          htmlFile << "<h2>model: " << chart_file_name << " </h2>\n";
          htmlFile << "<script>\n";
          htmlFile << "var ctx = document.getElementById('myPieChart').getContext('2d');\n";
          htmlFile << "var myPieChart = new Chart(ctx, {\n";
          htmlFile << "    type: 'pie',\n";
          htmlFile << "    data: {\n";
          htmlFile << "        labels: ['Expose DP comm', 'Expose DP_EP comm','Expose TP comm', 'Expose_EP_comm','Total compute', 'PP Bubble time', 'Expose PP comm'],\n";
          htmlFile << "        datasets: [{\n";
          htmlFile << "            data: ["
                   << DP_comm << ", "
                   << DP_EP_comm << ", "
                   << Expose_TP_comm << ", "
                   << Expose_EP_comm << ", "
                   << total_compute << ", "
                   << pre_bubble_time << ", "
                   << Expose_PP_time << "],\n";
          htmlFile << "            backgroundColor: ['#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF', '#FF9F40','#FF5733'],\n";
          htmlFile << "        }]\n";
          htmlFile << "    },\n";
          htmlFile << "    options: {\n";
          htmlFile << "        responsive: true,\n";
          htmlFile << "        maintainAspectRatio: true,\n";
          htmlFile << "        plugins: {\n";
          htmlFile << "            tooltip: {\n";
          htmlFile << "                callbacks: {\n";
          htmlFile << "                    label: function(context) {\n";
          htmlFile << "                        var label = context.label || '';\n";
          htmlFile << "                        if (label) {\n";
          htmlFile << "                            label += ': ';\n";
          htmlFile << "                        }\n";
          htmlFile << "                        if (context.parsed !== null) {\n";
          htmlFile << "                            label += context.parsed + ' ns';\n";
          htmlFile << "                        }\n";
          htmlFile << "                        return label;\n";
          htmlFile << "                    }\n";
          htmlFile << "                }\n";
          htmlFile << "            }\n";
          htmlFile << "        }\n";
          htmlFile << "    }\n";
          htmlFile << "});\n";
          htmlFile << "</script>\n";
          htmlFile << "</body>\n</html>";

          htmlFile.close();
          std::cout << "HTML file created" << std::endl;
        }
      }
    }
  }

  return layerData;
}