in astra-sim-alibabacloud/astra-sim/network_frontend/phynet/SimAiMain.cc [112:174]
int main(int argc,char *argv[]){
BootStrapNet(argc,argv);
pid_t pid = getpid();
MockNcclLog::set_log_name("SimAi_"+to_string(local_rank)+".log");
MockNcclLog* NcclLog = MockNcclLog::getInstance();
NcclLog->writeLog(NcclLogLevel::DEBUG," Local rank %d PID %d ",local_rank,pid);
struct user_param user_param;
if(user_param_prase(argc,argv,&user_param)){
return -1;
}
#ifdef PHY_RDMA
flow_rdma = FlowPhyRdma(user_param.gid_index);
flow_rdma.ibv_init();
#endif
set_simai_network_callback();
std::vector<int> physical_dims = {user_param.gpus};
std::vector<int>NVswitchs;
std::vector<int> queues_per_dim={1};
std::map<int, int> node2nvswitch;
for(int i = 0; i < user_param.gpus; ++ i) {
node2nvswitch[i] = user_param.gpus + i / user_param.gpus_per_server;
}
for(int i = user_param.gpus; i < user_param.gpus + user_param.nvswitch_num; ++ i){
node2nvswitch[i] = i;
NVswitchs.push_back(i);
}
physical_dims[0] += user_param.nvswitch_num;
SimAiPhyNetWork* phy_network = new SimAiPhyNetWork(local_rank);
global_sys = new AstraSim::Sys(
phy_network,
nullptr,
local_rank,
0,
1,
physical_dims,
queues_per_dim,
"",
user_param.workload,
user_param.comm_scale,
1,
1,
1,
0,
RESULT_PATH,
"phynet_test",
true,
false,
user_param.gpu_type,
{user_param.gpus},
NVswitchs,
user_param.gpus_per_server
);
global_sys->nvswitch_id = node2nvswitch[local_rank];
global_sys->num_gpus = user_param.gpus;
global_sys->workload->fire();
PhyNetSim::Run();
PhyNetSim::Stop();
notify_all_thread_finished();
PhyNetSim::Destory();
MPI_Finalize();
return 0;
};