in astra-sim-alibabacloud/astra-sim/network_frontend/ns3/common.h [694:1034]
void SetupNetwork(void (*qp_finish)(FILE *, Ptr<RdmaQueuePair>),void (*send_finish)(FILE *, Ptr<RdmaQueuePair>)) {
topof.open(topology_file.c_str());
flowf.open(flow_file.c_str());
tracef.open(trace_file.c_str());
string gpu_type_str;
topof >> node_num >> gpus_per_server >> nvswitch_num >> switch_num >>
link_num >> gpu_type_str;
flowf >> flow_num;
tracef >> trace_num;
if(gpu_type_str == "A100"){
gpu_type = GPUType::A100;
} else if(gpu_type_str == "A800"){
gpu_type = GPUType::A800;
} else if(gpu_type_str == "H100"){
gpu_type = GPUType::H100;
} else if(gpu_type_str == "H800"){
gpu_type = GPUType::H800;
} else{
gpu_type = GPUType::NONE;
}
std::vector<uint32_t> node_type(node_num, 0);
for (uint32_t i = 0; i < nvswitch_num; i++) {
uint32_t sid;
topof >> sid;
node_type[sid] = 2;
}
for (uint32_t i = 0; i < switch_num; i++)
{
uint32_t sid;
topof >> sid;
node_type[sid] = 1;
}
for (uint32_t i = 0; i < node_num; i++){
if (node_type[i] == 0)
n.Add(CreateObject<Node>());
else if(node_type[i] == 1){
Ptr<SwitchNode> sw = CreateObject<SwitchNode>();
n.Add(sw);
sw->SetAttribute("EcnEnabled", BooleanValue(enable_qcn));
}else if(node_type[i] == 2){
Ptr<NVSwitchNode> sw = CreateObject<NVSwitchNode>();
n.Add(sw);
}
}
NS_LOG_INFO("Create nodes.");
InternetStackHelper internet;
internet.Install(n);
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0) {
serverAddress.resize(i + 1);
serverAddress[i] = node_id_to_ip(i);
} else if(n.Get(i)->GetNodeType() == 2) {
serverAddress.resize(i + 1);
serverAddress[i] = node_id_to_ip(i);
}
}
NS_LOG_INFO("Create channels.");
Ptr<RateErrorModel> rem = CreateObject<RateErrorModel>();
Ptr<UniformRandomVariable> uv = CreateObject<UniformRandomVariable>();
rem->SetRandomVariable(uv);
uv->SetStream(50);
rem->SetAttribute("ErrorRate", DoubleValue(error_rate_per_link));
rem->SetAttribute("ErrorUnit", StringValue("ERROR_UNIT_PACKET"));
FILE *pfc_file = fopen(pfc_output_file.c_str(), "w");
QbbHelper qbb;
Ipv4AddressHelper ipv4;
for (uint32_t i = 0; i < link_num; i++) {
uint32_t src, dst;
std::string data_rate, link_delay;
double error_rate;
topof >> src >> dst >> data_rate >> link_delay >> error_rate;
Ptr<Node> snode = n.Get(src), dnode = n.Get(dst);
qbb.SetDeviceAttribute("DataRate", StringValue(data_rate));
qbb.SetChannelAttribute("Delay", StringValue(link_delay));
if (error_rate > 0) {
Ptr<RateErrorModel> rem = CreateObject<RateErrorModel>();
Ptr<UniformRandomVariable> uv = CreateObject<UniformRandomVariable>();
rem->SetRandomVariable(uv);
uv->SetStream(50);
rem->SetAttribute("ErrorRate", DoubleValue(error_rate));
rem->SetAttribute("ErrorUnit", StringValue("ERROR_UNIT_PACKET"));
qbb.SetDeviceAttribute("ReceiveErrorModel", PointerValue(rem));
} else {
qbb.SetDeviceAttribute("ReceiveErrorModel", PointerValue(rem));
}
fflush(stdout);
NetDeviceContainer d = qbb.Install(snode, dnode);
if (snode->GetNodeType() == 0 || snode->GetNodeType() == 2) {
Ptr<Ipv4> ipv4 = snode->GetObject<Ipv4>();
ipv4->AddInterface(d.Get(0));
ipv4->AddAddress(
1, Ipv4InterfaceAddress(serverAddress[src], Ipv4Mask(0xff000000)));
}
if (dnode->GetNodeType() == 0 || dnode->GetNodeType() == 2) {
Ptr<Ipv4> ipv4 = dnode->GetObject<Ipv4>();
ipv4->AddInterface(d.Get(1));
ipv4->AddAddress(
1, Ipv4InterfaceAddress(serverAddress[dst], Ipv4Mask(0xff000000)));
}
nbr2if[snode][dnode].idx =
DynamicCast<QbbNetDevice>(d.Get(0))->GetIfIndex();
nbr2if[snode][dnode].up = true;
nbr2if[snode][dnode].delay =
DynamicCast<QbbChannel>(
DynamicCast<QbbNetDevice>(d.Get(0))->GetChannel())
->GetDelay()
.GetTimeStep();
nbr2if[snode][dnode].bw =
DynamicCast<QbbNetDevice>(d.Get(0))->GetDataRate().GetBitRate();
nbr2if[dnode][snode].idx =
DynamicCast<QbbNetDevice>(d.Get(1))->GetIfIndex();
nbr2if[dnode][snode].up = true;
nbr2if[dnode][snode].delay =
DynamicCast<QbbChannel>(
DynamicCast<QbbNetDevice>(d.Get(1))->GetChannel())
->GetDelay()
.GetTimeStep();
nbr2if[dnode][snode].bw =
DynamicCast<QbbNetDevice>(d.Get(1))->GetDataRate().GetBitRate();
char ipstring[16];
sprintf(ipstring, "10.%d.%d.0", i / 254 + 1, i % 254 + 1);
ipv4.SetBase(ipstring, "255.255.255.0");
ipv4.Assign(d);
DynamicCast<QbbNetDevice>(d.Get(0))->TraceConnectWithoutContext(
"QbbPfc", MakeBoundCallback(&get_pfc, pfc_file,
DynamicCast<QbbNetDevice>(d.Get(0))));
DynamicCast<QbbNetDevice>(d.Get(1))->TraceConnectWithoutContext(
"QbbPfc", MakeBoundCallback(&get_pfc, pfc_file,
DynamicCast<QbbNetDevice>(d.Get(1))));
}
nic_rate = get_nic_rate(n);
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 1) {
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n.Get(i));
uint32_t shift = 3;
for (uint32_t j = 1; j < sw->GetNDevices(); j++) {
Ptr<QbbNetDevice> dev = DynamicCast<QbbNetDevice>(sw->GetDevice(j));
uint64_t rate = dev->GetDataRate().GetBitRate();
NS_ASSERT_MSG(rate2kmin.find(rate) != rate2kmin.end(),
"must set kmin for each link speed");
NS_ASSERT_MSG(rate2kmax.find(rate) != rate2kmax.end(),
"must set kmax for each link speed");
NS_ASSERT_MSG(rate2pmax.find(rate) != rate2pmax.end(),
"must set pmax for each link speed");
sw->m_mmu->ConfigEcn(j, rate2kmin[rate], rate2kmax[rate],
rate2pmax[rate]);
uint64_t delay = DynamicCast<QbbChannel>(dev->GetChannel())
->GetDelay()
.GetTimeStep();
uint32_t headroom = rate * delay / 8 / 1000000000 * 3;
sw->m_mmu->ConfigHdrm(j, headroom);
sw->m_mmu->pfc_a_shift[j] = shift;
while (rate > nic_rate && sw->m_mmu->pfc_a_shift[j] > 0) {
sw->m_mmu->pfc_a_shift[j]--;
rate /= 2;
}
}
sw->m_mmu->ConfigNPort(sw->GetNDevices() - 1);
sw->m_mmu->ConfigBufferSize(buffer_size * 1024 * 1024);
sw->m_mmu->node_id = sw->GetId();
} else if(n.Get(i)->GetNodeType() == 2){
Ptr<NVSwitchNode> sw = DynamicCast<NVSwitchNode>(n.Get(i));
uint32_t shift = 3;
for (uint32_t j = 1; j < sw->GetNDevices(); j++) {
Ptr<QbbNetDevice> dev = DynamicCast<QbbNetDevice>(sw->GetDevice(j));
uint64_t rate = dev->GetDataRate().GetBitRate();
uint64_t delay = DynamicCast<QbbChannel>(dev->GetChannel())
->GetDelay()
.GetTimeStep();
uint32_t headroom = rate * delay / 8 / 1000000000 * 3;
sw->m_mmu->ConfigHdrm(j, headroom);
sw->m_mmu->pfc_a_shift[j] = shift;
while (rate > nic_rate && sw->m_mmu->pfc_a_shift[j] > 0) {
sw->m_mmu->pfc_a_shift[j]--;
rate /= 2;
}
}
sw->m_mmu->ConfigNPort(sw->GetNDevices()-1);
sw->m_mmu->ConfigBufferSize(buffer_size* 1024 * 1024);
sw->m_mmu->node_id = sw->GetId();
}
}
#if ENABLE_QP
FILE *fct_output = fopen(fct_output_file.c_str(), "w");
FILE *send_output = fopen(send_output_file.c_str(), "w");
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0 || n.Get(i)->GetNodeType() == 2) {
Ptr<RdmaHw> rdmaHw = CreateObject<RdmaHw>();
rdmaHw->SetAttribute("ClampTargetRate", BooleanValue(clamp_target_rate));
rdmaHw->SetAttribute("AlphaResumInterval",
DoubleValue(alpha_resume_interval));
rdmaHw->SetAttribute("RPTimer", DoubleValue(rp_timer));
rdmaHw->SetAttribute("FastRecoveryTimes",
UintegerValue(fast_recovery_times));
rdmaHw->SetAttribute("EwmaGain", DoubleValue(ewma_gain));
rdmaHw->SetAttribute("RateAI", DataRateValue(DataRate(rate_ai)));
rdmaHw->SetAttribute("RateHAI", DataRateValue(DataRate(rate_hai)));
rdmaHw->SetAttribute("L2BackToZero", BooleanValue(l2_back_to_zero));
rdmaHw->SetAttribute("L2ChunkSize", UintegerValue(l2_chunk_size));
rdmaHw->SetAttribute("L2AckInterval", UintegerValue(l2_ack_interval));
rdmaHw->SetAttribute("CcMode", UintegerValue(cc_mode));
rdmaHw->SetAttribute("RateDecreaseInterval",
DoubleValue(rate_decrease_interval));
rdmaHw->SetAttribute("MinRate", DataRateValue(DataRate(min_rate)));
rdmaHw->SetAttribute("Mtu", UintegerValue(packet_payload_size));
rdmaHw->SetAttribute("MiThresh", UintegerValue(mi_thresh));
rdmaHw->SetAttribute("VarWin", BooleanValue(var_win));
rdmaHw->SetAttribute("FastReact", BooleanValue(fast_react));
rdmaHw->SetAttribute("MultiRate", BooleanValue(multi_rate));
rdmaHw->SetAttribute("SampleFeedback", BooleanValue(sample_feedback));
rdmaHw->SetAttribute("TargetUtil", DoubleValue(u_target));
rdmaHw->SetAttribute("RateBound", BooleanValue(rate_bound));
rdmaHw->SetAttribute("DctcpRateAI",
DataRateValue(DataRate(dctcp_rate_ai)));
rdmaHw->SetAttribute("GPUsPerServer", UintegerValue(gpus_per_server));
rdmaHw->SetPintSmplThresh(pint_prob);
rdmaHw->SetAttribute("TotalPauseTimes",
UintegerValue(nic_total_pause_time));
Ptr<RdmaDriver> rdma = CreateObject<RdmaDriver>();
Ptr<Node> node = n.Get(i);
rdma->SetNode(node);
rdma->SetRdmaHw(rdmaHw);
node->AggregateObject(rdma);
rdma->Init();
rdma->TraceConnectWithoutContext(
"QpComplete", MakeBoundCallback(qp_finish, fct_output));
rdma->TraceConnectWithoutContext("SendComplete",MakeBoundCallback(send_finish,send_output));
}
}
#endif
if (ack_high_prio)
RdmaEgressQueue::ack_q_idx = 0;
else
RdmaEgressQueue::ack_q_idx = 3;
CalculateRoutes(n);
SetRoutingEntries();
maxRtt = maxBdp = 0;
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() != 0)
continue;
for (uint32_t j = 0; j < node_num; j++) {
if (n.Get(j)->GetNodeType() != 0)
continue;
uint64_t delay = pairDelay[n.Get(i)][n.Get(j)];
uint64_t txDelay = pairTxDelay[n.Get(i)][n.Get(j)];
uint64_t rtt = delay * 2 + txDelay;
uint64_t bw = pairBw[i][j];
uint64_t bdp = rtt * bw / 1000000000 / 8;
pairBdp[n.Get(i)][n.Get(j)] = bdp;
pairRtt[i][j] = rtt;
if (bdp > maxBdp)
maxBdp = bdp;
if (rtt > maxRtt)
maxRtt = rtt;
}
}
printf("maxRtt=%lu maxBdp=%lu\n", maxRtt, maxBdp);
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 1) {
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n.Get(i));
sw->SetAttribute("CcMode", UintegerValue(cc_mode));
sw->SetAttribute("MaxRtt", UintegerValue(maxRtt));
}
}
NodeContainer trace_nodes;
for (uint32_t i = 0; i < trace_num; i++) {
uint32_t nid;
tracef >> nid;
if (nid >= n.GetN()) {
continue;
}
trace_nodes = NodeContainer(trace_nodes, n.Get(nid));
}
FILE *trace_output = fopen(trace_output_file.c_str(), "w");
if (enable_trace)
qbb.EnableTracing(trace_output, trace_nodes);
{
SimSetting sim_setting;
for (auto i : nbr2if) {
for (auto j : i.second) {
uint16_t node = i.first->GetId();
uint8_t intf = j.second.idx;
uint64_t bps =
DynamicCast<QbbNetDevice>(i.first->GetDevice(j.second.idx))
->GetDataRate()
.GetBitRate();
sim_setting.port_speed[node][intf] = bps;
}
}
sim_setting.win = maxBdp;
sim_setting.Serialize(trace_output);
}
NS_LOG_INFO("Create Applications.");
Time interPacketInterval = Seconds(0.0000005 / 2);
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0 || n.Get(i)->GetNodeType() == 2)
for (uint32_t j = 0; j < node_num; j++) {
if (n.Get(j)->GetNodeType() == 0 || n.Get(j)->GetNodeType() == 2)
portNumber[i][j] = 10000;
}
}
flow_input.idx = -1;
topof.close();
tracef.close();
if (link_down_time > 0) {
Simulator::Schedule(Seconds(2) + MicroSeconds(link_down_time),
&TakeDownLink, n, n.Get(link_down_A),
n.Get(link_down_B));
}
}