astra-sim-alibabacloud/astra-sim/network_frontend/ns3/common.h (933 lines of code) (raw):
/*
*Copyright (c) 2024, Alibaba Group;
*Licensed under the Apache License, Version 2.0 (the "License");
*you may not use this file except in compliance with the License.
*You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*Unless required by applicable law or agreed to in writing, software
*distributed under the License is distributed on an "AS IS" BASIS,
*WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*See the License for the specific language governing permissions and
*limitations under the License.
*/
#ifndef __COMMON_H__
#define __COMMON_H__
#undef PGO_TRAINING
#define PATH_TO_PGO_CONFIG "path_to_pgo_config"
#include <fstream>
#include <iostream>
#include <time.h>
#include <unordered_map>
#include "ns3/applications-module.h"
#include "ns3/core-module.h"
#include "ns3/error-model.h"
#include "ns3/global-route-manager.h"
#include "ns3/internet-module.h"
#include "ns3/ipv4-static-routing-helper.h"
#include "ns3/packet.h"
#include "ns3/point-to-point-helper.h"
#include "ns3/qbb-helper.h"
#include <ns3/rdma-client-helper.h>
#include <ns3/rdma-client.h>
#include <ns3/rdma-driver.h>
#include <ns3/rdma.h>
#include <ns3/sim-setting.h>
#include <ns3/switch-node.h>
#include <ns3/nvswitch-node.h>
#include <atomic>
using namespace ns3;
using namespace std;
NS_LOG_COMPONENT_DEFINE("GENERIC_SIMULATION");
// Simulation-wide settings kept as plain globals. ReadConf() overwrites most
// of them when the matching key is present in the network configuration file.

// CC_MODE: SetConfig() maps it to an IntHeader mode and SetupNetwork()
// forwards it as the "CcMode" attribute.
uint32_t cc_mode = 1;
// ENABLE_QCN / USE_DYNAMIC_PFC_THRESHOLD: pushed into the QbbNetDevice
// defaults by SetConfig(); enable_qcn also sets SwitchNode "EcnEnabled".
bool enable_qcn = true, use_dynamic_pfc_threshold = true;
// PACKET_PAYLOAD_SIZE (also used by CalculateRoute() for the per-hop
// transmission-delay estimate), L2_CHUNK_SIZE and L2_ACK_INTERVAL.
uint32_t packet_payload_size = 1000, l2_chunk_size = 0, l2_ack_interval = 0;
// PAUSE_TIME and SIMULATOR_STOP_TIME.
double pause_time = 5, simulator_stop_time = 3.01;
// DATA_RATE, LINK_DELAY, FLOW_FILE, TRACE_FILE and TRACE_OUTPUT_FILE;
// topology_file is taken from the first argument of ReadConf().
std::string data_rate, link_delay, topology_file, flow_file, trace_file,
trace_output_file;
// Output file paths. FCT_OUTPUT_FILE and PFC_OUTPUT_FILE override the first
// two; no configuration key for send_output_file is visible in this file.
std::string fct_output_file = "fct.txt";
std::string pfc_output_file = "pfc.txt";
std::string send_output_file = "send.txt";
// ALPHA_RESUME_INTERVAL, RP_TIMER and EWMA_GAIN settings; each can be
// overridden from the configuration file by ReadConf().
// The EWMA gain default uses a floating-point literal on purpose: the integer
// expression 1 / 16 evaluates to 0, which silently zeroed the default gain.
double alpha_resume_interval = 55, rp_timer, ewma_gain = 1.0 / 16;
// RATE_DECREASE_INTERVAL and FAST_RECOVERY_TIMES.
double rate_decrease_interval = 4;
uint32_t fast_recovery_times = 5;
// RATE_AI, RATE_HAI, MIN_RATE and DCTCP_RATE_AI are kept as strings and
// turned into DataRate objects when SetupNetwork() configures RdmaHw.
std::string rate_ai, rate_hai, min_rate = "100Mb/s";
std::string dctcp_rate_ai = "1000Mb/s";
// CLAMP_TARGET_RATE and L2_BACK_TO_ZERO.
bool clamp_target_rate = false, l2_back_to_zero = false;
// ERROR_RATE_PER_LINK: error rate of the shared RateErrorModel that
// SetupNetwork() attaches to links whose own error rate is not positive.
double error_rate_per_link = 0.0;
// HAS_WIN, GLOBAL_T (ReadConf() consumes the value but forces 1) and MI_THRESH.
uint32_t has_win = 1;
uint32_t global_t = 1;
uint32_t mi_thresh = 5;
// VAR_WIN, FAST_REACT, MULTI_RATE and SAMPLE_FEEDBACK flags.
bool var_win = false, fast_react = true;
bool multi_rate = true;
bool sample_feedback = false;
// PINT_LOG_BASE and PINT_PROB; SetConfig() applies the log base only when
// cc_mode == 10.
double pint_log_base = 1.05;
double pint_prob = 1.0;
// U_TARGET, INT_MULTI (copied to IntHop::multi by SetConfig()) and RATE_BOUND.
double u_target = 0.95;
uint32_t int_multi = 1;
bool rate_bound = true;
// NIC_TOTAL_PAUSE_TIME.
int nic_total_pause_time =
0;
// ACK_HIGH_PRIO: non-zero selects RdmaEgressQueue::ack_q_idx = 0, otherwise 3.
uint32_t ack_high_prio = 0;
// LINK_DOWN <time> <A> <B>: when time > 0 SetupNetwork() schedules
// TakeDownLink() for the link between nodes A and B.
uint64_t link_down_time = 0;
uint32_t link_down_A = 0, link_down_B = 0;
// ENABLE_TRACE: non-zero enables QbbHelper tracing in SetupNetwork().
uint32_t enable_trace = 1;
// BUFFER_SIZE: multiplied by 1024 * 1024 before being handed to the switch MMU.
uint32_t buffer_size = 16;
// Counts read from the headers of the topology and trace files.
uint32_t node_num, switch_num, link_num, trace_num, nvswitch_num, gpus_per_server;
// GPU model parsed from the topology header (GPUType::NONE if unrecognised).
GPUType gpu_type;
// Not referenced anywhere else in the visible part of this file.
std::vector<int>NVswitchs;
// Sampling periods of the monitor_* callbacks, passed to MicroSeconds().
uint32_t qp_mon_interval = 100;
uint32_t bw_mon_interval = 10000;
uint32_t qlen_mon_interval = 10000;
// MON_START is the delay (MicroSeconds) before the first monitor sample.
// NOTE(review): MON_END is parsed but not used in the visible code — confirm
// whether the monitors are supposed to stop at mon_end.
uint64_t mon_start = 0, mon_end = 2100000000;
// Monitor output paths; ReadConf() rewrites them via get_output_file_name().
string qlen_mon_file;
string bw_mon_file;
string rate_mon_file;
string cnp_mon_file;
// NOTE(review): hard-coded absolute path; not referenced in the visible code.
string total_flow_file = "/root/astra-sim/extern/network_backend/ns3-interface/simulation/monitor_output/";
FILE* total_flow_output = nullptr;
// Per-link-speed ECN parameters (KMAX_MAP / KMIN_MAP / PMAX_MAP), keyed by the
// link bit rate; SetupNetwork() requires an entry for every switch port speed.
unordered_map<uint64_t, uint32_t> rate2kmax, rate2kmin;
unordered_map<uint64_t, double> rate2pmax;
// Input streams for the topology, flow and trace files (opened in SetupNetwork()).
std::ifstream topof, flowf, tracef;
// All simulated nodes, indexed by the node id used in the topology file.
NodeContainer n;
// Bit rate of device 1 of the first host, as returned by get_nic_rate().
uint64_t nic_rate;
// Largest RTT and bandwidth-delay product over all host pairs (SetupNetwork()).
uint64_t maxRtt, maxBdp;
// serverAddress[id] holds node_id_to_ip(id) for host and NVSwitch nodes.
std::vector<Ipv4Address> serverAddress;
// portNumber[src][dst], initialised to 10000 for every host/NVSwitch pair.
std::unordered_map<uint32_t, unordered_map<uint32_t, uint16_t>> portNumber;
/**
 * Per-neighbour link record stored in nbr2if[node][neighbour].
 *
 * Every field is zero-initialised by the constructor. Entries are created
 * implicitly by map::operator[] (for example when a link is only queried),
 * so leaving delay/bw unset would expose indeterminate values to the routing
 * code that reads them.
 */
struct Interface {
  uint32_t idx;    // interface index of the device on the owning node
  bool up;         // false until the link is installed, or after it is taken down
  uint64_t delay;  // channel delay as returned by GetDelay().GetTimeStep()
  uint64_t bw;     // device data rate as returned by GetDataRate().GetBitRate()
  Interface() : idx(0), up(false), delay(0), bw(0) {}
};
// nbr2if[a][b]: link record (interface index, state, delay, bandwidth) of the
// device on node a that faces neighbour b. Filled by SetupNetwork().
map<Ptr<Node>, map<Ptr<Node>, Interface>> nbr2if;
// nextHop[node][dst]: neighbours of node that CalculateRoute() selected as
// next hops towards dst. Consumed by SetRoutingEntries().
map<Ptr<Node>, map<Ptr<Node>, vector<Ptr<Node>>>> nextHop;
// pairDelay[node][host] / pairTxDelay[node][host]: accumulated link delay and
// accumulated per-hop transmission delay found by the search rooted at host.
map<Ptr<Node>, map<Ptr<Node>, uint64_t>> pairDelay;
map<Ptr<Node>, map<Ptr<Node>, uint64_t>> pairTxDelay;
// pairBw[nodeId][hostId]: minimum link bandwidth along the discovered path
// (keyed by node ids rather than node pointers).
map<uint32_t, map<uint32_t, uint64_t>> pairBw;
// pairBdp / pairRtt: per host pair values derived in SetupNetwork().
map<Ptr<Node>, map<Ptr<Node>, uint64_t>> pairBdp;
map<uint32_t, map<uint32_t, uint64_t>> pairRtt;
// Plain aggregate describing one flow record. Member order is part of the
// contract because the struct is brace-initialised below.
struct FlowInput {
  uint32_t src;
  uint32_t dst;
  uint32_t pg;
  uint32_t maxPacketCount;
  uint32_t port;
  uint32_t dport;
  double start_time;
  uint32_t idx;
};

// Current flow record; starts zeroed, and SetupNetwork() later stores -1 in idx.
FlowInput flow_input = {0};

// Number of flows, read from the first token of the flow file.
uint32_t flow_num;
// Maps a node id to an address of the form 11.<id / 256>.<id % 256>.1:
// the base value is 0x0b000001, the quotient is added in units of 0x00010000
// and the remainder in units of 0x00000100.
Ipv4Address node_id_to_ip(uint32_t id) {
  const uint32_t upper = id / 256;
  const uint32_t lower = id % 256;
  return Ipv4Address(0x0b000001 + (upper * 0x00010000) + (lower * 0x00000100));
}
// Inverse of node_id_to_ip(): extracts bits 8..23 of the raw address value.
uint32_t ip_to_node_id(Ipv4Address ip) {
  const auto raw = ip.Get();
  return (raw >> 8) & 0xffff;
}
void get_pfc(FILE *fout, Ptr<QbbNetDevice> dev, uint32_t type) {
fprintf(fout, "%lu %u %u %u %u\n", Simulator::Now().GetTimeStep(),
dev->GetNode()->GetId(), dev->GetNode()->GetNodeType(),
dev->GetIfIndex(), type);
}
// Histogram of queue lengths using buckets of 1000 units:
// cnt[k] counts the samples with qlen / 1000 == k.
struct QlenDistribution {
  vector<uint32_t> cnt;

  // Records one sample, growing the histogram when the bucket is new.
  void add(uint32_t qlen) {
    const uint32_t bucket = qlen / 1000;
    const uint32_t needed = bucket + 1;
    if (cnt.size() < needed) {
      cnt.resize(needed);
    }
    ++cnt[bucket];
  }
};
void monitor_qlen(FILE* qlen_output, NodeContainer *n){
for (uint32_t i = 0; i < n->GetN(); i++){
if(n->Get(i)->GetNodeType() == 1){
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n->Get(i));
sw->PrintSwitchQlen(qlen_output);
}else if(n->Get(i)->GetNodeType() == 2){
Ptr<NVSwitchNode> sw = DynamicCast<NVSwitchNode>(n->Get(i));
sw->PrintSwitchQlen(qlen_output);
}
}
Simulator::Schedule(MicroSeconds(qlen_mon_interval), &monitor_qlen, qlen_output, n);
}
void monitor_bw(FILE* bw_output, NodeContainer *n){
for (uint32_t i = 0; i < n->GetN(); i++){
if(n->Get(i)->GetNodeType() == 1){
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n->Get(i));
sw->PrintSwitchBw(bw_output, bw_mon_interval);
}else if(n->Get(i)->GetNodeType() == 2){
Ptr<NVSwitchNode> sw = DynamicCast<NVSwitchNode>(n->Get(i));
sw->PrintSwitchBw(bw_output, bw_mon_interval);
}else{
Ptr<Node> host = n->Get(i);
host->GetObject<RdmaDriver>()->m_rdma->PrintHostBW(bw_output, bw_mon_interval);
}
}
Simulator::Schedule(MicroSeconds(bw_mon_interval), &monitor_bw, bw_output, n);
}
void monitor_qp_rate(FILE* rate_output, NodeContainer *n){
for(uint32_t i = 0; i < n->GetN(); i++){
if(n->Get(i)->GetNodeType() == 0){
Ptr<Node> host = n->Get(i);
host->GetObject<RdmaDriver>()->m_rdma->PrintQPRate(rate_output);
}
}
Simulator::Schedule(MicroSeconds(qp_mon_interval), &monitor_qp_rate, rate_output, n);
}
void monitor_qp_cnp_number(FILE* cnp_output, NodeContainer *n){
for(uint32_t i = 0; i < n->GetN(); i++){
if(n->Get(i)->GetNodeType() == 0){
Ptr<Node> host = n->Get(i);
host->GetObject<RdmaDriver>()->m_rdma->PrintQPCnpNumber(cnp_output);
}
}
Simulator::Schedule(MicroSeconds(qp_mon_interval), &monitor_qp_cnp_number, cnp_output, n);
}
void schedule_monitor(){
FILE* qlen_output = fopen(qlen_mon_file.c_str(), "w");
assert(qlen_output != nullptr);
fprintf(qlen_output, "%s, %s, %s, %s, %s, %s\n", "time", "sw_id", "port_id", "q_id", "q_len", "port_len");
fflush(qlen_output);
Simulator::Schedule(MicroSeconds(mon_start), &monitor_qlen, qlen_output, &n);
FILE* bw_output = fopen(bw_mon_file.c_str(), "w");
assert(bw_output != nullptr);
fprintf(bw_output, "%s, %s, %s, %s\n", "time", "node_id", "port_id", "bandwidth");
fflush(bw_output);
Simulator::Schedule(MicroSeconds(mon_start), &monitor_bw, bw_output, &n);
FILE* rate_output = fopen(rate_mon_file.c_str(), "w");
assert(rate_output != nullptr);
fprintf(rate_output, "%s, %s, %s, %s, %s, %s, %s\n", "time", "src", "dst", "sport", "dport", "size", "curr_rate");
fflush(rate_output);
Simulator::Schedule(MicroSeconds(mon_start), &monitor_qp_rate, rate_output, &n);
FILE* cnp_output = fopen(cnp_mon_file.c_str(), "w");
assert(cnp_output != nullptr);
fprintf(cnp_output, "%s, %s, %s, %s, %s, %s, %s\n", "time", "src", "dst", "sport", "dport", "size", "cnp_number");
fflush(cnp_output);
Simulator::Schedule(MicroSeconds(mon_start), &monitor_qp_cnp_number, cnp_output, &n);
}
// Breadth-first search rooted at `host` over the links marked up in nbr2if.
// For every node reached it records the hop distance, the accumulated link
// delay, the accumulated transmission delay of one packet_payload_size packet
// and the minimum bandwidth on the path, and it fills nextHop[node][host]
// with the neighbours of `node` that lie one hop closer to `host`.
// Results are published into pairDelay, pairTxDelay and pairBw at the end.
void CalculateRoute(Ptr<Node> host) {
// q doubles as the BFS queue: it is only appended to and walked by index.
vector<Ptr<Node>> q;
map<Ptr<Node>, int> dis;
map<Ptr<Node>, uint64_t> delay;
map<Ptr<Node>, uint64_t> txDelay;
map<Ptr<Node>, uint64_t> bw;
q.push_back(host);
dis[host] = 0;
delay[host] = 0;
txDelay[host] = 0;
// Start from the largest value so the first std::min() picks the link bandwidth.
bw[host] = 0xfffffffffffffffflu;
for (int i = 0; i < (int)q.size(); i++) {
Ptr<Node> now = q[i];
int d = dis[now];
for (auto it = nbr2if[now].begin(); it != nbr2if[now].end(); it++) {
// Links that are down do not take part in routing.
if (!it->second.up)
continue;
Ptr<Node> next = it->first;
// First visit of `next`: record its metrics via `now`.
if (dis.find(next) == dis.end()) {
dis[next] = d + 1;
delay[next] = delay[now] + it->second.delay;
// payload bytes * 8 bits * 1e9 / (bits per second) for this hop.
txDelay[next] = txDelay[now] +
packet_payload_size * 1000000000lu * 8 / it->second.bw;
bw[next] = std::min(bw[now], it->second.bw);
// Only switches (type 1) and NVSwitches (type 2) are expanded further;
// other nodes are reached but never traversed.
if (next->GetNodeType() == 1 || next->GetNodeType() == 2) {
q.push_back(next);
}
}
bool via_nvswitch = false;
// `now` lies on a shortest path from `next` to `host`.
if (d + 1 == dis[next]) {
// Does `next` already have an NVSwitch among its next hops towards host?
for(auto x : nextHop[next][host]) {
if(x->GetNodeType() == 2) via_nvswitch = true;
}
if(via_nvswitch == false) {
// An NVSwitch next hop replaces every next hop recorded so far.
if(now->GetNodeType() == 2) {
while(nextHop[next][host].size() != 0)
nextHop[next][host].pop_back();
}
nextHop[next][host].push_back(now);
} else if(via_nvswitch == true && now->GetNodeType() == 2) {
// Once an NVSwitch hop exists, only further NVSwitch hops are added.
nextHop[next][host].push_back(now);
}
// A host (type 0) additionally gets a direct entry towards the neighbour
// `now` itself, and the bandwidth of that link is stored in both directions.
if(next->GetNodeType() == 0 && nextHop[next][now].size() == 0) {
nextHop[next][now].push_back(now);
pairBw[next->GetId()][now->GetId()] = pairBw[now->GetId()][next->GetId()] = it->second.bw;
}
}
}
}
// Publish the per-node metrics of this search, keyed by (node, host).
for (auto it : delay) {
pairDelay[it.first][host] = it.second;
}
for (auto it : txDelay)
pairTxDelay[it.first][host] = it.second;
for (auto it : bw) {
pairBw[it.first->GetId()][host->GetId()] = it.second;
}
}
// Runs CalculateRoute() once for every host (node type 0) in the container.
void CalculateRoutes(NodeContainer &n) {
  const int count = (int)n.GetN();
  for (int idx = 0; idx < count; ++idx) {
    Ptr<Node> candidate = n.Get(idx);
    if (candidate->GetNodeType() != 0) {
      continue;
    }
    CalculateRoute(candidate);
  }
}
void SetRoutingEntries() {
for (auto i = nextHop.begin(); i != nextHop.end(); i++) {
Ptr<Node> node = i->first;
auto &table = i->second;
for (auto j = table.begin(); j != table.end(); j++) {
Ptr<Node> dst = j->first;
Ipv4Address dstAddr = dst->GetObject<Ipv4>()->GetAddress(1, 0).GetLocal();
vector<Ptr<Node>> nexts = j->second;
for (int k = 0; k < (int)nexts.size(); k++) {
Ptr<Node> next = nexts[k];
uint32_t interface = nbr2if[node][next].idx;
if (node->GetNodeType() == 1) {
DynamicCast<SwitchNode>(node)->AddTableEntry(dstAddr, interface);
} else if(node->GetNodeType() == 2){
DynamicCast<NVSwitchNode>(node)->AddTableEntry(dstAddr, interface);
node->GetObject<RdmaDriver>()->m_rdma->AddTableEntry(dstAddr, interface, true);
} else {
bool is_nvswitch = false;
if(next->GetNodeType() == 2){
is_nvswitch = true;
}
node->GetObject<RdmaDriver>()->m_rdma->AddTableEntry(dstAddr, interface, is_nvswitch);
if(next->GetId() == dst->GetId()) {
node->GetObject<RdmaDriver>()->m_rdma->add_nvswitch(dst->GetId());
}
}
}
}
}
}
void printRoutingEntries() {
map<uint32_t, string> types;
types[0] = "HOST";
types[1] = "SWITCH";
types[2] = "NVSWITCH";
map<Ptr<Node>, map<Ptr<Node>, vector<pair<Ptr<Node>, uint32_t> >>> NVSwitch, NetSwitch, Host;
for (auto i = nextHop.begin(); i != nextHop.end(); i++) {
Ptr<Node> src = i -> first;
auto &table = i->second;
for (auto j = table.begin(); j != table.end(); j++) {
Ptr<Node> dst = j->first;
Ipv4Address dstAddr = dst->GetObject<Ipv4>()->GetAddress(1, 0).GetLocal();
vector<Ptr<Node>> nexts = j->second;
for (int k = 0; k < (int)nexts.size(); k++) {
Ptr<Node> firstHop = nexts[k];
uint32_t interface = nbr2if[src][firstHop].idx;
if(src->GetNodeType() == 0) {
Host[src][dst].push_back(pair<Ptr<Node>, uint32_t>(firstHop, interface));
} else if(src->GetNodeType() == 1) {
NetSwitch[src][dst].push_back(pair<Ptr<Node>, uint32_t>(firstHop, interface));
} else if(src->GetNodeType() == 2) {
NVSwitch[src][dst].push_back(pair<Ptr<Node>, uint32_t>(firstHop, interface));
}
}
}
}
cout << "********************* PRINT SWITCH ROUTING TABLE *********************" << endl << endl << endl;
for(auto it = NetSwitch.begin(); it != NetSwitch.end(); ++ it) {
Ptr<Node> src = it -> first;
auto table = it -> second;
cout << "SWITCH: " << src->GetId() << "'s routing entries are as follows:" << endl;
for(auto j = table.begin(); j != table.end(); ++ j) {
Ptr<Node> dst = j -> first;
auto entries = j -> second;
for(auto k = entries.begin(); k != entries.end(); ++ k) {
Ptr<Node> nextHop = k->first;
uint32_t interface = k->second;
cout << "To " << dst->GetId() << "[" << types[dst->GetNodeType()] << "] via " << nextHop->GetId() << "[" << types[nextHop->GetNodeType()] << "]" << " from port: " << interface << endl;
}
}
}
cout << "********************* PRINT NVSWITCH ROUTING TABLE *********************" << endl << endl << endl;
for(auto it = NVSwitch.begin(); it != NVSwitch.end(); ++ it) {
Ptr<Node> src = it -> first;
auto table = it -> second;
cout << "NVSWITCH: " << src->GetId() << "'s routing entries are as follows:" << endl;
for(auto j = table.begin(); j != table.end(); ++ j) {
Ptr<Node> dst = j -> first;
auto entries = j -> second;
for(auto k = entries.begin(); k != entries.end(); ++ k) {
Ptr<Node> nextHop = k->first;
uint32_t interface = k->second;
cout << "To " << dst->GetId() << "[" << types[dst->GetNodeType()] << "] via " << nextHop->GetId() << "[" << types[nextHop->GetNodeType()] << "]" << " from port: " << interface << endl;
}
}
}
cout << "********************* HOST ROUTING TABLE *********************" << endl << endl << endl;
for(auto it = Host.begin(); it != Host.end(); ++ it) {
Ptr<Node> src = it -> first;
auto table = it -> second;
cout << "HOST: " << src->GetId() << "'s routing entries are as follows:" << endl;
for(auto j = table.begin(); j != table.end(); ++ j) {
Ptr<Node> dst = j -> first;
auto entries = j -> second;
for(auto k = entries.begin(); k != entries.end(); ++ k) {
Ptr<Node> nextHop = k->first;
uint32_t interface = k->second;
cout << "To " << dst->GetId() << "[" << types[dst->GetNodeType()] << "] via " << nextHop->GetId() << "[" << types[nextHop->GetNodeType()] << "]" << " from port: " << interface << endl;
}
}
}
}
// Placeholder: no validation is implemented, the function always reports false.
bool validateRoutingEntries() { return false; }
// Takes the link between a and b out of service and rebuilds all routing
// state. Does nothing if the link is already down.
// Sequence: mark both directions down, recompute next hops, clear every
// forwarding table, take both devices down, reinstall the routes and finally
// let every host redistribute its queue pairs.
void TakeDownLink(NodeContainer n, Ptr<Node> a, Ptr<Node> b) {
  Interface &forward = nbr2if[a][b];
  if (!forward.up) {
    return;
  }
  Interface &backward = nbr2if[b][a];
  backward.up = false;
  forward.up = false;

  nextHop.clear();
  CalculateRoutes(n);

  const uint32_t total = n.GetN();
  for (uint32_t idx = 0; idx < total; ++idx) {
    Ptr<Node> node = n.Get(idx);
    switch (node->GetNodeType()) {
      case 1:
        DynamicCast<SwitchNode>(node)->ClearTable();
        break;
      case 2:
        DynamicCast<NVSwitchNode>(node)->ClearTable();
        break;
      default:
        node->GetObject<RdmaDriver>()->m_rdma->ClearTable();
        break;
    }
  }

  DynamicCast<QbbNetDevice>(a->GetDevice(nbr2if[a][b].idx))->TakeDown();
  DynamicCast<QbbNetDevice>(b->GetDevice(nbr2if[b][a].idx))->TakeDown();
  SetRoutingEntries();

  for (uint32_t idx = 0; idx < total; ++idx) {
    Ptr<Node> node = n.Get(idx);
    if (node->GetNodeType() == 0) {
      node->GetObject<RdmaDriver>()->m_rdma->RedistributeQp();
    }
  }
}
// Builds a per-configuration output file name.
//
// The last 4 characters of output_file are dropped and the tail of the
// configuration file's basename, starting after its first 6 characters, is
// appended. Example: ("mix/config_fat.txt", "qlen.txt") -> "qlen_fat.txt".
// NOTE(review): the 6/4 character counts presumably correspond to a "config"
// basename prefix and a ".txt" extension — confirm against the naming scheme.
//
// Short inputs are handled explicitly instead of relying on unsigned
// wrap-around: a basename of fewer than 6 characters contributes nothing
// (previously substr() threw std::out_of_range), and an output_file shorter
// than 4 characters is kept unchanged.
string get_output_file_name(string config_file, string output_file){
  const string::size_type kSkippedPrefix = 6;
  const string::size_type kDroppedSuffix = 4;

  const string::size_type slash = config_file.find_last_of('/');
  const string::size_type base = (slash == string::npos) ? 0 : slash + 1;
  const string::size_type tail_start = base + kSkippedPrefix;

  string ans = output_file;
  if (ans.length() >= kDroppedSuffix) {
    ans.erase(ans.length() - kDroppedSuffix);
  }
  if (tail_start < config_file.length()) {
    ans += config_file.substr(tail_start);
  }
  return ans;
}
// Returns the bit rate of device 1 of the first host (node type 0) found in
// the container.
//
// If the container holds no host the function asserts (in builds where
// NS_ASSERT_MSG is active) and returns 0; previously control fell off the end
// of a non-void function, which is undefined behaviour.
uint64_t get_nic_rate(NodeContainer &n) {
  for (uint32_t i = 0; i < n.GetN(); i++) {
    if (n.Get(i)->GetNodeType() == 0) {
      return DynamicCast<QbbNetDevice>(n.Get(i)->GetDevice(1))
          ->GetDataRate()
          .GetBitRate();
    }
  }
  NS_ASSERT_MSG(false, "get_nic_rate: no host node found in the topology");
  return 0;
}
// Loads the simulation settings from a whitespace-separated "KEY value..."
// configuration file into the global variables of this header.
//
// @param network_topo  path stored in topology_file (always stored, even when
//                      the configuration file cannot be read).
// @param network_conf  path of the configuration file; also used to derive
//                      the monitor output file names.
// @return true when the whole file was consumed; false when the file cannot
//         be opened or a value fails to parse.
//
// Unknown keys are ignored. The loop is driven by the extraction itself
// (`while (conf >> key)`): the former `while (!conf.eof())` form never
// terminated when the file was missing or a value was malformed, because the
// stream then stays in the fail state without ever reaching end-of-file.
bool ReadConf(string network_topo,string network_conf) {
  std::ifstream conf(network_conf);
  topology_file = network_topo;
  if (!conf.is_open()) {
    std::cerr << "ReadConf: cannot open configuration file " << network_conf << std::endl;
    return false;
  }

  // Boolean options are written as unsigned integers (0 = off, non-zero = on).
  auto read_flag = [&conf](bool &flag) {
    uint32_t v = 0;
    conf >> v;
    flag = (v != 0);
  };
  // "<count> <rate> <threshold> ..." tables used by KMAX_MAP / KMIN_MAP.
  auto read_threshold_map = [&conf](unordered_map<uint64_t, uint32_t> &table) {
    int n_k = 0;
    conf >> n_k;
    for (int i = 0; i < n_k; i++) {
      uint64_t rate = 0;
      uint32_t k = 0;
      if (!(conf >> rate >> k))
        break;
      table[rate] = k;
    }
  };

  std::string key;
  while (conf >> key) {
    if (key == "ENABLE_QCN") {
      read_flag(enable_qcn);
    } else if (key == "USE_DYNAMIC_PFC_THRESHOLD") {
      read_flag(use_dynamic_pfc_threshold);
    } else if (key == "CLAMP_TARGET_RATE") {
      read_flag(clamp_target_rate);
    } else if (key == "PAUSE_TIME") {
      conf >> pause_time;
    } else if (key == "DATA_RATE") {
      conf >> data_rate;
    } else if (key == "LINK_DELAY") {
      conf >> link_delay;
    } else if (key == "PACKET_PAYLOAD_SIZE") {
      conf >> packet_payload_size;
    } else if (key == "L2_CHUNK_SIZE") {
      conf >> l2_chunk_size;
    } else if (key == "L2_ACK_INTERVAL") {
      conf >> l2_ack_interval;
    } else if (key == "L2_BACK_TO_ZERO") {
      read_flag(l2_back_to_zero);
    } else if (key == "FLOW_FILE") {
      conf >> flow_file;
    } else if (key == "TRACE_FILE") {
      conf >> trace_file;
    } else if (key == "TRACE_OUTPUT_FILE") {
      conf >> trace_output_file;
    } else if (key == "SIMULATOR_STOP_TIME") {
      conf >> simulator_stop_time;
    } else if (key == "ALPHA_RESUME_INTERVAL") {
      conf >> alpha_resume_interval;
    } else if (key == "RP_TIMER") {
      conf >> rp_timer;
    } else if (key == "EWMA_GAIN") {
      conf >> ewma_gain;
    } else if (key == "FAST_RECOVERY_TIMES") {
      conf >> fast_recovery_times;
    } else if (key == "RATE_AI") {
      conf >> rate_ai;
    } else if (key == "RATE_HAI") {
      conf >> rate_hai;
    } else if (key == "ERROR_RATE_PER_LINK") {
      conf >> error_rate_per_link;
    } else if (key == "CC_MODE") {
      conf >> cc_mode;
    } else if (key == "RATE_DECREASE_INTERVAL") {
      conf >> rate_decrease_interval;
    } else if (key == "MIN_RATE") {
      conf >> min_rate;
    } else if (key == "FCT_OUTPUT_FILE") {
      conf >> fct_output_file;
    } else if (key == "HAS_WIN") {
      conf >> has_win;
    } else if (key == "GLOBAL_T") {
      // The value is consumed so parsing stays aligned, but the setting is
      // pinned to 1 regardless of what the file says.
      conf >> global_t;
      global_t = 1;
    } else if (key == "MI_THRESH") {
      conf >> mi_thresh;
    } else if (key == "VAR_WIN") {
      read_flag(var_win);
    } else if (key == "FAST_REACT") {
      read_flag(fast_react);
    } else if (key == "U_TARGET") {
      conf >> u_target;
    } else if (key == "INT_MULTI") {
      conf >> int_multi;
    } else if (key == "RATE_BOUND") {
      read_flag(rate_bound);
    } else if (key == "ACK_HIGH_PRIO") {
      conf >> ack_high_prio;
    } else if (key == "DCTCP_RATE_AI") {
      conf >> dctcp_rate_ai;
    } else if (key == "NIC_TOTAL_PAUSE_TIME") {
      conf >> nic_total_pause_time;
    } else if (key == "PFC_OUTPUT_FILE") {
      conf >> pfc_output_file;
    } else if (key == "LINK_DOWN") {
      conf >> link_down_time >> link_down_A >> link_down_B;
    } else if (key == "ENABLE_TRACE") {
      conf >> enable_trace;
    } else if (key == "KMAX_MAP") {
      read_threshold_map(rate2kmax);
    } else if (key == "KMIN_MAP") {
      read_threshold_map(rate2kmin);
    } else if (key == "PMAX_MAP") {
      int n_k = 0;
      conf >> n_k;
      for (int i = 0; i < n_k; i++) {
        uint64_t rate = 0;
        double p = 0;
        if (!(conf >> rate >> p))
          break;
        rate2pmax[rate] = p;
      }
    } else if (key == "BUFFER_SIZE") {
      conf >> buffer_size;
    } else if (key == "QLEN_MON_FILE") {
      conf >> qlen_mon_file;
      qlen_mon_file = get_output_file_name(network_conf, qlen_mon_file);
    } else if (key == "BW_MON_FILE") {
      conf >> bw_mon_file;
      bw_mon_file = get_output_file_name(network_conf, bw_mon_file);
    } else if (key == "RATE_MON_FILE") {
      conf >> rate_mon_file;
      rate_mon_file = get_output_file_name(network_conf, rate_mon_file);
    } else if (key == "CNP_MON_FILE") {
      conf >> cnp_mon_file;
      cnp_mon_file = get_output_file_name(network_conf, cnp_mon_file);
    } else if (key == "MON_START") {
      conf >> mon_start;
    } else if (key == "MON_END") {
      conf >> mon_end;
    } else if (key == "QP_MON_INTERVAL") {
      conf >> qp_mon_interval;
    } else if (key == "BW_MON_INTERVAL") {
      conf >> bw_mon_interval;
    } else if (key == "QLEN_MON_INTERVAL") {
      conf >> qlen_mon_interval;
    } else if (key == "MULTI_RATE") {
      // These two flags are parsed as signed integers.
      int v = 0;
      conf >> v;
      multi_rate = (v != 0);
    } else if (key == "SAMPLE_FEEDBACK") {
      int v = 0;
      conf >> v;
      sample_feedback = (v != 0);
    } else if (key == "PINT_LOG_BASE") {
      conf >> pint_log_base;
    } else if (key == "PINT_PROB") {
      conf >> pint_prob;
    }
  }

  // A clean run ends with eofbit set; stopping anywhere else means a value
  // could not be parsed (or the stream broke).
  const bool reached_end = conf.eof();
  conf.close();
  if (!reached_end) {
    std::cerr << "ReadConf: malformed value after key \"" << key << "\" in "
              << network_conf << std::endl;
    return false;
  }
  return true;
}
// Pushes the parsed settings into ns-3 defaults and static protocol state:
// QbbNetDevice pause time / QCN / dynamic PFC threshold, the INT multiplier
// and the IntHeader mode selected by cc_mode (7 -> TS, 3 -> NORMAL,
// 10 -> PINT, anything else -> NONE). PINT additionally configures its log
// base and byte count and prints them.
void SetConfig() {
  Config::SetDefault("ns3::QbbNetDevice::PauseTime", UintegerValue(pause_time));
  Config::SetDefault("ns3::QbbNetDevice::QcnEnabled", BooleanValue(enable_qcn));
  Config::SetDefault("ns3::QbbNetDevice::DynamicThreshold",
                     BooleanValue(use_dynamic_pfc_threshold));

  IntHop::multi = int_multi;

  switch (cc_mode) {
    case 7:
      IntHeader::mode = IntHeader::TS;
      break;
    case 3:
      IntHeader::mode = IntHeader::NORMAL;
      break;
    case 10:
      IntHeader::mode = IntHeader::PINT;
      Pint::set_log_base(pint_log_base);
      IntHeader::pint_bytes = Pint::get_n_bytes();
      printf("PINT bits: %d bytes: %d\n", Pint::get_n_bits(),
             Pint::get_n_bytes());
      break;
    default:
      IntHeader::mode = IntHeader::NONE;
      break;
  }
}
// Builds the whole simulated network from the files named by the globals
// topology_file, flow_file and trace_file:
//   1. parse the topology header and create host / switch / NVSwitch nodes;
//   2. install the internet stack and create every link with its error model;
//   3. configure switch and NVSwitch MMUs (ECN, headroom, PFC shift, buffer);
//   4. when ENABLE_QP is set, attach an RdmaHw/RdmaDriver to hosts and NVSwitches;
//   5. compute routes, per-pair RTT/BDP, and set up tracing;
//   6. optionally schedule a link failure.
// qp_finish and send_finish are bound to the "QpComplete" and "SendComplete"
// trace sources together with the fct / send output files.
void SetupNetwork(void (*qp_finish)(FILE *, Ptr<RdmaQueuePair>),void (*send_finish)(FILE *, Ptr<RdmaQueuePair>)) {
// NOTE(review): none of the three streams is checked after open().
topof.open(topology_file.c_str());
flowf.open(flow_file.c_str());
tracef.open(trace_file.c_str());
string gpu_type_str;
// Topology header: node count, GPUs per server, NVSwitch count, switch count,
// link count and GPU model name.
topof >> node_num >> gpus_per_server >> nvswitch_num >> switch_num >>
link_num >> gpu_type_str;
flowf >> flow_num;
tracef >> trace_num;
if(gpu_type_str == "A100"){
gpu_type = GPUType::A100;
} else if(gpu_type_str == "A800"){
gpu_type = GPUType::A800;
} else if(gpu_type_str == "H100"){
gpu_type = GPUType::H100;
} else if(gpu_type_str == "H800"){
gpu_type = GPUType::H800;
} else{
gpu_type = GPUType::NONE;
}
// Node type per id: 0 = host (default), 1 = switch, 2 = NVSwitch.
// NOTE(review): the ids read below index node_type without a bounds check.
std::vector<uint32_t> node_type(node_num, 0);
for (uint32_t i = 0; i < nvswitch_num; i++) {
uint32_t sid;
topof >> sid;
node_type[sid] = 2;
}
for (uint32_t i = 0; i < switch_num; i++)
{
uint32_t sid;
topof >> sid;
node_type[sid] = 1;
}
// Create the nodes in id order so that n.Get(id) matches the topology file.
for (uint32_t i = 0; i < node_num; i++){
if (node_type[i] == 0)
n.Add(CreateObject<Node>());
else if(node_type[i] == 1){
Ptr<SwitchNode> sw = CreateObject<SwitchNode>();
n.Add(sw);
sw->SetAttribute("EcnEnabled", BooleanValue(enable_qcn));
}else if(node_type[i] == 2){
Ptr<NVSwitchNode> sw = CreateObject<NVSwitchNode>();
n.Add(sw);
}
}
NS_LOG_INFO("Create nodes.");
InternetStackHelper internet;
internet.Install(n);
// Hosts and NVSwitches get an address derived from their node id.
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0) {
serverAddress.resize(i + 1);
serverAddress[i] = node_id_to_ip(i);
} else if(n.Get(i)->GetNodeType() == 2) {
serverAddress.resize(i + 1);
serverAddress[i] = node_id_to_ip(i);
}
}
NS_LOG_INFO("Create channels.");
// Shared error model (error_rate_per_link) for links without their own rate.
Ptr<RateErrorModel> rem = CreateObject<RateErrorModel>();
Ptr<UniformRandomVariable> uv = CreateObject<UniformRandomVariable>();
rem->SetRandomVariable(uv);
uv->SetStream(50);
rem->SetAttribute("ErrorRate", DoubleValue(error_rate_per_link));
rem->SetAttribute("ErrorUnit", StringValue("ERROR_UNIT_PACKET"));
// NOTE(review): the fopen() result is not checked before being bound to get_pfc.
FILE *pfc_file = fopen(pfc_output_file.c_str(), "w");
QbbHelper qbb;
Ipv4AddressHelper ipv4;
// One topology line per link: src dst data_rate link_delay error_rate.
for (uint32_t i = 0; i < link_num; i++) {
uint32_t src, dst;
// These locals shadow the globals of the same name.
std::string data_rate, link_delay;
double error_rate;
topof >> src >> dst >> data_rate >> link_delay >> error_rate;
Ptr<Node> snode = n.Get(src), dnode = n.Get(dst);
qbb.SetDeviceAttribute("DataRate", StringValue(data_rate));
qbb.SetChannelAttribute("Delay", StringValue(link_delay));
if (error_rate > 0) {
// Link-specific error model (shadows the shared rem/uv above).
Ptr<RateErrorModel> rem = CreateObject<RateErrorModel>();
Ptr<UniformRandomVariable> uv = CreateObject<UniformRandomVariable>();
rem->SetRandomVariable(uv);
uv->SetStream(50);
rem->SetAttribute("ErrorRate", DoubleValue(error_rate));
rem->SetAttribute("ErrorUnit", StringValue("ERROR_UNIT_PACKET"));
qbb.SetDeviceAttribute("ReceiveErrorModel", PointerValue(rem));
} else {
qbb.SetDeviceAttribute("ReceiveErrorModel", PointerValue(rem));
}
fflush(stdout);
NetDeviceContainer d = qbb.Install(snode, dnode);
// Host / NVSwitch endpoints get the new device registered with their Ipv4
// object and their server address (mask 0xff000000) added on interface 1.
if (snode->GetNodeType() == 0 || snode->GetNodeType() == 2) {
Ptr<Ipv4> ipv4 = snode->GetObject<Ipv4>();
ipv4->AddInterface(d.Get(0));
ipv4->AddAddress(
1, Ipv4InterfaceAddress(serverAddress[src], Ipv4Mask(0xff000000)));
}
if (dnode->GetNodeType() == 0 || dnode->GetNodeType() == 2) {
Ptr<Ipv4> ipv4 = dnode->GetObject<Ipv4>();
ipv4->AddInterface(d.Get(1));
ipv4->AddAddress(
1, Ipv4InterfaceAddress(serverAddress[dst], Ipv4Mask(0xff000000)));
}
// Record the link in both directions for the routing code.
nbr2if[snode][dnode].idx =
DynamicCast<QbbNetDevice>(d.Get(0))->GetIfIndex();
nbr2if[snode][dnode].up = true;
nbr2if[snode][dnode].delay =
DynamicCast<QbbChannel>(
DynamicCast<QbbNetDevice>(d.Get(0))->GetChannel())
->GetDelay()
.GetTimeStep();
nbr2if[snode][dnode].bw =
DynamicCast<QbbNetDevice>(d.Get(0))->GetDataRate().GetBitRate();
nbr2if[dnode][snode].idx =
DynamicCast<QbbNetDevice>(d.Get(1))->GetIfIndex();
nbr2if[dnode][snode].up = true;
nbr2if[dnode][snode].delay =
DynamicCast<QbbChannel>(
DynamicCast<QbbNetDevice>(d.Get(1))->GetChannel())
->GetDelay()
.GetTimeStep();
nbr2if[dnode][snode].bw =
DynamicCast<QbbNetDevice>(d.Get(1))->GetDataRate().GetBitRate();
// Each link also gets its own 10.x.y.0/24 subnet derived from the link index.
// NOTE(review): the first octet field exceeds 255 once i / 254 + 1 > 255.
char ipstring[16];
sprintf(ipstring, "10.%d.%d.0", i / 254 + 1, i % 254 + 1);
ipv4.SetBase(ipstring, "255.255.255.0");
ipv4.Assign(d);
// Log PFC events of both devices through get_pfc().
DynamicCast<QbbNetDevice>(d.Get(0))->TraceConnectWithoutContext(
"QbbPfc", MakeBoundCallback(&get_pfc, pfc_file,
DynamicCast<QbbNetDevice>(d.Get(0))));
DynamicCast<QbbNetDevice>(d.Get(1))->TraceConnectWithoutContext(
"QbbPfc", MakeBoundCallback(&get_pfc, pfc_file,
DynamicCast<QbbNetDevice>(d.Get(1))));
}
nic_rate = get_nic_rate(n);
// Configure the memory management unit of every switch and NVSwitch.
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 1) {
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n.Get(i));
uint32_t shift = 3;
// Device 0 is skipped; ports are numbered from 1.
for (uint32_t j = 1; j < sw->GetNDevices(); j++) {
Ptr<QbbNetDevice> dev = DynamicCast<QbbNetDevice>(sw->GetDevice(j));
uint64_t rate = dev->GetDataRate().GetBitRate();
// ECN thresholds must have been configured for this exact link speed.
NS_ASSERT_MSG(rate2kmin.find(rate) != rate2kmin.end(),
"must set kmin for each link speed");
NS_ASSERT_MSG(rate2kmax.find(rate) != rate2kmax.end(),
"must set kmax for each link speed");
NS_ASSERT_MSG(rate2pmax.find(rate) != rate2pmax.end(),
"must set pmax for each link speed");
sw->m_mmu->ConfigEcn(j, rate2kmin[rate], rate2kmax[rate],
rate2pmax[rate]);
uint64_t delay = DynamicCast<QbbChannel>(dev->GetChannel())
->GetDelay()
.GetTimeStep();
// Headroom: three times rate * delay / 8 / 1e9.
uint32_t headroom = rate * delay / 8 / 1000000000 * 3;
sw->m_mmu->ConfigHdrm(j, headroom);
// Start at shift 3 and drop one step per halving needed to bring the
// port rate down to the NIC rate (never below 0).
sw->m_mmu->pfc_a_shift[j] = shift;
while (rate > nic_rate && sw->m_mmu->pfc_a_shift[j] > 0) {
sw->m_mmu->pfc_a_shift[j]--;
rate /= 2;
}
}
sw->m_mmu->ConfigNPort(sw->GetNDevices() - 1);
sw->m_mmu->ConfigBufferSize(buffer_size * 1024 * 1024);
sw->m_mmu->node_id = sw->GetId();
} else if(n.Get(i)->GetNodeType() == 2){
// Same as above for NVSwitches, except that no ECN thresholds are set.
Ptr<NVSwitchNode> sw = DynamicCast<NVSwitchNode>(n.Get(i));
uint32_t shift = 3;
for (uint32_t j = 1; j < sw->GetNDevices(); j++) {
Ptr<QbbNetDevice> dev = DynamicCast<QbbNetDevice>(sw->GetDevice(j));
uint64_t rate = dev->GetDataRate().GetBitRate();
uint64_t delay = DynamicCast<QbbChannel>(dev->GetChannel())
->GetDelay()
.GetTimeStep();
uint32_t headroom = rate * delay / 8 / 1000000000 * 3;
sw->m_mmu->ConfigHdrm(j, headroom);
sw->m_mmu->pfc_a_shift[j] = shift;
while (rate > nic_rate && sw->m_mmu->pfc_a_shift[j] > 0) {
sw->m_mmu->pfc_a_shift[j]--;
rate /= 2;
}
}
sw->m_mmu->ConfigNPort(sw->GetNDevices()-1);
sw->m_mmu->ConfigBufferSize(buffer_size* 1024 * 1024);
sw->m_mmu->node_id = sw->GetId();
}
}
// The RDMA stack is only installed when ENABLE_QP evaluates to non-zero.
#if ENABLE_QP
FILE *fct_output = fopen(fct_output_file.c_str(), "w");
FILE *send_output = fopen(send_output_file.c_str(), "w");
// Hosts and NVSwitches each get their own RdmaHw + RdmaDriver.
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0 || n.Get(i)->GetNodeType() == 2) {
Ptr<RdmaHw> rdmaHw = CreateObject<RdmaHw>();
rdmaHw->SetAttribute("ClampTargetRate", BooleanValue(clamp_target_rate));
rdmaHw->SetAttribute("AlphaResumInterval",
DoubleValue(alpha_resume_interval));
rdmaHw->SetAttribute("RPTimer", DoubleValue(rp_timer));
rdmaHw->SetAttribute("FastRecoveryTimes",
UintegerValue(fast_recovery_times));
rdmaHw->SetAttribute("EwmaGain", DoubleValue(ewma_gain));
rdmaHw->SetAttribute("RateAI", DataRateValue(DataRate(rate_ai)));
rdmaHw->SetAttribute("RateHAI", DataRateValue(DataRate(rate_hai)));
rdmaHw->SetAttribute("L2BackToZero", BooleanValue(l2_back_to_zero));
rdmaHw->SetAttribute("L2ChunkSize", UintegerValue(l2_chunk_size));
rdmaHw->SetAttribute("L2AckInterval", UintegerValue(l2_ack_interval));
rdmaHw->SetAttribute("CcMode", UintegerValue(cc_mode));
rdmaHw->SetAttribute("RateDecreaseInterval",
DoubleValue(rate_decrease_interval));
rdmaHw->SetAttribute("MinRate", DataRateValue(DataRate(min_rate)));
rdmaHw->SetAttribute("Mtu", UintegerValue(packet_payload_size));
rdmaHw->SetAttribute("MiThresh", UintegerValue(mi_thresh));
rdmaHw->SetAttribute("VarWin", BooleanValue(var_win));
rdmaHw->SetAttribute("FastReact", BooleanValue(fast_react));
rdmaHw->SetAttribute("MultiRate", BooleanValue(multi_rate));
rdmaHw->SetAttribute("SampleFeedback", BooleanValue(sample_feedback));
rdmaHw->SetAttribute("TargetUtil", DoubleValue(u_target));
rdmaHw->SetAttribute("RateBound", BooleanValue(rate_bound));
rdmaHw->SetAttribute("DctcpRateAI",
DataRateValue(DataRate(dctcp_rate_ai)));
rdmaHw->SetAttribute("GPUsPerServer", UintegerValue(gpus_per_server));
rdmaHw->SetPintSmplThresh(pint_prob);
rdmaHw->SetAttribute("TotalPauseTimes",
UintegerValue(nic_total_pause_time));
Ptr<RdmaDriver> rdma = CreateObject<RdmaDriver>();
Ptr<Node> node = n.Get(i);
rdma->SetNode(node);
rdma->SetRdmaHw(rdmaHw);
node->AggregateObject(rdma);
rdma->Init();
// Completion callbacks supplied by the caller, bound to the output files.
rdma->TraceConnectWithoutContext(
"QpComplete", MakeBoundCallback(qp_finish, fct_output));
rdma->TraceConnectWithoutContext("SendComplete",MakeBoundCallback(send_finish,send_output));
}
}
#endif
// Queue index used for ACKs: 0 when ack_high_prio is set, otherwise 3.
if (ack_high_prio)
RdmaEgressQueue::ack_q_idx = 0;
else
RdmaEgressQueue::ack_q_idx = 3;
CalculateRoutes(n);
SetRoutingEntries();
// Derive RTT and bandwidth-delay product for every ordered pair of hosts.
maxRtt = maxBdp = 0;
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() != 0)
continue;
for (uint32_t j = 0; j < node_num; j++) {
if (n.Get(j)->GetNodeType() != 0)
continue;
uint64_t delay = pairDelay[n.Get(i)][n.Get(j)];
uint64_t txDelay = pairTxDelay[n.Get(i)][n.Get(j)];
// Round trip = twice the one-way delay plus one transmission delay.
uint64_t rtt = delay * 2 + txDelay;
uint64_t bw = pairBw[i][j];
uint64_t bdp = rtt * bw / 1000000000 / 8;
pairBdp[n.Get(i)][n.Get(j)] = bdp;
pairRtt[i][j] = rtt;
if (bdp > maxBdp)
maxBdp = bdp;
if (rtt > maxRtt)
maxRtt = rtt;
}
}
printf("maxRtt=%lu maxBdp=%lu\n", maxRtt, maxBdp);
// Switches need the congestion-control mode and the largest RTT.
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 1) {
Ptr<SwitchNode> sw = DynamicCast<SwitchNode>(n.Get(i));
sw->SetAttribute("CcMode", UintegerValue(cc_mode));
sw->SetAttribute("MaxRtt", UintegerValue(maxRtt));
}
}
// Collect the nodes listed in the trace file; ids outside the container are
// skipped.
NodeContainer trace_nodes;
for (uint32_t i = 0; i < trace_num; i++) {
uint32_t nid;
tracef >> nid;
if (nid >= n.GetN()) {
continue;
}
trace_nodes = NodeContainer(trace_nodes, n.Get(nid));
}
// NOTE(review): the fopen() result is not checked; it is used by
// sim_setting.Serialize() below even when tracing is disabled.
FILE *trace_output = fopen(trace_output_file.c_str(), "w");
if (enable_trace)
qbb.EnableTracing(trace_output, trace_nodes);
{
// Write the port speeds and the window (maxBdp) to the trace output.
SimSetting sim_setting;
for (auto i : nbr2if) {
for (auto j : i.second) {
// NOTE(review): node id and interface index are narrowed to 16 / 8 bits here.
uint16_t node = i.first->GetId();
uint8_t intf = j.second.idx;
uint64_t bps =
DynamicCast<QbbNetDevice>(i.first->GetDevice(j.second.idx))
->GetDataRate()
.GetBitRate();
sim_setting.port_speed[node][intf] = bps;
}
}
sim_setting.win = maxBdp;
sim_setting.Serialize(trace_output);
}
NS_LOG_INFO("Create Applications.");
// Not used in the remainder of this function.
Time interPacketInterval = Seconds(0.0000005 / 2);
// Every host / NVSwitch pair starts with port number 10000.
for (uint32_t i = 0; i < node_num; i++) {
if (n.Get(i)->GetNodeType() == 0 || n.Get(i)->GetNodeType() == 2)
for (uint32_t j = 0; j < node_num; j++) {
if (n.Get(j)->GetNodeType() == 0 || n.Get(j)->GetNodeType() == 2)
portNumber[i][j] = 10000;
}
}
// idx is a uint32_t, so -1 is stored as its largest value.
flow_input.idx = -1;
// flowf is left open; only the topology and trace streams are closed here.
topof.close();
tracef.close();
// Optional link failure, scheduled at 2 s + link_down_time microseconds.
if (link_down_time > 0) {
Simulator::Schedule(Seconds(2) + MicroSeconds(link_down_time),
&TakeDownLink, n, n.Get(link_down_A),
n.Get(link_down_B));
}
}
#endif