in astra-sim-alibabacloud/astra-sim/system/MockNcclGroup.cc [677:759]
/**
 * Generates the flow models for an all-reduce over the NVLS tree channels of
 * the group that (`rank`, `type`) maps to in `GroupIndex`.
 *
 * For every channel stored in `AllNVLStreechannels` for that group, all tree
 * nodes of the channel are collected, their in-degrees for the two phases are
 * initialised, and the "up" and "down" flow generators are invoked in that
 * order, accumulating into one shared `FlowModels` object.
 *
 * @param type       Group type used together with `rank` to look up the group.
 * @param rank       Rank whose group is looked up. When it is 0 the tree
 *                   topology of every channel is additionally written to the
 *                   DEBUG log.
 * @param data_size  Total payload size; it is split evenly across the channels
 *                   (and across `chunk_count` chunks, currently 1).
 * @return The accumulated flow models, or `nullptr` when there is no group for
 *         (`rank`, `type`) or the group has no NVLS tree channels.
 */
std::shared_ptr<FlowModels> MockNcclGroup::genallReduceNVLSTreeFlowModels(
    GroupType type,
    int rank,
    uint64_t data_size) {
  MockNcclLog* NcclLog = MockNcclLog::getInstance();

  // Single lookup instead of count() followed by operator[].
  const auto group_it = GroupIndex.find(std::make_pair(rank, type));
  if (group_it == GroupIndex.end()) {
    NcclLog->writeLog(NcclLogLevel::ERROR,"There is no relevant group info, resulting in an error in generating genallReduceNVLSTreeFlowModels.");
    return nullptr;
  }
  const int gp_idx = group_it->second;

  // Bind by reference: the channel table is only read here, so copying the
  // whole nested container is unnecessary.
  const NVLStreechannels& nvlstreechannels = AllNVLStreechannels[gp_idx];
  // size() is a size_t; cast so the argument matches the %d conversion.
  NcclLog->writeLog(NcclLogLevel::DEBUG," nvlstreechannels.size() %d",static_cast<int>(nvlstreechannels.size()));

  // Without channels the per-channel size below would divide by zero.
  if (nvlstreechannels.empty()) {
    NcclLog->writeLog(NcclLogLevel::ERROR,"There are no NVLS tree channels, resulting in an error in generating genallReduceNVLSTreeFlowModels.");
    return nullptr;
  }

  int chunk_count = 1;
  // NOTE(review): the quotient is narrowed to int exactly as before; very
  // large data_size values would be truncated here - confirm the helper's
  // parameter type before widening.
  int chunk_size =
      static_cast<int>(data_size / nvlstreechannels.size() / chunk_count);

  // Build the result in place so it is not copied again on return.
  std::shared_ptr<FlowModels> result = std::make_shared<FlowModels>();

  for (const auto& channel : nvlstreechannels) {
    // channel.first is the channel id handed to the generators;
    // channel.second maps an int key to the list of tree nodes stored under it.
    if (rank == 0) {
      // Debug dump of the tree topology of this channel.
      for (const auto& rank_nodes : channel.second) {
        NcclLog->writeLog(NcclLogLevel::DEBUG," rank %d nvls tree nodes ",rank_nodes.first);
        int i = 0;
        for (ncclChannelNode* node : rank_nodes.second) {
          NcclLog->writeLog(NcclLogLevel::DEBUG," node %d rank %d",i,node->rank);
          if (node->up != nullptr) {
            NcclLog->writeLog(NcclLogLevel::DEBUG," up %d",node->up->rank);
          }
          NcclLog->writeLog(NcclLogLevel::DEBUG," down ");
          for (const auto& child : node->down) {
            NcclLog->writeLog(NcclLogLevel::DEBUG,"%d ",child->rank);
          }
          // Advance the node index; it previously stayed at 0 for every node.
          ++i;
        }
      }
    }

    std::unordered_map<ncclChannelNode*, int> upinDegree;
    std::unordered_map<ncclChannelNode*, int> downinDegree;
    std::unordered_map<ncclChannelNode*, std::vector<int>> nodeprevs;
    for (int ck = 0; ck < chunk_count; ck++) {
      // Every chunk starts with no recorded predecessors and freshly
      // initialised in-degrees.
      nodeprevs.clear();
      std::vector<ncclChannelNode*> ncclchannelnodes;
      for (const auto& rank_nodes : channel.second) {
        for (ncclChannelNode* node : rank_nodes.second) {
          ncclchannelnodes.push_back(node);
          // Up phase: in-degree is the number of `down` entries of the node.
          upinDegree[node] = static_cast<int>(node->down.size());
          // Down phase: in-degree is 1 when the node has an `up` link, else 0.
          downinDegree[node] = (node->up == nullptr) ? 0 : 1;
        }
      }
      // The up generator runs first; nodeprevs is shared with the down
      // generator that follows.
      generate_flow_model_nvls_tree_allreduce_up(
          ncclchannelnodes,
          upinDegree,
          nodeprevs,
          chunk_size,
          ck,
          chunk_count,
          channel.first,
          *result);
      generate_flow_model_nvls_tree_allreduce_down(
          ncclchannelnodes,
          downinDegree,
          nodeprevs,
          chunk_size,
          ck,
          chunk_count,
          channel.first,
          *result);
    }
  }
  return result;
}