astra-sim-alibabacloud/astra-sim/system/MockNcclGroup.h (161 lines of code) (raw):
/*
*Copyright (c) 2024, Alibaba Group;
*Licensed under the Apache License, Version 2.0 (the "License");
*you may not use this file except in compliance with the License.
*You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*Unless required by applicable law or agreed to in writing, software
*distributed under the License is distributed on an "AS IS" BASIS,
*WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*See the License for the specific language governing permissions and
*limitations under the License.
*/
#ifndef __MOCKNCCLGROUP_H__
#define __MOCKNCCLGROUP_H__
#include <stdlib.h>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "astra-sim/system/Common.hh"
#include "astra-sim/system/MockNccl.h"
using namespace std;
namespace MockNccl {
// Forward declarations of types that this header only names (by pointer,
// by reference, or as a container template argument). Their definitions are
// not visible in this file.
enum class State;
struct SingleFlow;
enum class ComType;
struct ncclTree;
struct TuneInfo;
struct ncclChannelNode;

// Pointer alias for TuneInfo (the struct itself is defined further down in
// this header).
using TuneInfo_t = TuneInfo*;

// Container aliases shared by the declarations below.
// FlowModels: SingleFlow objects keyed by a pair of ints.
// NOTE(review): the meaning of the two ints in the key is not visible in this
// header -- confirm against the code that fills the map.
using FlowModels = std::map<std::pair<int, int>, SingleFlow>;
// RingChannels: two nested int-keyed maps ending in a list of ints.
using RingChannels = std::map<int, std::map<int, std::vector<int>>>;
// NVLStreechannels: two nested int-keyed maps ending in a list of
// ncclChannelNode pointers.
using NVLStreechannels =
    std::map<int, std::map<int, std::vector<ncclChannelNode*>>>;
// TreeChannels: two nested int-keyed maps ending in an ncclTree.
using TreeChannels = std::map<int, std::map<int, ncclTree>>;
// Kind of communication group a rank can belong to.
// NOTE(review): the abbreviations presumably stand for tensor / data /
// pipeline / expert parallelism (and the DP x EP combination); this header
// does not spell them out -- confirm against the callers.
// Values are written out explicitly; they equal the implicit numbering.
enum GroupType {
  TP = 0,
  DP = 1,
  PP = 2,
  EP = 3,
  DP_EP = 4,
  NONE = 5
};
// Plain record holding the parameters of one collective operation.
//
// Every member has a default member initializer, so a default-constructed
// ncclInfo has a well-defined (zero / null) state instead of indeterminate
// values.
// NOTE(review): zero is only a deterministic placeholder here; whether 0 is a
// meaningful algorithm / protocol id is decided by the code that fills this
// record -- confirm there.
struct ncclInfo {
  ncclFunc_t coll{};             // collective function id (value-initialized)
  TuneInfo_t tuneinfo = nullptr; // non-owning as far as this header shows
  int algorithm = 0;
  int protocol = 0;
  int nChannels = 0;
  int nThreads = 0;
  size_t nBytes = 0;

  ncclInfo() = default;
};
struct TuneInfo{
int nNodes;
int nRanks;
int nChannels;
int collNetSupport;
int nvlsSupport;
int minCompCap;
int maxCompCap;
std::vector<ncclTopoGraph*> graphs;
std::vector<std::vector<std::vector<float>>> latencies;
std::vector<std::vector<std::vector<float>>> bandwidths;
TuneInfo(){};
~TuneInfo(){};
TuneInfo(
int _nNodes,
int _nRanks,
int _nChannels,
int _collNetSupport,
int _nvlsSupport,
int _minCompCap,
int _maxCompCap)
: nNodes(_nNodes),
nRanks(_nRanks),
nChannels(_nChannels),
collNetSupport(_collNetSupport),
nvlsSupport(_nvlsSupport),
minCompCap(_minCompCap),
maxCompCap(_maxCompCap) {
graphs = std::vector<ncclTopoGraph*>(NCCL_NUM_ALGORITHMS, nullptr);
latencies = std::vector<std::vector<std::vector<float>>>(
NCCL_NUM_FUNCTIONS,
std::vector<std::vector<float>>(
NCCL_NUM_ALGORITHMS, std::vector<float>(NCCL_NUM_PROTOCOLS, 0)));
bandwidths = std::vector<std::vector<std::vector<float>>>(
NCCL_NUM_FUNCTIONS,
std::vector<std::vector<float>>(
NCCL_NUM_ALGORITHMS, std::vector<float>(NCCL_NUM_PROTOCOLS, 0)));
}
};
// Description of one communication group: its index, kind, node / rank
// counts and two member lists.
//
// Scalar members carry default initializers so a default-constructed
// GroupInfo (for example one created by std::map::operator[]) never exposes
// indeterminate values. No destructor is declared, so the implicit copy and
// move operations are all available.
class GroupInfo {
 public:
  int group_index = 0;
  GroupType type = NONE;
  int nNodes = 0;
  int nRanks = 0;
  std::vector<int> Ranks;
  // NOTE(review): presumably NVSwitch ids belonging to the group -- the
  // header does not say; confirm against the code that fills it.
  std::vector<int> NVSwitchs;

  GroupInfo() = default;

  // The two vectors are taken by value and moved into the members, which
  // avoids a second copy of each list.
  GroupInfo(
      int _group_index,
      GroupType _type,
      int _nNodes,
      int _nRanks,
      std::vector<int> _Ranks,
      std::vector<int> _NVSwitchs)
      : group_index(_group_index),
        type(_type),
        nNodes(_nNodes),
        nRanks(_nRanks),
        Ranks(std::move(_Ranks)),
        NVSwitchs(std::move(_NVSwitchs)) {}
};
class MockNcclGroup {
struct DoubleBinaryTreeNode {
int node;
DoubleBinaryTreeNode* left;
DoubleBinaryTreeNode* right;
DoubleBinaryTreeNode(int _node) : node(_node), left(nullptr), right(nullptr) {}
};
public:
MockNcclGroup(){}
MockNcclGroup(int _ngpus,int _gpus_per_nodes, int _TP_size,int _DP_size,int _PP_size,int _EP_size,int _DP_EP_size,std::vector<int>_NVSwitch,GPUType _gpu_type);
~MockNcclGroup(){};
std::map<std::pair<int,GroupType>,int> GroupIndex;
std::map<int,GroupInfo> AllGroups;
std::map<int,RingChannels> Allringchannels;
std::map<int,NVLStreechannels> AllNVLStreechannels;
std::map<int,TreeChannels> Alltreechannels;
std::map<int,TreeChannels> AllNVLSchannels;
int g_flow_id;
GPUType gpu_type;
std::map<std::string,int> FlowName2nums;
std::map<std::string ,std::map<int,std::shared_ptr<FlowModels> >> flow_models;
std::map<std::string ,struct ncclInfo*> nccl_infos;
std::shared_ptr<void> getFlowModels(GroupType type , int rank, AstraSim::ComType op,uint64_t data_size,int layer_num,State loopstate);
private:
std::map<int,std::shared_ptr<FlowModels>> genFlowModels(GroupType type , int rank, AstraSim::ComType op,uint64_t data_size);
std::map<int,std::shared_ptr<FlowModels>> genReduceScatterFlowModels(GroupType type , int rank, uint64_t data_size);
std::map<int,std::shared_ptr<FlowModels>> genAlltoAllFlowModels(GroupType type, int rank, uint64_t data_size);
std::map<int,std::shared_ptr<FlowModels>> genAllReduceFlowModels(GroupType type , int rank,uint64_t data_size);
std::map<int,std::shared_ptr<FlowModels>> genAllReduceRingFlowModels(GroupType type , int rank,uint64_t data_size);
std::map<int,std::shared_ptr<FlowModels>> genAllreduceNVLSFlowModels(
GroupType type,
int rank,
uint64_t data_size);
std::shared_ptr<FlowModels>genallReduceNVLSTreeFlowModels(GroupType type,int rank,uint64_t data_size);
FlowModels generate_flow_model_nvls_tree_allreduce_up(std::vector<ncclChannelNode*>nvlstreenodes,std::unordered_map<ncclChannelNode*, int> upinDegree,std::unordered_map<ncclChannelNode*,std::vector<int>>& nodeprevs,int chunk_size,int chunk_id,int chunk_count,int channle_id,FlowModels& result);
FlowModels generate_flow_model_nvls_tree_allreduce_down(std::vector<ncclChannelNode*>nvlstreenodes,std::unordered_map<ncclChannelNode*, int> downinDegree,std::unordered_map<ncclChannelNode*,std::vector<int>>& nodeprevs,int chunk_size,int chunk_id,int chunk_count,int channle_id,FlowModels& result);
std::shared_ptr<FlowModels> genAllReduceTreeFlowModels(GroupType type , int rank,uint64_t data_size);
FlowModels generate_flow_model_tree_allreduce_up(std::map<int,ncclTree> &nodes,std::unordered_map<int, int> upinDegree,std::unordered_map<int,std::vector<int>>& nodeprevs,int chunk_size,int chunk_id,int chunk_count,int channle_id,FlowModels& result);
FlowModels generate_flow_model_tree_allreduce_down(std::map<int,ncclTree> &nodes,std::unordered_map<int, int> downinDegree,std::unordered_map<int,std::vector<int>>& nodeprevs,int chunk_size,int chunk_id,int chunk_count,int channle_id,FlowModels& result);
std::map<int,std::shared_ptr<FlowModels>> genAllGatherFlowModels(GroupType type , int rank,uint64_t data_size);
std::vector<DoubleBinaryTreeNode*> genInterDouBinTree(GroupInfo pgroupinfo);
DoubleBinaryTreeNode* InterDouBinTreeShift(DoubleBinaryTreeNode* root,std::vector<int>nodes);
void ConnInterIntraTree(DoubleBinaryTreeNode*root,std::map<int,std::vector<int>>node2ranks,std::map<int,ncclTree>&TreeChannel);
public:
void generateringchannels(
std::map<int, std::vector<int>> localrings,
MockNccl::GroupInfo* groupInfo,
std::map<int, std::map<int, std::vector<int>>>& ringchannels);
std::map<int, std::vector<int>> gen_local_ring(int rank, GroupType type);
RingChannels genringchannels(
int rank,
GroupType type);
TreeChannels gettreechannels(int rank, GroupType type);
TreeChannels get_nvls_channels(int rank,GroupType type);
NVLStreechannels get_nvls_tree_channels(int rank,GroupType type);
ncclChannelNode* gen_nvls_tree_intra_channels(std::vector<int>intra_topo,std::map<int, vector<ncclChannelNode*>> &nvlstreechannel);
ncclChannelNode* gen_nvls_tree_inter_channels(DoubleBinaryTreeNode* root,std::map<int,ncclChannelNode*> nodencclchannlenodes,std::map<int, vector<ncclChannelNode*>> &nvlstreechannel);
ncclInfo* get_algo_proto_info(
GroupType type,
int rank,
AstraSim::ComType op,
uint64_t data_size);
};
}
#endif