astra-sim-alibabacloud/astra-sim/system/MockNcclChannel.h (98 lines of code) (raw):

/* *Copyright (c) 2024, Alibaba Group; *Licensed under the Apache License, Version 2.0 (the "License"); *you may not use this file except in compliance with the License. *You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 *Unless required by applicable law or agreed to in writing, software *distributed under the License is distributed on an "AS IS" BASIS, *WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *See the License for the specific language governing permissions and *limitations under the License. */ #ifndef __MOCKNCCLCHANNEL_HH__ #define __MOCKNCCLCHANNEL_HH__ #include <vector> #include <map> #include <memory> #include "astra-sim/system/Common.hh" #include "MockNcclGroup.h" namespace MockNccl { struct SingleFlow{ int flow_id; int src; int dest; uint64_t flow_size; std::vector<int>prev; std::vector<int> parent_flow_id; std::vector<int> child_flow_id; int channel_id; int chunk_id; int chunk_count; std::string conn_type; SingleFlow(){}; SingleFlow( int _flow_id, int _src, int _dest, uint64_t _flow_size, std::vector<int>_prev, std::vector<int> _parent_flow_id, std::vector<int> _child_flow_id, int _channel_id, int _chunk_id, int _chunk_count, std::string _conn_type) : flow_id(_flow_id), src(_src), dest(_dest), flow_size(_flow_size), prev(_prev), parent_flow_id(_parent_flow_id), child_flow_id(_child_flow_id), channel_id(_channel_id), chunk_id(_chunk_id), chunk_count(_chunk_count), conn_type(_conn_type) {} ~SingleFlow(){}; }; enum class State{ Forward_Pass, Weight_Gradient, Input_Gradient, }; enum class ComType { None, Reduce_Scatter, All_Gather, All_Reduce, All_to_All, All_Reduce_All_to_All }; struct ncclTree { int depth; int rank; int up; std::vector<int> down; ncclTree(){}; ncclTree(int _depth, int _rank, int _up, std::vector<int> _down) : depth(_depth), rank(_rank), up(_up), down(_down) {}; ~ncclTree(){}; }; struct ncclChannelNode{ int depth; int rank; ncclChannelNode* up; std::vector<ncclChannelNode*> down; ncclChannelNode(){}; ncclChannelNode(int _depth,int _rank,ncclChannelNode* _up,std::vector<ncclChannelNode*>_down):depth(_depth),rank(_rank),up(_up),down(_down){}; ~ncclChannelNode(){}; }; class MockNcclComm{ public: MockNcclComm(int _rank,GroupType _type,MockNcclGroup* _GlobalGroup); ~MockNcclComm(); MockNccl::MockNcclGroup* GlobalGroup; GroupType type; int rank; std::map<int,std::map<int,std::vector<int>>> ringchannels; TreeChannels treechannels; TreeChannels nvlschannels; NVLStreechannels nvlstreechannels; std::map<int,std::map<int,std::vector<int>>> get_rings(); MockNccl::TreeChannels get_treechannels(); MockNccl::TreeChannels get_nvls_channels(); MockNccl::NVLStreechannels get_nvls_tree_channels(); std::shared_ptr<void> get_flow_model(uint64_t data_size,AstraSim::ComType collective_type,int layer_num,State loopstate); struct ncclInfo* get_algo_proto_info(uint64_t data_size,AstraSim::ComType collective_type); }; } #endif