in gloo/alltoallv.cc [119:161]
// Variable-sized all-to-all exchange.
//
// For every rank r, the chunk of `opts.in` described by
// (inOffsetPerRank[r], inLengthPerRank[r]) is sent to rank r, and the chunk
// received from rank r is placed in `opts.out` at
// (outOffsetPerRank[r], outLengthPerRank[r]). The chunk addressed to this
// rank itself is copied locally with memcpy, which treats the offset and
// length as byte counts.
//
// Fails via GLOO_ENFORCE when:
//   - opts.elementSize is zero,
//   - the input or output buffer is missing,
//   - this rank is outside [0, context->size),
//   - any of the four per-rank vectors does not have exactly one entry per
//     rank,
//   - the local input and output chunk lengths differ.
//
// NOTE(review): offsets/lengths are not checked against the buffer sizes
// here; presumably the options setters or the transport validate them --
// confirm against the callers.
void alltoallv(AlltoallvOptions& opts) {
  const auto& context = opts.context;
  transport::UnboundBuffer* in = opts.in.get();
  transport::UnboundBuffer* out = opts.out.get();
  const std::vector<size_t>& inOffsetPerRank = opts.inOffsetPerRank;
  const std::vector<size_t>& inLengthPerRank = opts.inLengthPerRank;
  const std::vector<size_t>& outOffsetPerRank = opts.outOffsetPerRank;
  const std::vector<size_t>& outLengthPerRank = opts.outLengthPerRank;
  const auto slot = Slot::build(kAlltoallSlotPrefix, opts.tag);

  // Sanity checks.
  GLOO_ENFORCE(opts.elementSize > 0);
  GLOO_ENFORCE(in != nullptr);
  GLOO_ENFORCE(out != nullptr);

  const int myRank = context->rank;
  const int worldSize = context->size;

  // Every per-rank table is indexed by rank below, so each one must hold
  // exactly one entry per rank and our own rank must be a valid index.
  // Without these checks a mis-sized table is a silent out-of-bounds read.
  GLOO_ENFORCE(myRank >= 0 && myRank < worldSize);
  const size_t expectedEntries = static_cast<size_t>(worldSize);
  GLOO_ENFORCE(inOffsetPerRank.size() == expectedEntries);
  GLOO_ENFORCE(inLengthPerRank.size() == expectedEntries);
  GLOO_ENFORCE(outOffsetPerRank.size() == expectedEntries);
  GLOO_ENFORCE(outLengthPerRank.size() == expectedEntries);

  // Local copy.
  GLOO_ENFORCE(inLengthPerRank[myRank] == outLengthPerRank[myRank]);
  const size_t myInOffset = inOffsetPerRank[myRank];
  const size_t myOutOffset = outOffsetPerRank[myRank];
  const size_t myChunkSize = inLengthPerRank[myRank];
  // memcpy requires valid pointers even for a zero-byte copy, so skip the
  // call entirely when there is nothing to copy (e.g. empty buffers).
  if (myChunkSize > 0) {
    memcpy(
        static_cast<char*>(out->ptr) + myOutOffset,
        static_cast<char*>(in->ptr) + myInOffset,
        myChunkSize);
  }

  // Remote copy. At step i this rank sends to the rank i positions ahead of
  // it and receives from the rank i positions behind it (modulo worldSize),
  // so every other rank is visited exactly once in each direction.
  for (int i = 1; i < worldSize; i++) {
    const int sendRank = (myRank + i) % worldSize;
    const int recvRank = (myRank + worldSize - i) % worldSize;
    in->send(
        sendRank, slot, inOffsetPerRank[sendRank], inLengthPerRank[sendRank]);
    out->recv(
        recvRank, slot, outOffsetPerRank[recvRank], outLengthPerRank[recvRank]);
  }

  // One waitSend/waitRecv call per send/recv posted above, each bounded by
  // opts.timeout.
  for (int i = 1; i < worldSize; i++) {
    in->waitSend(opts.timeout);
    out->waitRecv(opts.timeout);
  }
}