void alltoallv()

in gloo/alltoallv.cc [119:161]


// Variable-sized all-to-all exchange.
//
// For every rank r, the chunk of `opts.in` described by
// (inOffsetPerRank[r], inLengthPerRank[r]) is delivered to rank r, and the
// chunk received from rank r is placed into `opts.out` at
// (outOffsetPerRank[r], outLengthPerRank[r]). The chunk addressed to this
// rank itself is copied locally; offsets and lengths are applied as byte
// quantities in that copy.
//
// Fails a GLOO_ENFORCE check when:
//   - opts.elementSize is zero, or either buffer is missing;
//   - any per-rank offset/length vector has fewer entries than the context
//     size (every index in [0, size) is read below);
//   - the local input and output chunk lengths for this rank differ.
//
// All sends and receives are posted first and then waited on with
// opts.timeout.
void alltoallv(AlltoallvOptions& opts) {
  const auto& context = opts.context;
  transport::UnboundBuffer* in = opts.in.get();
  transport::UnboundBuffer* out = opts.out.get();
  // The layout tables are only read here, so bind them as const.
  const std::vector<size_t>& inOffsetPerRank = opts.inOffsetPerRank;
  const std::vector<size_t>& inLengthPerRank = opts.inLengthPerRank;
  const std::vector<size_t>& outOffsetPerRank = opts.outOffsetPerRank;
  const std::vector<size_t>& outLengthPerRank = opts.outLengthPerRank;
  const auto slot = Slot::build(kAlltoallSlotPrefix, opts.tag);

  // Sanity checks.
  GLOO_ENFORCE(opts.elementSize > 0);
  GLOO_ENFORCE(in != nullptr);
  GLOO_ENFORCE(out != nullptr);

  const int myRank = context->rank;
  const int worldSize = context->size;

  // Every rank index in [0, worldSize) is used to subscript these vectors
  // (this rank for the local copy, all others for send/recv), so each one
  // must hold at least worldSize entries. Without this check a short vector
  // is read out of bounds.
  const size_t requiredEntries = static_cast<size_t>(worldSize);
  GLOO_ENFORCE(inOffsetPerRank.size() >= requiredEntries);
  GLOO_ENFORCE(inLengthPerRank.size() >= requiredEntries);
  GLOO_ENFORCE(outOffsetPerRank.size() >= requiredEntries);
  GLOO_ENFORCE(outLengthPerRank.size() >= requiredEntries);

  // Local copy.
  GLOO_ENFORCE(inLengthPerRank[myRank] == outLengthPerRank[myRank]);
  const size_t myInOffset = inOffsetPerRank[myRank];
  const size_t myOutOffset = outOffsetPerRank[myRank];
  const size_t myChunkSize = inLengthPerRank[myRank];
  // Skip the copy for an empty chunk: memcpy must not be handed a null
  // pointer even when the byte count is zero, and an empty buffer may have
  // a null `ptr`.
  if (myChunkSize > 0) {
    memcpy(
        static_cast<char*>(out->ptr) + myOutOffset,
        static_cast<char*>(in->ptr) + myInOffset,
        myChunkSize);
  }

  // Remote copy. At step i this rank sends to the peer i positions ahead and
  // receives from the peer i positions behind (both modulo worldSize), so
  // each of the other worldSize - 1 ranks is targeted exactly once.
  for (int i = 1; i < worldSize; i++) {
    const int sendRank = (myRank + i) % worldSize;
    const int recvRank = (myRank + worldSize - i) % worldSize;
    in->send(
        sendRank, slot, inOffsetPerRank[sendRank], inLengthPerRank[sendRank]);
    out->recv(
        recvRank, slot, outOffsetPerRank[recvRank], outLengthPerRank[recvRank]);
  }

  // Wait once per posted send and once per posted recv (worldSize - 1 each).
  for (int i = 1; i < worldSize; i++) {
    in->waitSend(opts.timeout);
    out->waitRecv(opts.timeout);
  }
}