in src/torch_ucc_comm.cpp [108:132]
/// Polls the key-value store for an out-of-band allgather and, once every
/// rank's entry is present, copies the entries into the receive buffer.
///
/// Presumably registered as the `req_test` callback of UCC's OOB allgather
/// interface -- confirm against the caller that wires it up.
///
/// @param req Opaque request handle; interpreted as a
///            `torch_ucc_oob_coll_info_t*`.
/// @return `UCC_INPROGRESS` if at least one "teamr<r>" key is not yet in the
///         store; `UCC_ERR_NO_MESSAGE` if a store operation throws a
///         `std::exception` or a stored value is shorter than `msglen`;
///         `UCC_OK` once all `size` entries have been copied into `rbuf`.
ucc_status_t oob_allgather_test(void* req) {
  auto* info = static_cast<torch_ucc_oob_coll_info_t*>(req);
  try {
    // First pass: report "in progress" without touching rbuf unless every
    // rank's key is already available in the store.
    for (int r = 0; r < info->size; r++) {
      if (!info->store->check({info->getKey("teamr" + std::to_string(r))})) {
        return UCC_INPROGRESS;
      }
    }
    // Second pass: rank r's payload lands at byte offset msglen * r in rbuf.
    auto* dst = reinterpret_cast<uint8_t*>(info->rbuf);
    for (int r = 0; r < info->size; r++) {
      const std::vector<uint8_t> data =
          info->store->get(info->getKey("teamr" + std::to_string(r)));
      // Never read past the end of the fetched value: a short payload would
      // otherwise make memcpy read out of bounds.
      if (data.size() < static_cast<size_t>(info->msglen)) {
        LOG(ERROR) << "(oob_allgather) Store value for rank " << r << " has "
                   << data.size() << " bytes, expected at least "
                   << info->msglen;
        return UCC_ERR_NO_MESSAGE;
      }
      memcpy(dst + info->msglen * r, data.data(), info->msglen);
    }
  } catch (const std::exception& ex) {
    LOG(ERROR) << "(oob_allgather) Caught exception in Store Operation .. "
               << "[" << ex.what() << "]";
    return UCC_ERR_NO_MESSAGE;
  }
  return UCC_OK;
}