in src/torch_ucc.cpp [188:208]
// Waits on this work item and always returns true on normal exit.
//
// The timeout parameter is accepted to satisfy the interface but is never
// consulted. Two paths exist:
//   * CUDA build, a fence is present and blocking wait is disabled for this
//     op type: the pending error state is checked, the fence is asked to
//     block the current CUDA stream, and the call returns without spinning
//     on the host.
//   * otherwise: the host spins until isCompleted() reports true, checks the
//     error state, and fires the profiling end callback if one is set.
bool ProcessGroupUCC::WorkUCC::wait(std::chrono::milliseconds /* unused */) {
#ifdef USE_CUDA
  // `fence` is tested first so the config table is only indexed when a fence
  // actually exists.
  const bool streamOnlyWait =
      fence && !torch_ucc_config.blocking_wait[static_cast<int>(opType_)];
  if (streamOnlyWait) {
    // Report any already-recorded failure before touching the stream.
    setAndThrowException();
    // Block the user stream rather than the calling host thread.
    fence->block(at::cuda::getCurrentCUDAStream());
    return true;
  }
#endif

  // Host-side wait: poll the completion flag until the work is done.
  for (;;) {
    if (isCompleted()) {
      break;
    }
  }
  setAndThrowException();

  // The progress thread does not own this WorkUCC, so the profiling end
  // callback (when one is registered) has to be invoked manually here and
  // then cleared.
  auto& endCallback = ProcessGroup::Work::recordFunctionEndCallback_;
  if (endCallback) {
    endCallback();
    endCallback = nullptr;
  }
  return true;
}