static ncclResult_t ofi_flush()

in src/nccl_ofi_net.c [1981:2026]


/*
 * Blocking flush on a receive communicator.
 *
 * Starts a flush through ofi_iflush() and then polls ofi_test() until the
 * resulting request is reported as done.
 *
 * recvComm: receive communicator (treated as a recvComm_t); also forwarded
 *           to ofi_iflush()
 * data, size, mhandle: forwarded unchanged to ofi_iflush()
 *
 * Returns ncclSuccess when the flush is skipped (size == 0, flush disabled
 * via ofi_nccl_gdr_flush_disable(), or support_gdr not set) or when the
 * request completes without error; otherwise the error code returned by
 * ofi_iflush() or ofi_test().
 */
static ncclResult_t ofi_flush(void* recvComm, void* data, int size,
			      void *mhandle)
{
	ncclResult_t ret = ncclSuccess;
	recvComm_t *rComm = (recvComm_t *)recvComm;
	nccl_ofi_req_t *req = NULL;
	int done = 0;

	if (size == 0) {
		/*
		 * Flush is an expensive operation. So, don't send fi_read for
		 * 0-sized messages. Since, NCCL issues flush for every irecv(),
		 * we guarantee to sync data to GPU even without it.
		 */
		goto exit;
	}

	/* Nothing to flush when flushing is disabled or GDR is not in use */
	if (ofi_nccl_gdr_flush_disable() || !support_gdr)
		goto exit;

	/*
	 * Store the status code exactly as returned and apply the branch
	 * hint to the comparison only. Passing the call itself through
	 * OFI_UNLIKELY() would hand the hint macro's result to the caller,
	 * which is only the real error code if the macro happens to pass
	 * its argument through unmodified.
	 */
	ret = ofi_iflush(recvComm, data, size, mhandle, (void **)&req);
	if (OFI_UNLIKELY(ret != ncclSuccess)) {
		goto exit;
	}

	/* Ensure that the request completes */
	while (done == 0) {
		ret = ofi_test(req, &done, NULL);
		/*
		 * If testing request completion fails and returns
		 * not completed, reduce number of inflight requests
		 * here and release the request ourselves.
		 * NOTE(review): presumably the `false` passed to
		 * free_nccl_ofi_req() below keeps it from adjusting the
		 * counter a second time - confirm against its definition.
		 */
		if (OFI_UNLIKELY((ret != ncclSuccess) && (done == 0))) {
			rComm->num_inflight_reqs--;
			goto error;
		}
	}

	/*
	 * done != 0: hand back whatever ofi_test() reported last. The
	 * request is not released here.
	 * NOTE(review): presumably ofi_test() releases a request it reports
	 * as done - confirm.
	 */
	return ret;

error:
	if (req)
		free_nccl_ofi_req(req, false);
exit:
	return ret;
}