static ncclResult_t ofi_irecv()

in src/nccl_ofi_net.c [1757:1829]


/*
 * Post a non-blocking tagged receive on a receive communicator.
 *
 * @param recvComm  Receive communicator (recvComm_t *); must not be NULL.
 * @param data      Destination buffer handed to fi_trecv().
 * @param size      Size of the destination buffer in bytes.
 * @param mhandle   Memory-registration handle for `data`, or NULL when no
 *                  descriptor is to be passed to libfabric.
 * @param request   Out-parameter; must not be NULL. On success it receives
 *                  the NCCL OFI request tracking this receive. It is set to
 *                  NULL whenever no request is handed back (provider
 *                  returned -FI_EAGAIN, or any failure).
 *
 * @return ncclSuccess when the receive was posted, or when the provider
 *         returned -FI_EAGAIN (in which case *request is NULL);
 *         ncclSystemError on invalid arguments, too many inflight requests,
 *         request allocation failure or a libfabric posting error;
 *         otherwise the error reported by nccl_ofi_progress().
 */
static ncclResult_t ofi_irecv(void* recvComm, void* data, int size,
			      void *mhandle, void** request)
{
	ncclResult_t ret = ncclSuccess;
	ssize_t rc = 0;
	nccl_ofi_req_t *req = NULL;
	recvComm_t *rComm = (recvComm_t *)recvComm;
	void *desc = NULL;

	/* Validate recvComm */
	if (OFI_UNLIKELY(rComm == NULL)) {
		ret = ncclSystemError;
		NCCL_OFI_WARN("Invalid recvComm provided");
		goto error;
	}

	/* Validate the out-parameter before anything can be written to it */
	if (OFI_UNLIKELY(request == NULL)) {
		ret = ncclSystemError;
		NCCL_OFI_WARN("Invalid request pointer provided");
		goto error;
	}

	/*
	 * Support only NCCL_OFI_MAX_REQUESTS inflight requests. Use >= so a
	 * counter that already exceeds the limit is rejected as well.
	 */
	if (OFI_UNLIKELY(rComm->num_inflight_reqs >= NCCL_OFI_MAX_REQUESTS)) {
		ret = ncclSystemError;
		NCCL_OFI_WARN("Can not support more than %d inflight requests",
			     NCCL_OFI_MAX_REQUESTS);
		goto error;
	}

	/* Allocate NCCL OFI request */
	req = allocate_nccl_ofi_request(rComm->nccl_ofi_reqs_fl);
	if (OFI_UNLIKELY(req == NULL)) {
		ret = ncclSystemError;
		NCCL_OFI_WARN("Unable to get NCCL OFI request for device %d",
			     rComm->dev);
		goto error;
	}

	/* Progress NCCL OFI */
	ret = nccl_ofi_progress(nccl_ofi_component[rComm->dev]);
	if (OFI_UNLIKELY(ret != 0)) {
		goto error;
	}

	req->rComm = rComm;
	req->dev = rComm->dev;
	req->direction = NCCL_OFI_RECV;

	/* Only look up a descriptor when a registration handle was supplied */
	if (mhandle != NULL) {
		desc = fi_mr_desc(mhandle);
	}

	/* Try posting buffer to local EP */
	rc = fi_trecv(rComm->local_ep, data, size, desc,
		      FI_ADDR_UNSPEC, rComm->tag, 0, &req->ctx);
	if (rc == -FI_EAGAIN) {
		/*
		 * Not an error: ret is still ncclSuccess. The shared cleanup
		 * path releases req and hands back a NULL request.
		 */
		goto error;
	}
	else if (rc != 0) {
		NCCL_OFI_WARN("Unable to post receive buffer for dev %d. RC: %zd, ERROR: %s",
			       rComm->dev, rc, fi_strerror(-rc));
		ret = ncclSystemError;
		goto error;
	}

	/* Count the request only once it has actually been posted */
	rComm->num_inflight_reqs++;

	/* Return request to NCCL */
	*request = req;

	goto exit;

error:
	if (req) {
		free_nccl_ofi_req(req, false);
	}
	/* Never leave a stale or uninitialised pointer in the caller's hands */
	if (request) {
		*request = NULL;
	}
exit:
	return ret;
}