/*
 * __gnix_rndzv_req() — from prov/gni/src/gnix_msg.c [1098:1273]
 */
static int __gnix_rndzv_req(void *arg)
{
	struct gnix_fab_req *req = (struct gnix_fab_req *)arg;
	struct gnix_fid_ep *ep = req->gnix_ep;
	struct gnix_nic *nic = ep->nic;
	struct gnix_tx_descriptor *txd, *tail_txd = NULL;
	gni_return_t status;
	int rc;
	int use_tx_cq_blk = 0;
	struct fid_mr *auto_mr = NULL;
	int inject_err = _gnix_req_inject_err(req);
	int head_off, head_len, tail_len;
	void *tail_data = NULL;

	GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n");

	if (req->vc->modes & GNIX_VC_MODE_XPMEM)
		return  __gnix_rndzv_req_xpmem(req);

	if (!req->msg.recv_md[0]) {
		rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
				  (void *)req->msg.recv_info[0].recv_addr,
				  req->msg.recv_info[0].recv_len,
				  FI_READ | FI_WRITE, 0, 0, 0,
				  &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG);
		if (rc != FI_SUCCESS) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "Failed to auto-register local buffer: %d\n",
				  rc);

			return -FI_EAGAIN;
		}
		req->msg.recv_flags |= FI_LOCAL_MR;
		req->msg.recv_md[0] = container_of(auto_mr,
						   struct gnix_fid_mem_desc,
						   mr_fid);
		req->msg.recv_info[0].mem_hndl = req->msg.recv_md[0]->mem_hndl;
		GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr);
	}

	rc = _gnix_nic_tx_alloc(nic, &txd);
	if (rc) {
		GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n",
			  rc);
		return -FI_ENOSPC;
	}

	txd->completer_fn = __gnix_rndzv_req_complete;
	txd->req = req;


	use_tx_cq_blk = (ep->domain->data_progress == FI_PROGRESS_AUTO) ? 1 : 0;

	txd->gni_desc.type = GNI_POST_RDMA_GET;
	txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
	txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
	txd->gni_desc.local_mem_hndl = req->msg.recv_info[0].mem_hndl;
	txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh;
	txd->gni_desc.rdma_mode = 0;
	txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ?
					nic->tx_cq_blk : nic->tx_cq;

	head_off = req->msg.send_info[0].send_addr & GNI_READ_ALIGN_MASK;
	head_len = head_off ? GNI_READ_ALIGN - head_off : 0;
	tail_len = (req->msg.send_info[0].send_addr + req->msg.send_info[0].send_len) &
			GNI_READ_ALIGN_MASK;

	txd->gni_desc.local_addr = (uint64_t)req->msg.recv_info[0].recv_addr + head_len;
	txd->gni_desc.remote_addr = (uint64_t)req->msg.send_info[0].send_addr + head_len;
	txd->gni_desc.length = req->msg.send_info[0].send_len - head_len - tail_len;

	if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) {
		/* The user ended up with a send matching a receive with a
		 * buffer that is too short and unaligned... what a way to
		 * behave.  We could not have forseen which unaligned data to
		 * send across with the rndzv_start request, so we do an extra
		 * TX here to pull the random unaligned bytes. */
		rc = _gnix_nic_tx_alloc(nic, &tail_txd);
		if (rc) {
			_gnix_nic_tx_free(nic, txd);
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "_gnix_nic_tx_alloc() failed (tail): %d\n",
				  rc);
			return -FI_ENOSPC;
		}

		if (req->int_tx_buf_e == NULL) {
			req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep);
			if (req->int_tx_buf_e == NULL) {
				GNIX_FATAL(FI_LOG_EP_DATA,
					  "RAN OUT OF INT_TX_BUFS");
				/* TODO return error */
			}
		}

		req->int_tx_buf = ((struct gnix_int_tx_buf *)
				   req->int_tx_buf_e)->buf;
		req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e);

		tail_txd->completer_fn = __gnix_rndzv_req_complete;
		tail_txd->req = req;

		tail_data = (void *)((req->msg.send_info[0].send_addr +
				      req->msg.send_info[0].send_len) &
				      ~GNI_READ_ALIGN_MASK);

		tail_txd->gni_desc.type = GNI_POST_FMA_GET;
		tail_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
		tail_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
		tail_txd->gni_desc.local_mem_hndl = req->int_tx_mdh;
		tail_txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh;
		tail_txd->gni_desc.rdma_mode = 0;
		tail_txd->gni_desc.src_cq_hndl = nic->tx_cq;
		tail_txd->gni_desc.local_addr = (uint64_t)req->int_tx_buf;
		tail_txd->gni_desc.remote_addr = (uint64_t)tail_data;
		tail_txd->gni_desc.length = GNI_READ_ALIGN;

		GNIX_DEBUG(FI_LOG_EP_DATA, "Using two GETs\n");
	}

	COND_ACQUIRE(nic->requires_lock, &nic->lock);

	if (inject_err) {
		_gnix_nic_txd_err_inject(nic, txd);
		status = GNI_RC_SUCCESS;
	} else {
		status = GNI_PostRdma(req->vc->gni_ep, &txd->gni_desc);
	}

	if (status != GNI_RC_SUCCESS) {
		COND_RELEASE(nic->requires_lock, &nic->lock);
		if (tail_txd)
			_gnix_nic_tx_free(nic, tail_txd);
		_gnix_nic_tx_free(nic, txd);
		GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostRdma failed: %s\n",
			  gni_err_str[status]);

		GNIX_DEBUG(FI_LOG_EP_DATA, "\n");
		return gnixu_to_fi_errno(status);
	}

	if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) {
		if (OFI_UNLIKELY(inject_err)) {
			_gnix_nic_txd_err_inject(nic, tail_txd);
			status = GNI_RC_SUCCESS;
		} else {
			status = GNI_PostFma(req->vc->gni_ep,
					     &tail_txd->gni_desc);
		}

		if (status != GNI_RC_SUCCESS) {
			COND_RELEASE(nic->requires_lock, &nic->lock);
			_gnix_nic_tx_free(nic, tail_txd);

			/* Wait for the first TX to complete, then retransmit
			 * the entire thing. */
			ofi_atomic_set32(&req->msg.outstanding_txds, 1);
			req->msg.status = GNI_RC_TRANSACTION_ERROR;

			GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostFma() failed: %s\n",
				  gni_err_str[status]);
			return FI_SUCCESS;
		}

		/* Wait for both TXs to complete, then process the request. */
		ofi_atomic_set32(&req->msg.outstanding_txds, 2);
		req->msg.status = GNI_RC_SUCCESS;

	}

	COND_RELEASE(nic->requires_lock, &nic->lock);

	GNIX_DEBUG(FI_LOG_EP_DATA, "Initiated RNDZV GET, req: %p\n", req);

	return gnixu_to_fi_errno(status);
}