in prov/gni/src/gnix_msg.c [1098:1273]
static int __gnix_rndzv_req(void *arg)
{
struct gnix_fab_req *req = (struct gnix_fab_req *)arg;
struct gnix_fid_ep *ep = req->gnix_ep;
struct gnix_nic *nic = ep->nic;
struct gnix_tx_descriptor *txd, *tail_txd = NULL;
gni_return_t status;
int rc;
int use_tx_cq_blk = 0;
struct fid_mr *auto_mr = NULL;
int inject_err = _gnix_req_inject_err(req);
int head_off, head_len, tail_len;
void *tail_data = NULL;
GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n");
if (req->vc->modes & GNIX_VC_MODE_XPMEM)
return __gnix_rndzv_req_xpmem(req);
if (!req->msg.recv_md[0]) {
rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
(void *)req->msg.recv_info[0].recv_addr,
req->msg.recv_info[0].recv_len,
FI_READ | FI_WRITE, 0, 0, 0,
&auto_mr, NULL, ep->auth_key, GNIX_PROV_REG);
if (rc != FI_SUCCESS) {
GNIX_DEBUG(FI_LOG_EP_DATA,
"Failed to auto-register local buffer: %d\n",
rc);
return -FI_EAGAIN;
}
req->msg.recv_flags |= FI_LOCAL_MR;
req->msg.recv_md[0] = container_of(auto_mr,
struct gnix_fid_mem_desc,
mr_fid);
req->msg.recv_info[0].mem_hndl = req->msg.recv_md[0]->mem_hndl;
GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr);
}
rc = _gnix_nic_tx_alloc(nic, &txd);
if (rc) {
GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n",
rc);
return -FI_ENOSPC;
}
txd->completer_fn = __gnix_rndzv_req_complete;
txd->req = req;
use_tx_cq_blk = (ep->domain->data_progress == FI_PROGRESS_AUTO) ? 1 : 0;
txd->gni_desc.type = GNI_POST_RDMA_GET;
txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
txd->gni_desc.local_mem_hndl = req->msg.recv_info[0].mem_hndl;
txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh;
txd->gni_desc.rdma_mode = 0;
txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ?
nic->tx_cq_blk : nic->tx_cq;
head_off = req->msg.send_info[0].send_addr & GNI_READ_ALIGN_MASK;
head_len = head_off ? GNI_READ_ALIGN - head_off : 0;
tail_len = (req->msg.send_info[0].send_addr + req->msg.send_info[0].send_len) &
GNI_READ_ALIGN_MASK;
txd->gni_desc.local_addr = (uint64_t)req->msg.recv_info[0].recv_addr + head_len;
txd->gni_desc.remote_addr = (uint64_t)req->msg.send_info[0].send_addr + head_len;
txd->gni_desc.length = req->msg.send_info[0].send_len - head_len - tail_len;
if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) {
/* The user ended up with a send matching a receive with a
* buffer that is too short and unaligned... what a way to
* behave. We could not have forseen which unaligned data to
* send across with the rndzv_start request, so we do an extra
* TX here to pull the random unaligned bytes. */
rc = _gnix_nic_tx_alloc(nic, &tail_txd);
if (rc) {
_gnix_nic_tx_free(nic, txd);
GNIX_DEBUG(FI_LOG_EP_DATA,
"_gnix_nic_tx_alloc() failed (tail): %d\n",
rc);
return -FI_ENOSPC;
}
if (req->int_tx_buf_e == NULL) {
req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep);
if (req->int_tx_buf_e == NULL) {
GNIX_FATAL(FI_LOG_EP_DATA,
"RAN OUT OF INT_TX_BUFS");
/* TODO return error */
}
}
req->int_tx_buf = ((struct gnix_int_tx_buf *)
req->int_tx_buf_e)->buf;
req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e);
tail_txd->completer_fn = __gnix_rndzv_req_complete;
tail_txd->req = req;
tail_data = (void *)((req->msg.send_info[0].send_addr +
req->msg.send_info[0].send_len) &
~GNI_READ_ALIGN_MASK);
tail_txd->gni_desc.type = GNI_POST_FMA_GET;
tail_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
tail_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
tail_txd->gni_desc.local_mem_hndl = req->int_tx_mdh;
tail_txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh;
tail_txd->gni_desc.rdma_mode = 0;
tail_txd->gni_desc.src_cq_hndl = nic->tx_cq;
tail_txd->gni_desc.local_addr = (uint64_t)req->int_tx_buf;
tail_txd->gni_desc.remote_addr = (uint64_t)tail_data;
tail_txd->gni_desc.length = GNI_READ_ALIGN;
GNIX_DEBUG(FI_LOG_EP_DATA, "Using two GETs\n");
}
COND_ACQUIRE(nic->requires_lock, &nic->lock);
if (inject_err) {
_gnix_nic_txd_err_inject(nic, txd);
status = GNI_RC_SUCCESS;
} else {
status = GNI_PostRdma(req->vc->gni_ep, &txd->gni_desc);
}
if (status != GNI_RC_SUCCESS) {
COND_RELEASE(nic->requires_lock, &nic->lock);
if (tail_txd)
_gnix_nic_tx_free(nic, tail_txd);
_gnix_nic_tx_free(nic, txd);
GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostRdma failed: %s\n",
gni_err_str[status]);
GNIX_DEBUG(FI_LOG_EP_DATA, "\n");
return gnixu_to_fi_errno(status);
}
if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) {
if (OFI_UNLIKELY(inject_err)) {
_gnix_nic_txd_err_inject(nic, tail_txd);
status = GNI_RC_SUCCESS;
} else {
status = GNI_PostFma(req->vc->gni_ep,
&tail_txd->gni_desc);
}
if (status != GNI_RC_SUCCESS) {
COND_RELEASE(nic->requires_lock, &nic->lock);
_gnix_nic_tx_free(nic, tail_txd);
/* Wait for the first TX to complete, then retransmit
* the entire thing. */
ofi_atomic_set32(&req->msg.outstanding_txds, 1);
req->msg.status = GNI_RC_TRANSACTION_ERROR;
GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostFma() failed: %s\n",
gni_err_str[status]);
return FI_SUCCESS;
}
/* Wait for both TXs to complete, then process the request. */
ofi_atomic_set32(&req->msg.outstanding_txds, 2);
req->msg.status = GNI_RC_SUCCESS;
}
COND_RELEASE(nic->requires_lock, &nic->lock);
GNIX_DEBUG(FI_LOG_EP_DATA, "Initiated RNDZV GET, req: %p\n", req);
return gnixu_to_fi_errno(status);
}