in prov/gni/src/gnix_msg.c [3562:3776]
ssize_t _gnix_sendv(struct gnix_fid_ep *ep, const struct iovec *iov,
void **mdesc, size_t count, uint64_t dest_addr,
void *context, uint64_t flags, uint64_t tag)
{
int i, ret = FI_SUCCESS;
unsigned long long cum_len = 0;
void *tmp = NULL;
struct gnix_vc *vc = NULL;
struct gnix_fab_req *req = NULL;
struct fid_mr *auto_mr;
int connected;
GNIX_DEBUG(FI_LOG_EP_DATA, "iov_count = %lu\n", count);
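/* sends require a bound send CQ or send counter to report status */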
if (!ep->send_cq && !ep->send_cntr) {
return -FI_ENOCQ;
}
if (!(flags & FI_TAGGED)) {
if (!ep->ep_ops.msg_send_allowed)
return -FI_EOPNOTSUPP;
} else {
if (!ep->ep_ops.tagged_send_allowed)
return -FI_EOPNOTSUPP;
}
req = _gnix_fr_alloc(ep);
if (req == NULL) {
return -FI_ENOSPC;
}
GNIX_DEBUG(FI_LOG_EP_DATA, "Created req - %p\n", req);
/* calculate cumulative size of the iovec buf lens */
for (i = 0; i < count; i++) {
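/*
 * Guard against wrap-around while summing the iovec lengths; a
 * minimal sketch (assumes <limits.h> is available; returning
 * -FI_EINVAL here is an assumption, the provider may prefer a
 * different error code).
 */
if (iov[i].iov_len > ULLONG_MAX - cum_len) {
ret = -FI_EINVAL;
goto err_mr_reg;
}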
cum_len += iov[i].iov_len;
GNIX_DEBUG(FI_LOG_EP_DATA, "iov[%d].iov_len = %lu\n", i, iov[i].iov_len);
}
/* Fill out fabric request */
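/*
 * For untagged sends, tag 0 with all ignore bits set presumably lets
 * them flow through the same tag-matching path as tagged messages.
 */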
if (flags & FI_TAGGED) {
req->type = GNIX_FAB_RQ_TSENDV;
req->msg.tag = tag;
req->msg.ignore = 0;
} else {
req->type = GNIX_FAB_RQ_SENDV;
req->msg.tag = 0;
req->msg.ignore = ~0;
}
req->gnix_ep = ep;
req->user_context = context;
req->work_fn = _gnix_send_req;
req->flags = flags;
req->msg.send_flags = flags;
req->msg.imm = 0;
req->msg.parent = NULL;
/*
 * If cum_len >= ep->domain->params.msg_rendezvous_thresh, transfer
 * the iovec entries individually.
 *
 * In that case, use CtPostFma for iovec entries smaller than the
 * rendezvous threshold; for CtPostFma the sum of the iov lens must
 * be <= 1GB, or <= 1MB if the communication domain is configured
 * with FmaSharing. Otherwise use PostRdma.
 */
if (cum_len >= ep->domain->params.msg_rendezvous_thresh) {
if (!mdesc) { /* Register the memory for the user */
for (i = 0; i < count; i++) {
auto_mr = NULL;
ret = _gnix_mr_reg(&ep->domain->domain_fid.fid,
iov[i].iov_base,
iov[i].iov_len,
FI_READ | FI_WRITE, 0, 0, 0,
&auto_mr, NULL, ep->auth_key, GNIX_PROV_REG);
if (ret != FI_SUCCESS) {
GNIX_DEBUG(FI_LOG_EP_DATA,
"Failed to auto-register"
" local buffer: %s\n",
fi_strerror(-ret));
for (i--; i >= 0; i--) {
/* use a local rc so the registration
 * failure held in ret is preserved */
int rc = fi_close(&req->msg.send_md[i]->mr_fid.fid);
if (rc != FI_SUCCESS) {
GNIX_FATAL(FI_LOG_DOMAIN,
"failed to release auto-registered region, "
"rc=%d\n", rc);
}
}
goto err_mr_reg;
}
req->msg.send_md[i] = container_of(
(void *) auto_mr,
struct gnix_fid_mem_desc,
mr_fid);
req->msg.send_info[i].send_addr = (uint64_t) iov[i].iov_base;
req->msg.send_info[i].send_len = iov[i].iov_len;
req->msg.send_info[i].mem_hndl =
req->msg.send_md[i]->mem_hndl;
GNIX_DEBUG(FI_LOG_EP_DATA, "iov[%d].iov_len = %lu,"
" req->msg.send_info[%d].send_addr = "
"%p, req->msg.send_info[%d].send_len "
"= %lu\n", i, iov[i].iov_len, i,
(void *) req->msg.send_info[i].send_addr,
i, req->msg.send_info[i].send_len);
GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.send_md[%d] "
"= %p\n", i,
req->msg.send_md[i]);
GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
req->msg.send_md[i]);
}
req->msg.send_flags |= FI_LOCAL_MR;
} else { /* User registered their memory */
for (i = 0; i < count; i++) {
if (!mdesc[i]) {
GNIX_WARN(FI_LOG_EP_DATA,
"invalid memory registration (%p).\n",
mdesc[i]);
ret = -FI_EINVAL;
goto err_mr_reg;
}
req->msg.send_md[i] =
container_of(mdesc[i],
struct gnix_fid_mem_desc,
mr_fid);
req->msg.send_info[i].send_addr = (uint64_t) iov[i].iov_base;
req->msg.send_info[i].send_len = iov[i].iov_len;
req->msg.send_info[i].mem_hndl =
req->msg.send_md[i]->mem_hndl;
}
}
req->msg.send_iov_cnt = count;
req->msg.send_flags |= GNIX_MSG_RENDEZVOUS;
} else {
/*
* TODO: Use buddy allocator with max alloc lim of
* ep->domain->params.msg_rendezvous_thresh
*/
/* This is freed in __comp_eager_msg_w_data */
tmp = malloc(cum_len);
if (tmp == NULL) {
ret = -FI_ENOMEM;
goto err_mr_reg;
}
__gnix_msg_pack_data_from_iov((uint64_t) tmp, cum_len,
iov, count);
req->msg.send_info[0].send_addr = (uint64_t) tmp;
req->msg.send_info[0].send_len = cum_len;
}
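/*
 * Completion events are suppressed either explicitly via
 * GNIX_SUPPRESS_COMPLETION or when the endpoint uses selective
 * completion and the caller did not request FI_COMPLETION.
 */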
if ((flags & GNIX_SUPPRESS_COMPLETION) ||
(ep->send_selective_completion &&
!(flags & FI_COMPLETION))) {
req->msg.send_flags &= ~FI_COMPLETION;
} else {
req->msg.send_flags |= FI_COMPLETION;
}
req->msg.cum_send_len = (size_t) cum_len;
COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);
ret = _gnix_vc_ep_get_vc(ep, dest_addr, &vc);
if (ret != FI_SUCCESS) {
goto err_get_vc;
}
req->vc = vc;
ret = _gnix_vc_queue_tx_req(req);
connected = (vc->conn_state == GNIX_VC_CONNECTED);
COND_RELEASE(ep->requires_lock, &ep->vc_lock);
/*
* If a new VC was allocated, progress CM before returning.
* If the VC is connected and there's a backlog, poke
* the nic progress engine before returning.
*/
if (!connected) {
_gnix_cm_nic_progress(ep->cm_nic);
} else if (!dlist_empty(&vc->tx_queue)) {
_gnix_nic_progress(vc->ep->nic);
}
return ret;
err_get_vc:
COND_RELEASE(ep->requires_lock, &ep->vc_lock);
if (req->msg.send_flags & FI_LOCAL_MR) {
for (i = 0; i < count; i++) {
fi_close(&req->msg.send_md[i]->mr_fid.fid);
}
}
err_mr_reg:
_gnix_fr_free(ep, req);
return ret;
}
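/*
 * Hypothetical sketch (not the provider's actual helper) of the
 * contiguous packing the eager path above delegates to
 * __gnix_msg_pack_data_from_iov: copy each iovec entry, in order,
 * into the single malloc'd buffer. Assumes <string.h> and that the
 * destination length equals the summed iovec lengths.
 */
static void __example_pack_iov(uint64_t dst_addr, size_t dst_len,
const struct iovec *iov, size_t count)
{
size_t i, offset = 0;

for (i = 0; i < count && offset < dst_len; i++) {
size_t bytes = iov[i].iov_len <= dst_len - offset ?
iov[i].iov_len : dst_len - offset;

memcpy((void *) (dst_addr + offset), iov[i].iov_base, bytes);
offset += bytes;
}
}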