in prov/gni/src/gnix_atomic.c [492:702]
ssize_t _gnix_atomic(struct gnix_fid_ep *ep,
enum gnix_fab_req_type fr_type,
const struct fi_msg_atomic *msg,
const struct fi_ioc *comparev,
void **compare_desc,
size_t compare_count,
struct fi_ioc *resultv,
void **result_desc,
size_t result_count,
uint64_t flags)
{
struct gnix_vc *vc;
struct gnix_fab_req *req;
struct gnix_fid_mem_desc *md = NULL;
int rc, len;
struct fid_mr *auto_mr = NULL;
void *mdesc = NULL;
uint64_t compare_operand = 0;
void *loc_addr = NULL;
int dt_len, dt_align;
int connected;
	if (!ep || !msg || !msg->msg_iov ||
	    msg->msg_iov[0].count != 1 ||
	    msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT ||
	    !msg->rma_iov)
		return -FI_EINVAL;

	/* Unless the request is injected, its completion needs somewhere to
	 * land: the send CQ, the write counter for non-fetching AMOs, or the
	 * read counter for fetching and compare AMOs. */
	if (!(flags & FI_INJECT) && !ep->send_cq &&
	    (((fr_type == GNIX_FAB_RQ_AMO ||
	       fr_type == GNIX_FAB_RQ_NAMO_AX ||
	       fr_type == GNIX_FAB_RQ_NAMO_AX_S) &&
	      !ep->write_cntr) ||
	     ((fr_type == GNIX_FAB_RQ_FAMO ||
	       fr_type == GNIX_FAB_RQ_CAMO ||
	       fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	       fr_type == GNIX_FAB_RQ_NAMO_FAX_S) &&
	      !ep->read_cntr))) {
		return -FI_ENOCQ;
	}
	/*
	 * Per the fi_atomic man page, only FI_ATOMIC_READ may be issued
	 * without a source buffer.
	 */
	if ((msg->op != FI_ATOMIC_READ) &&
	    !msg->msg_iov[0].addr)
		return -FI_EINVAL;
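	/* Triggered requests must use threshold triggers and cannot be
	 * combined with FI_INJECT. */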
if (flags & FI_TRIGGER) {
struct fi_triggered_context *trigger_context =
(struct fi_triggered_context *)msg->context;
if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) ||
(flags & FI_INJECT)) {
return -FI_EINVAL;
}
}
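	/* Compare AMOs carry the compare value in comparev[0]. */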
if (fr_type == GNIX_FAB_RQ_CAMO) {
if (!comparev || !comparev[0].addr || compare_count != 1)
return -FI_EINVAL;
compare_operand = *(uint64_t *)comparev[0].addr;
}
	dt_len = ofi_datatype_size(msg->datatype);
	dt_align = dt_len - 1;
	len = dt_len * msg->msg_iov->count;

	/* The target address must be naturally aligned for the datatype;
	 * dt_len is a power of two, so (dt_len - 1) serves as the mask. */
	if (msg->rma_iov->addr & dt_align) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "Invalid target alignment: 0x%lx (mask 0x%x)\n",
			  msg->rma_iov->addr, dt_align);
		return -FI_EINVAL;
	}
/* need a memory descriptor for all fetching and comparison AMOs */
if (fr_type == GNIX_FAB_RQ_FAMO ||
fr_type == GNIX_FAB_RQ_CAMO ||
fr_type == GNIX_FAB_RQ_NAMO_FAX ||
fr_type == GNIX_FAB_RQ_NAMO_FAX_S) {
if (!resultv || !resultv[0].addr || result_count != 1)
return -FI_EINVAL;
loc_addr = resultv[0].addr;
		if ((uint64_t)loc_addr & dt_align) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Invalid source alignment: %p (mask 0x%x)\n",
				  loc_addr, dt_align);
			return -FI_EINVAL;
		}
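		/* If no result descriptor was supplied, register the fetch
		 * buffer on the fly and flag the request with FI_LOCAL_MR so
		 * the provider-owned registration is released later. */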
if (!result_desc || !result_desc[0]) {
rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
loc_addr, len, FI_READ | FI_WRITE,
0, 0, 0, &auto_mr,
NULL, ep->auth_key, GNIX_PROV_REG);
if (rc != FI_SUCCESS) {
GNIX_INFO(FI_LOG_EP_DATA,
"Failed to auto-register local buffer: %d\n",
rc);
return rc;
}
flags |= FI_LOCAL_MR;
mdesc = (void *)auto_mr;
GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
auto_mr);
} else {
mdesc = result_desc[0];
}
}
/* setup fabric request */
req = _gnix_fr_alloc(ep);
if (!req) {
GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
rc = -FI_ENOSPC;
goto err_fr_alloc;
}
req->type = fr_type;
req->gnix_ep = ep;
req->user_context = msg->context;
req->work_fn = _gnix_amo_post_req;
if (mdesc) {
md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);
}
req->amo.loc_md = (void *)md;
req->amo.loc_addr = (uint64_t)loc_addr;
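	/*
	 * Pack the operand fields for the request:
	 *  - NAMO AX/FAX ops take two 64-bit operands from the single IOV;
	 *  - FI_ATOMIC_READ is carried as a fetching AND with an all-ones
	 *    operand, which leaves the target untouched;
	 *  - FI_CSWAP and FI_MSWAP fold the compare value into the pair;
	 *  - everything else uses a single 64-bit operand.
	 */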
if ((fr_type == GNIX_FAB_RQ_NAMO_AX) ||
(fr_type == GNIX_FAB_RQ_NAMO_FAX) ||
(fr_type == GNIX_FAB_RQ_NAMO_AX_S) ||
(fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) {
req->amo.first_operand =
*(uint64_t *)msg->msg_iov[0].addr;
req->amo.second_operand =
*((uint64_t *)(msg->msg_iov[0].addr) + 1);
} else if (msg->op == FI_ATOMIC_READ) {
req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */
} else if (msg->op == FI_CSWAP) {
req->amo.first_operand = compare_operand;
req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
} else if (msg->op == FI_MSWAP) {
req->amo.first_operand = ~compare_operand;
req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
req->amo.second_operand &= compare_operand;
} else {
req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr;
}
req->amo.rem_addr = msg->rma_iov->addr;
req->amo.rem_mr_key = msg->rma_iov->key;
req->amo.len = len;
req->amo.imm = msg->data;
req->amo.datatype = msg->datatype;
req->amo.op = msg->op;
req->flags = flags;
	/* Inject interfaces always suppress completions. If the send CQ was
	 * bound with FI_SELECTIVE_COMPLETION, honor the per-request
	 * FI_COMPLETION flag. Otherwise, always deliver a completion. */
if ((flags & GNIX_SUPPRESS_COMPLETION) ||
(ep->send_selective_completion && !(flags & FI_COMPLETION))) {
req->flags &= ~FI_COMPLETION;
} else {
req->flags |= FI_COMPLETION;
}
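	/* The VC lock covers the VC lookup, TX queueing, and the
	 * connection-state snapshot taken below. */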
COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);
/* find VC for target */
rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc);
if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_vc_ep_get_vc() failed, addr: %lx, rc: %d\n",
			  msg->addr, rc);
goto err_get_vc;
}
req->vc = vc;
rc = _gnix_vc_queue_tx_req(req);
connected = (vc->conn_state == GNIX_VC_CONNECTED);
COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	/*
	 * If a new VC was allocated, progress the CM before returning.
	 * If the VC is connected and there is a TX backlog, poke the
	 * NIC progress engine before returning.
	 */
if (!connected) {
_gnix_cm_nic_progress(ep->cm_nic);
} else if (!dlist_empty(&vc->tx_queue)) {
_gnix_nic_progress(vc->ep->nic);
}
return rc;
err_get_vc:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	_gnix_fr_free(ep, req);
err_fr_alloc:
if (auto_mr) {
fi_close(&auto_mr->fid);
}
return rc;
}