in hw/qib/qib_rc.c [1714:2131]
void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
u32 pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
unsigned long flags;
int ret;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
hdrsize = 8 + 12; /* LRH + BTH */
} else {
ohdr = &hdr->u.l.oth;
hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
}
opcode = be32_to_cpu(ohdr->bth[0]);
if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
return;
psn = be32_to_cpu(ohdr->bth[2]);
opcode >>= 24;
/*
* Process responses (ACKs) before anything else. Note that the
* packet sequence number will be for something in the send work
* queue rather than the expected receive packet sequence number.
* In other words, this QP is the requester.
*/
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
hdrsize, pmtu, rcd);
return;
}
/* Compute 24 bits worth of difference. */
diff = qib_cmp24(psn, qp->r_psn);
if (unlikely(diff)) {
if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
return;
goto send_ack;
}
/* Check for opcode sequence errors. */
switch (qp->r_state) {
case OP(SEND_FIRST):
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
goto nack_inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
break;
goto nack_inv;
default:
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
goto nack_inv;
/*
* Note that it is up to the requester to not send a new
* RDMA read or atomic operation before receiving an ACK
* for the previous operation.
*/
break;
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
goto nack_inv;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
goto send_last_imm;
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto no_immediate_data;
fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
case OP(SEND_LAST):
case OP(RDMA_WRITE_LAST):
no_immediate_data:
wc.wc_flags = 0;
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto nack_inv;
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto nack_inv;
/* consume RWQE */
reth = &ohdr->u.rc.reth;
hdrsize += sizeof(*reth);
qp->r_len = be32_to_cpu(reth->length);
qp->r_rcv_len = 0;
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey & NAK */
ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto nack_acc;
qp->r_sge.num_sge = 1;
} else {
qp->r_sge.num_sge = 0;
qp->r_sge.sge.mr = NULL;
qp->r_sge.sge.vaddr = NULL;
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
if (opcode == OP(RDMA_WRITE_FIRST))
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret) {
rvt_put_ss(&qp->r_sge);
goto rnr_nak;
}
wc.ex.imm_data = ohdr->u.rc.imm_data;
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
case OP(RDMA_READ_REQUEST): {
struct rvt_ack_entry *e;
u32 len;
u8 next;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto nack_inv;
next = qp->r_head_ack_queue + 1;
/* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
if (next > QIB_MAX_RDMA_ATOMIC)
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
qib_update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey & NAK */
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
e->rdma_sge.length = 0;
e->rdma_sge.sge_length = 0;
}
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
e->lpsn = qp->r_psn;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
qp->r_msn++;
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
qp->s_flags |= RVT_S_RESP_PENDING;
qib_schedule_send(qp);
goto sunlock;
}
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
struct rvt_ack_entry *e;
u64 vaddr;
atomic64_t *maddr;
u64 sdata;
u32 rkey;
u8 next;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto nack_inv;
next = qp->r_head_ack_queue + 1;
if (next > QIB_MAX_RDMA_ATOMIC)
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
qib_update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
e->lpsn = psn;
qp->r_msn++;
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
/* Schedule the send tasklet. */
qp->s_flags |= RVT_S_RESP_PENDING;
qib_schedule_send(qp);
goto sunlock;
}
default:
/* NAK unknown opcodes. */
goto nack_inv;
}
qp->r_psn++;
qp->r_state = opcode;
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & (1 << 31))
goto send_ack;
return;
rnr_nak:
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
qp->r_ack_psn = qp->r_psn;
/* Queue RNR NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
nack_op_err:
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
qib_send_rc_ack(qp);
return;
sunlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
}