in hw/hfi1/rc.c [2763:3207]
/**
* hfi1_rc_rcv - process one received RC packet for a QP
* @packet: receive descriptor; supplies the QP, the receive context, the
*          header pointer, the payload pointer, the total/header lengths,
*          the opcode, the pad count and the extra byte count read below
*
* The caller must hold qp->r_lock (checked with lockdep_assert_held()).
*
* Processing order:
*  1. Return without any reply if hfi1_ruc_check_hdr() rejects the packet.
*  2. Opcodes in the range RDMA_READ_RESPONSE_FIRST..ATOMIC_ACKNOWLEDGE are
*     handed to rc_rcv_resp() and nothing else is done here.
*  3. If the PSN differs from qp->r_psn, rc_rcv_error() decides whether the
*     packet is finished with or whether an ACK must be sent right away.
*  4. The opcode is checked against the previously accepted opcode stored
*     in qp->r_state; an out-of-sequence opcode is NAKed as invalid.
*  5. The request itself is executed (send, RDMA write, RDMA read request,
*     atomic).  Read and atomic requests are queued in qp->s_ack_queue under
*     qp->s_lock and return directly after scheduling the send engine.
*  6. For send/write packets the expected PSN is advanced and an ACK is
*     either sent immediately or deferred.
*
* Error exits: rnr_nak, nack_op_err and nack_inv queue their NAK through
* rc_defered_ack(); nack_acc falls into send_ack and is sent immediately
* through hfi1_send_rc_ack().
*/
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_qp_priv *qpriv = qp->priv;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn = ib_bth_get_psn(packet->ohdr);
u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
unsigned long flags;
int ret;
bool copy_last = false, fecn;
u32 rkey;
/*
* Bytes of the packet that are neither header nor payload; subtracted
* from tlen (together with hdrsize) to obtain the payload length.
* NOTE(review): SIZE_OF_CRC << 2 presumably converts a dword count to
* bytes - confirm against the definition of SIZE_OF_CRC.
*/
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
/* The receive side of the QP must already be locked by the caller. */
lockdep_assert_held(&qp->r_lock);
/* A packet that fails the header check is dropped without a reply. */
if (hfi1_ruc_check_hdr(ibp, packet))
return;
/*
* The result is used below to force an immediate ACK and to set
* RVT_S_ECN when a read/atomic response is queued.
*/
fecn = process_ecn(qp, packet);
opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
/*
* Process responses (ACKs) before anything else. Note that the
* packet sequence number will be for something in the send work
* queue rather than the expected receive packet sequence number.
* In other words, this QP is the requester.
*/
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
rc_rcv_resp(packet);
return;
}
/* Compute 24 bits worth of difference. */
diff = delta_psn(psn, qp->r_psn);
if (unlikely(diff)) {
/*
* Not the expected PSN.  A non-zero return from rc_rcv_error()
* means there is nothing more to do here; a zero return means
* an ACK/NAK has to go out immediately.
*/
if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
return;
goto send_ack;
}
/*
* Check for opcode sequence errors.  qp->r_state holds the opcode of
* the last request accepted by this function.
*/
switch (qp->r_state) {
case OP(SEND_FIRST):
case OP(SEND_MIDDLE):
/* In the middle of a send: only MIDDLE or one of the LASTs may follow. */
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
opcode == OP(SEND_LAST_WITH_INVALIDATE))
break;
goto nack_inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
/* In the middle of an RDMA write: only MIDDLE or a LAST may follow. */
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
break;
goto nack_inv;
default:
/* No message in progress: a MIDDLE or LAST packet is out of sequence. */
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
goto nack_inv;
/*
* Note that it is up to the requester to not send a new
* RDMA read or atomic operation before receiving an ACK
* for the previous operation.
*/
break;
}
/*
* First in-sequence request seen while the QP is still in RTR and
* RVT_R_COMM_EST is not yet set: notify through rvt_comm_est().
*/
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
/* Negative return: operational error NAK; zero: RNR NAK. */
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
/* Also entered by goto from RDMA_WRITE_FIRST once r_sge is set up. */
send_middle:
/* Check for invalid length PMTU or posted rwqe len. */
/*
* There will be no padding for 9B packet but 16B packets
* will come in with some padding since we always add
* CRC and LT bytes which will need to be flit aligned
*/
/* A FIRST/MIDDLE packet must carry exactly one PMTU of payload. */
if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
goto nack_inv;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
goto send_last_imm;
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
case OP(SEND_ONLY_WITH_INVALIDATE):
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
/* Dispatch to the matching LAST handling for the completion flags. */
if (opcode == OP(SEND_ONLY))
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
case OP(SEND_LAST_WITH_INVALIDATE):
send_last_inv:
rkey = be32_to_cpu(ohdr->u.ieth);
/*
* On a non-zero return the completion is reported without the
* invalidate flag/rkey (wc_flags is cleared at no_immediate_data).
* NOTE(review): presumably non-zero means the invalidate failed -
* confirm against rvt_invalidate_rkey().
*/
if (rvt_invalidate_rkey(qp, rkey))
goto no_immediate_data;
wc.ex.invalidate_rkey = rkey;
wc.wc_flags = IB_WC_WITH_INVALIDATE;
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
wc.ex.imm_data = 0;
/* Common tail for every LAST/ONLY send and write opcode. */
send_last:
/* Check for invalid length. */
/* LAST len should be >= 1 */
/*
* The test only rejects packets shorter than header plus trailer,
* so a zero-length payload is accepted here; this path is shared
* with the ONLY opcodes.
*/
if (unlikely(tlen < (hdrsize + extra_bytes)))
goto nack_inv;
/* Don't count the CRC(and padding and LT byte for 16B). */
tlen -= (hdrsize + extra_bytes);
/* Total message length: this payload plus what earlier packets delivered. */
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
/* No completion is generated unless RVT_R_WRID_VALID was set. */
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
/* Only the low 16 bits of the remote LID are reported in the WC. */
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
* 11.4.2 of IBTA Vol. 1).
*
* However, the way the SL is chosen below is consistent
* with the way that IB/qib works and is trying to avoid
* introducing incompatibilities.
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
/* Start of an RDMA write: the QP must allow remote writes. */
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto nack_inv;
/* consume RWQE */
/* Target length comes from the RETH; r_sge is rebuilt from rkey/vaddr. */
reth = &ohdr->u.rc.reth;
qp->r_len = be32_to_cpu(reth->length);
qp->r_rcv_len = 0;
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
rkey, IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
goto nack_acc;
qp->r_sge.num_sge = 1;
} else {
/* Zero-length write: no rkey check, leave an empty SGE. */
qp->r_sge.num_sge = 0;
qp->r_sge.sge.mr = NULL;
qp->r_sge.sge.vaddr = NULL;
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
if (opcode == OP(RDMA_WRITE_FIRST))
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
/* Only RDMA_WRITE_ONLY_WITH_IMMEDIATE reaches this point. */
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret) {
/* peer will send again */
rvt_put_ss(&qp->r_sge);
goto rnr_nak;
}
wc.ex.imm_data = ohdr->u.rc.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
case OP(RDMA_READ_REQUEST): {
struct rvt_ack_entry *e;
u32 len;
u8 next;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto nack_inv;
/* Index of the ack queue slot after the current head, with wrap. */
next = qp->r_head_ack_queue + 1;
/* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
/*
* Advancing the head would reach s_acked_ack_queue.  If that entry
* has not been sent yet the request is NAKed as invalid; otherwise
* update_ack_queue() is called before the slot is reused.
*/
if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
release_rdma_sge_mr(e);
reth = &ohdr->u.rc.reth;
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
/* Zero-length read: nothing to reference in the ack entry. */
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
e->rdma_sge.length = 0;
e->rdma_sge.sge_length = 0;
}
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
/* Recorded before the increment below, after the remainder was added above. */
e->lpsn = qp->r_psn;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
qp->r_msn++;
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
if (fecn)
qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
/* No ACK is generated on this path; the request was queued above. */
return;
}
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
u64 vaddr = get_ib_ateth_vaddr(ateth);
/*
* A COMPARE_SWAP aimed at HFI1_VERBS_E_ATOMIC_VADDR is treated as an
* OPFN request: it skips the access-flag, alignment and rkey checks
* and no atomic memory operation is performed on its behalf here.
*/
bool opfn = opcode == OP(COMPARE_SWAP) &&
vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
struct rvt_ack_entry *e;
atomic64_t *maddr;
u64 sdata;
u32 rkey;
u8 next;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
!opfn))
goto nack_inv;
/* Same ack queue slot handling as for RDMA_READ_REQUEST above. */
next = qp->r_head_ack_queue + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
release_rdma_sge_mr(e);
/* Process OPFN special virtual address */
if (opfn) {
opfn_conn_response(qp, e, ateth);
goto ack;
}
/* The target address must be aligned to sizeof(u64). */
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
sdata = get_ib_ateth_swap(ateth);
/*
* FETCH_ADD: the add-return result minus the addend is saved.
* COMPARE_SWAP: the value returned by cmpxchg() is saved.
*/
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
get_ib_ateth_compare(ateth),
sdata);
/* Drop the MR reference that is held in r_sge.sge.mr after the rkey check. */
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
/* Shared by the OPFN and the regular atomic path; s_lock is still held. */
ack:
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
e->lpsn = psn;
qp->r_msn++;
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
if (fecn)
qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
return;
}
default:
/* NAK unknown opcodes. */
goto nack_inv;
}
/* Reached by the send/write cases that left the switch with break. */
qp->r_psn++;
qp->r_state = opcode;
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
/*
* NOTE(review): the IB_BTH_REQ_ACK test relies on ib_bth_get_psn()
* leaving that flag bit in the returned value - confirm.
*/
if (psn & IB_BTH_REQ_ACK || fecn) {
/*
* ACK immediately when packet->numpkt is zero, when fecn is set,
* or once HFI1_PSN_CREDIT ACKs have been deferred; otherwise count
* this one as deferred and queue it.
*/
if (packet->numpkt == 0 || fecn ||
qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
qp->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
/* rvt_get_rwqe() returned zero: reply with an RNR NAK. */
rnr_nak:
qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
qp->r_ack_psn = qp->r_psn;
/* Queue RNR NAK for later */
rc_defered_ack(rcd, qp);
return;
/* rvt_get_rwqe() returned a negative value. */
nack_op_err:
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
rc_defered_ack(rcd, qp);
return;
/* Entry for paths that still hold qp->s_lock; falls into nack_inv. */
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
rc_defered_ack(rcd, qp);
return;
/* Entry for paths that still hold qp->s_lock; falls into nack_acc. */
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
/* rkey check failed; unlike the other NAKs this one is sent at once. */
nack_acc:
rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
hfi1_send_rc_ack(packet, fecn);
}