in hw/hfi1/rc.c [67:377]
/**
* make_rc_ack - build the next responder-side packet for an RC QP
* @dev: the device; used here only for the ack queue bound obtained from
*       rvt_size_atomic(&dev->rdi)
* @qp: the QP whose ack state (s_ack_state, s_tail_ack_queue, ...) is advanced
* @ohdr: the header area that receives the AETH / atomic ack fields
* @ps: packet state; supplies the txreq (ps->s_txreq) being filled in and
*      the saved IRQ flags (ps->flags) used on the error path
*
* Dispatches on qp->s_ack_state, which holds the state left behind by the
* previous call, and builds one packet: an RDMA read response, an atomic
* acknowledge, a TID RDMA write/read response or a plain ACK/NAK.
*
* The caller must hold qp->s_lock (checked with lockdep_assert_held()).
* On the error_qp path the lock is dropped and re-taken before returning.
*
* Return: 1 if a packet was built and passed to hfi1_make_ruc_header();
* 0 if nothing was built (bail path, or the TID write response builder
* returned a zero header length).
*/
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
struct ib_other_headers *ohdr,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
u32 hwords, hdrlen;
u32 len = 0;
u32 bth0 = 0, bth2 = 0;
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
int middle = 0;
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *qpriv = qp->priv;
bool last_pkt;
u32 delta;
/* Candidate for the new tail index; only stored back once advanced. */
u8 next = qp->s_tail_ack_queue;
struct tid_rdma_request *req;
/* Trace entry; the second argument is 0 here and e->psn further down. */
trace_hfi1_rsp_make_rc_ack(qp, 0);
lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
/* Start with the fixed LRH+BTH size; each case adds its own extension. */
if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
else
/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
hwords = 7;
switch (qp->s_ack_state) {
case OP(RDMA_READ_RESPONSE_LAST):
case OP(RDMA_READ_RESPONSE_ONLY):
/* The read response is complete: hand its entry to the MR release helper. */
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
* response has been sent instead of only being
* constructed.
*
* Index rvt_size_atomic(&dev->rdi) is itself still accepted;
* the wrap to 0 happens only once next goes past it.
*/
if (++next > rvt_size_atomic(&dev->rdi))
next = 0;
/*
* Only advance the s_acked_ack_queue pointer if there
* have been no TID RDMA requests.
*
* Concretely: the entry being retired is not a TID WRITE_REQ
* and s_acked_ack_queue is currently equal to the tail.
*/
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->opcode != TID_OP(WRITE_REQ) &&
qp->s_acked_ack_queue == qp->s_tail_ack_queue)
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
/* e still points at the entry that was just retired, not the new tail. */
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/*
* Check for no next entry in the queue. With head == tail, send
* a plain ACK if one is pending; otherwise there is nothing to do.
*/
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
if (qp->s_flags & RVT_S_ACK_PENDING)
goto normal;
goto bail;
}
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
/*
* Check for tid write fence. If one applies, set IOWAIT_PENDING_IB
* on the QP's iowait and send nothing on this call.
*/
if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_ack_interlock(qp, e)) {
iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
goto bail;
}
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
* we haven't seen the duplicate request yet,
* then stop sending the remaining responses the
* responder has seen until the requester re-sends it.
*
* That case is detected as a non-zero length with no MR;
* the tail (and s_acked_ack_queue, if it was tracking the
* tail) is then moved up to the head.
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
if (qp->s_acked_ack_queue ==
qp->s_tail_ack_queue)
qp->s_acked_ack_queue =
qp->r_head_ack_queue;
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
/* Copy SGE state in case we need to resend */
ps->s_txreq->mr = e->rdma_sge.mr;
/* The txreq takes its own MR reference when there is an MR. */
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
/*
* More than one PMTU of data: send pmtu bytes now and let the
* RDMA_READ_RESPONSE_FIRST/MIDDLE cases continue on later calls.
* Otherwise the whole response fits in this packet and the
* entry is marked as sent.
*/
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
} else {
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
/* The AETH adds one 32-bit word to the header. */
ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
/* Start from the entry's PSN; post-increment for the next packet. */
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else if (e->opcode == TID_OP(WRITE_REQ)) {
/*
* If a TID RDMA WRITE RESP is being resent, we have to
* wait for the actual request. All requests that are to
* be resent will have their state set to
* TID_REQUEST_RESEND. When the new request arrives, the
* state will be changed to TID_REQUEST_RESEND_ACTIVE.
*/
req = ack_to_tid_req(e);
if (req->state == TID_REQUEST_RESEND ||
req->state == TID_REQUEST_INIT_RESEND)
goto bail;
qp->s_ack_state = TID_OP(WRITE_RESP);
/* Response PSN is the entry's PSN offset by the current segment. */
qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
/* Jumps into the TID_OP(WRITE_RESP) case below. */
goto write_resp;
} else if (e->opcode == TID_OP(READ_REQ)) {
/*
* If a TID RDMA read response is being resent and
* we haven't seen the duplicate request yet,
* then stop sending the remaining responses the
* responder has seen until the requester re-sends it.
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
if (qp->s_acked_ack_queue ==
qp->s_tail_ack_queue)
qp->s_acked_ack_queue =
qp->r_head_ack_queue;
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
/* Copy SGE state in case we need to resend */
ps->s_txreq->mr = e->rdma_sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_ack_state = TID_OP(READ_RESP);
/* Jumps into the TID_OP(READ_RESP) case below. */
goto read_resp;
} else {
/* COMPARE_SWAP or FETCH_ADD */
ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
/* Header grows by the atomic ack extension, counted in 32-bit words. */
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
}
/*
* Only the RDMA_READ_REQUEST and atomic branches reach this point;
* both TID branches above leave via goto. The opcode chosen above
* goes into bits 31:24 of bth0.
*/
trace_hfi1_tid_write_rsp_make_rc_ack(qp);
bth0 = qp->s_ack_state << 24;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
/* The first packet was built on an earlier call; continue as MIDDLE. */
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
/* Continue from the SGE state saved in qp->s_ack_rdma_sge. */
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
/*
* More than one PMTU left: send another pmtu-sized packet and stay
* in MIDDLE; middle (set from the SDMA_AHG capability) is passed on
* to hfi1_make_ruc_header(). Otherwise this is the last packet: it
* carries an AETH, the state becomes RDMA_READ_RESPONSE_LAST and the
* entry is marked as sent.
*/
if (len > pmtu) {
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
} else {
ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
e->sent = 1;
}
bth0 = qp->s_ack_state << 24;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
break;
case TID_OP(WRITE_RESP):
write_resp:
/*
* 1. Check if RVT_S_ACK_PENDING is set. If yes,
* goto normal.
* 2. Attempt to allocate TID resources.
* 3. Remove RVT_S_RESP_PENDING flags from s_flags
* 4. If resources not available:
* 4.1 Set RVT_S_WAIT_TID_SPACE
* 4.2 Queue QP on RCD TID queue
* 4.3 Put QP on iowait list.
* 4.4 Build IB RNR NAK with appropriate timeout value
* 4.5 Return indication progress made.
* 5. If resources are available:
* 5.1 Program HW flow CSRs
* 5.2 Build TID RDMA WRITE RESP packet
* 5.3 If more resources needed, do 2.1 - 2.3.
* 5.4 Wake up next QP on RCD TID queue.
* 5.5 Return indication progress made.
*
* NOTE(review): steps "2.1 - 2.3" are not defined in this list;
* 4.1 - 4.3 may be what is meant - confirm.
* NOTE(review): of these steps, only the RNR NAK check and the call
* to hfi1_build_tid_rdma_write_resp() are visible in this function;
* the rest presumably happens elsewhere - confirm.
*/
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
req = ack_to_tid_req(e);
/*
* Send scheduled RNR NAK's. RNR NAK's need to be sent at
* segment boundaries, not at request boundaries. Don't change
* s_ack_state because we are still in the middle of a request
*/
if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
req->cur_seg == req->alloc_seg) {
qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
/* Enters the plain ACK path after its s_ack_state assignment. */
goto normal_no_state;
}
bth2 = mask_psn(qp->s_ack_rdma_psn);
hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
bth2, &len,
&ps->s_txreq->ss);
/*
* A zero header length means no packet was built. Unlike the bail
* path, this return leaves s_ack_state and s_flags untouched.
*/
if (!hdrlen)
return 0;
hwords += hdrlen;
bth0 = qp->s_ack_state << 24;
qp->s_ack_rdma_psn++;
trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
e->lpsn, req);
/* More segments to respond to: stay in the TID WRITE_RESP state. */
if (req->cur_seg != req->total_segs)
break;
e->sent = 1;
/*
* Do not free e->rdma_sge until all data are received.
* ATOMIC_ACKNOWLEDGE is the state that advances the tail on the
* next call without going through release_rdma_sge_mr().
*/
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
break;
case TID_OP(READ_RESP):
read_resp:
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
&bth1, &bth2, &len,
&last_pkt);
/* A zero return from the builder is treated as a fatal QP error. */
if (delta == 0)
goto error_qp;
hwords += delta;
if (last_pkt) {
e->sent = 1;
/*
* Increment qp->s_tail_ack_queue through s_ack_state
* transition.
*/
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
}
break;
case TID_OP(READ_REQ):
/* Nothing is built in this state; take the bail path. */
goto bail;
default:
normal:
/*
* Send a regular ACK.
* Set the s_ack_state so we wait until after sending
* the ACK before setting s_ack_state to ACKNOWLEDGE
* (see above).
*/
qp->s_ack_state = OP(SEND_ONLY);
normal_no_state:
/*
* With a NAK state recorded, the AETH is built by hand from the
* masked r_msn and s_nak_state shifted by IB_AETH_CREDIT_SHIFT;
* otherwise rvt_compute_aeth() supplies it.
*/
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
IB_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
bth2 = mask_psn(qp->s_ack_psn);
/* The pending ACK is being built now; no payload accompanies it. */
qp->s_flags &= ~RVT_S_ACK_PENDING;
ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
ps->s_txreq->ss = NULL;
}
/* Common tail for every case that built a packet. */
qp->s_rdma_ack_cnt++;
ps->s_txreq->sde = qpriv->s_sde;
ps->s_txreq->s_cur_size = len;
ps->s_txreq->hdr_dwords = hwords;
hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
return 1;
error_qp:
/*
* rvt_error_qp() is called with both r_lock and s_lock held, r_lock
* taken first. s_lock is therefore dropped, both locks are taken in
* that order, and afterwards the function returns to holding only
* s_lock, as on entry. ps->flags carries the saved IRQ state through
* the sequence.
* NOTE(review): presumably the lock order is r_lock before s_lock -
* confirm against the locking rules for rvt_error_qp().
*/
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
spin_lock_irqsave(&qp->r_lock, ps->flags);
spin_lock(&qp->s_lock);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_lock, ps->flags);
spin_lock_irqsave(&qp->s_lock, ps->flags);
bail:
/* Nothing to send: return the state machine to ACKNOWLEDGE. */
qp->s_ack_state = OP(ACKNOWLEDGE);
/*
* Ensure s_rdma_ack_cnt changes are committed prior to resetting
* RVT_S_RESP_PENDING
*/
smp_wmb();
qp->s_flags &= ~(RVT_S_RESP_PENDING
| RVT_S_ACK_PENDING
| HFI1_S_AHG_VALID);
return 0;
}