in hw/hfi1/rc.c [388:1178]
/**
* hfi1_make_rc_req - build the next RC request packet for a QP
* @qp: the QP to make progress on
* @ps: packet state; ps->s_txreq receives the tx request that is built
*
* The caller must hold qp->s_lock (asserted with lockdep below).
*
* A tx request is obtained first and the location of the "other headers"
* (ohdr) inside it is chosen from the header type and the GRH flag.  A
* pending response (RVT_S_RESP_PENDING) is tried before any request.
* Otherwise the function dispatches on qp->s_state: the default case starts
* (or restarts) the WQE at qp->s_cur, the remaining cases continue a
* multi-packet SEND / RDMA WRITE or restart a READ / TID RDMA request.
*
* Return: 1 if ps->s_txreq was set up with a packet (or make_rc_ack()
* reported success), and also 1 when a WQE was completed without building
* a packet (ps->s_txreq is then NULL and the function expects to be called
* again); 0 if nothing can be sent now, in which case the tx request has
* been released, RVT_S_BUSY is cleared and IOWAIT_PENDING_IB is set.
*/
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_other_headers *ohdr;
/* SGE state attached to the tx request; NULL when no SGE state is used */
struct rvt_sge_state *ss = NULL;
struct rvt_swqe *wqe;
struct hfi1_swqe_priv *wpriv;
struct tid_rdma_request *req = NULL;
/*
* header size in 32-bit words LRH+BTH = (8+12)/4.
* NOTE(review): this initializer is overwritten on both branches of the
* hdr_type test below.
*/
u32 hwords = 5;
/* payload length of the packet being built */
u32 len = 0;
u32 bth0 = 0, bth2 = 0;
/* BTH1: remote QPN, with the OPFN capability bit at IB_BTHE_E_SHIFT */
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
u32 pmtu = qp->pmtu;
/* set to 1 when the WQE at s_cur is started as a new request */
char newreq;
/* passed to hfi1_make_ruc_header(); set for SEND/WRITE middle packets */
int middle = 0;
int delta;
/* only used by the TID_OP(READ_RESP) case */
struct tid_rdma_flow *flow = NULL;
struct tid_rdma_params *remote;
trace_hfi1_sender_make_rc_req(qp);
lockdep_assert_held(&qp->s_lock);
/*
* Get the tx request up front; every exit path below either leaves it
* in ps->s_txreq for the caller or releases it.
*/
ps->s_txreq = get_txreq(ps->dev, qp);
if (!ps->s_txreq)
goto bail_no_tx;
/* Locate the other-headers area for this header type and GRH setting. */
if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
else
ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
} else {
/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
hwords = 7;
if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
(hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
else
ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
}
/*
* Sending responses has higher priority over sending requests.
* A non-zero return from make_rc_ack() presumably means it used the
* tx request, so it is left in ps for the caller.
*/
if ((qp->s_flags & RVT_S_RESP_PENDING) &&
make_rc_ack(dev, qp, ohdr, ps))
return 1;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
qp->s_flags |= RVT_S_WAIT_DMA;
goto bail;
}
clear_ahg(qp);
/*
* Complete one WQE per call: success while s_last has not reached
* s_acked (presumably already acknowledged), flush error otherwise.
*/
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
}
/* Nothing may be sent while any of these wait conditions is set. */
if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
goto bail;
/*
* s_psn is not past s_sending_hpsn (presumably: we are about to resend
* PSNs that may still be in flight).  Wait while the sending range
* [s_sending_psn, s_sending_hpsn] is non-empty; once it is empty, reset
* it to an empty range positioned at s_psn.
*/
if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
qp->s_flags |= RVT_S_WAIT_PSN;
goto bail;
}
qp->s_sending_psn = qp->s_psn;
qp->s_sending_hpsn = qp->s_psn - 1;
}
/* Send a request. */
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
/* Re-entered from TID_OP(READ_REQ) after it rewrites qp->s_state. */
check_s_state:
switch (qp->s_state) {
default:
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/*
* Resend an old request or start a new one.
*
* We keep track of the current SWQE so that
* we don't reset the "furthest progress" state
* if we need to back up.
*/
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
if (qp->s_tail == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
/*
* If a fence is requested, wait for previous
* RDMA read and atomic operations to finish.
* However, there is no need to guard against
* TID RDMA READ after TID RDMA READ.
*/
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
qp->s_num_rd_atomic &&
(wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
/*
* Local operations are processed immediately
* after all prior requests have completed
*/
if (wqe->wr.opcode == IB_WR_REG_MR ||
wqe->wr.opcode == IB_WR_LOCAL_INV) {
int local_ops = 0;
int err = 0;
/* Earlier WQEs are still outstanding; try again later. */
if (qp->s_last != qp->s_cur)
goto bail;
/* Consume the WQE: advance s_cur and s_tail with wraparound. */
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
if (++qp->s_tail == qp->s_size)
qp->s_tail = 0;
/*
* The rkey is invalidated unless the WQE is flagged
* RVT_SEND_COMPLETION_ONLY; local_ops_pending is only
* decremented in the invalidating case.
*/
if (!(wqe->wr.send_flags &
RVT_SEND_COMPLETION_ONLY)) {
err = rvt_invalidate_rkey(
qp,
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
rvt_send_complete(qp, wqe,
err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
/* No packet is built for a local operation. */
goto done_free_tx;
}
newreq = 1;
qp->s_psn = wqe->psn;
}
/*
* Note that we have to be careful not to modify the
* original work request since we may need to resend
* it.
*/
len = wqe->length;
ss = &qp->s_sge;
bth2 = mask_psn(qp->s_psn);
/*
* Interlock between various IB requests and TID RDMA
* if necessary.
*/
if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_wqe_interlock(qp, wqe))
goto bail;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
/*
* Multi-packet message: send the first pmtu bytes only.
* s_cur is not advanced and no ACK is requested; the
* next call continues from the SEND_FIRST state.
*/
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_ONLY);
} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
} else {
qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
/* Invalidate rkey comes after the BTH */
ohdr->u.ieth = cpu_to_be32(
wqe->wr.ex.invalidate_rkey);
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
bth2 |= IB_BTH_REQ_ACK;
/* Single-packet message: this WQE is done, move to the next. */
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_WRITE:
/*
* Plain RDMA WRITE skips the credit check; only s_lsn is
* bumped for a new request without unlimited credit.
*/
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
goto no_flow_control;
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
no_flow_control:
/* RETH carries the remote address, rkey and full length. */
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
&ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
/* Multi-packet write: continue from RDMA_WRITE_FIRST next call. */
if (len > pmtu) {
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_ONLY);
} else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
bth2 |= IB_BTH_REQ_ACK;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_TID_RDMA_WRITE:
if (newreq) {
/*
* Limit the number of TID RDMA WRITE requests.
*/
if (atomic_read(&priv->n_tid_requests) >=
HFI1_TID_RDMA_WRITE_CNT)
goto bail;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
/*
* The return value is added to the header word count; bth1,
* bth2 and len are passed by pointer and presumably updated
* by the builder.
*/
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
&bth1, &bth2,
&len);
/* No SGE state is attached to the request packet itself. */
ss = NULL;
if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
priv->s_tid_cur = qp->s_cur;
if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
priv->s_tid_tail = qp->s_cur;
priv->s_state = TID_OP(WRITE_RESP);
}
} else if (priv->s_tid_cur == priv->s_tid_head) {
struct rvt_swqe *__w;
struct tid_rdma_request *__r;
__w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
__r = wqe_to_tid_req(__w);
/*
* The s_tid_cur pointer is advanced to s_cur if
* any of the following conditions about the WQE
* to which s_tid_cur currently points are
* satisfied:
* 1. The request is not a TID RDMA WRITE
* request,
* 2. The request is in the INACTIVE or
* COMPLETE states (TID RDMA READ requests
* stay at INACTIVE and TID RDMA WRITE
* transition to COMPLETE when done),
* 3. The request is in the ACTIVE or SYNC
* state and the number of completed
* segments is equal to the total segment
* count.
* (If ACTIVE, the request is waiting for
* ACKs. If SYNC, the request has not
* received any responses because it's
* waiting on a sync point.)
*/
if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
__r->state == TID_REQUEST_INACTIVE ||
__r->state == TID_REQUEST_COMPLETE ||
((__r->state == TID_REQUEST_ACTIVE ||
__r->state == TID_REQUEST_SYNC) &&
__r->comp_seg == __r->total_segs)) {
if (priv->s_tid_tail ==
priv->s_tid_cur &&
priv->s_state ==
TID_OP(WRITE_DATA_LAST)) {
priv->s_tid_tail = qp->s_cur;
priv->s_state =
TID_OP(WRITE_RESP);
}
priv->s_tid_cur = qp->s_cur;
}
/*
* A corner case: when the last TID RDMA WRITE
* request was completed, s_tid_head,
* s_tid_cur, and s_tid_tail all point to the
* same location. Other requests are posted and
* s_cur wraps around to the same location,
* where a new TID RDMA WRITE is posted. In
* this case, none of the indices need to be
* updated. However, the priv->s_state should.
*/
if (priv->s_tid_tail == qp->s_cur &&
priv->s_state == TID_OP(WRITE_DATA_LAST))
priv->s_state = TID_OP(WRITE_RESP);
}
req = wqe_to_tid_req(wqe);
if (newreq) {
/*
* Account for the new request: every segment awaits a
* response, and the request moves from the n_requests
* count to the n_tid_requests count.
*/
priv->s_tid_head = qp->s_cur;
priv->pending_tid_w_resp += req->total_segs;
atomic_inc(&priv->n_tid_requests);
atomic_dec(&priv->n_requests);
} else {
req->state = TID_REQUEST_RESEND;
req->comp_seg = delta_psn(bth2, wqe->psn);
/*
* Pull back any segments since we are going
* to re-receive them.
*/
req->setup_head = req->clear_tail;
priv->pending_tid_w_resp +=
delta_psn(wqe->lpsn, bth2) + 1;
}
trace_hfi1_tid_write_sender_make_req(qp, newreq);
trace_hfi1_tid_req_make_req_write(qp, newreq,
wqe->wr.opcode,
wqe->psn, wqe->lpsn,
req);
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_READ:
/*
* Don't allow more operations to be started
* than the QP limits allow.
*/
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
/* Counted for new requests and resends alike. */
qp->s_num_rd_atomic++;
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
&ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
/* A read request carries no payload. */
ss = NULL;
len = 0;
bth2 |= IB_BTH_REQ_ACK;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_TID_RDMA_READ:
trace_hfi1_tid_read_sender_make_req(qp, newreq);
wpriv = wqe->priv;
req = wqe_to_tid_req(wqe);
trace_hfi1_tid_req_make_req_read(qp, newreq,
wqe->wr.opcode,
wqe->psn, wqe->lpsn,
req);
/* delta == 0 means s_psn is at the first PSN of this WQE. */
delta = cmp_psn(qp->s_psn, wqe->psn);
/*
* Don't allow more operations to be started
* than the QP limits allow. We could get here under
* three conditions: (1) It's a new request; (2) We are
* sending the second or later segment of a request,
* but the qp->s_state is set to OP(RDMA_READ_REQUEST)
* when the last segment of a previous request is
* received just before this; (3) We are re-sending a
* request.
*/
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
if (newreq) {
/*
* NOTE(review): this declaration shadows the
* function-scope 'flow'; the outer variable is only
* used in the TID_OP(READ_RESP) case.
*/
struct tid_rdma_flow *flow =
&req->flows[req->setup_head];
/*
* Set up s_sge as it is needed for TID
* allocation. However, if the pages have been
* walked and mapped, skip it. An earlier try
* has failed to allocate the TID entries.
*/
if (!flow->npagesets) {
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
qp->s_len = wqe->length;
req->isge = 0;
req->clear_tail = req->setup_head;
req->flow_idx = req->setup_head;
req->state = TID_REQUEST_ACTIVE;
}
} else if (delta == 0) {
/* Re-send a request */
req->cur_seg = 0;
req->comp_seg = 0;
req->ack_pending = 0;
req->flow_idx = req->clear_tail;
req->state = TID_REQUEST_RESEND;
}
req->s_next_psn = qp->s_psn;
/* Read one segment at a time */
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
/* From here on delta holds the builder's return value. */
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
&bth1, &bth2,
&len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
hwords += delta;
ss = &wpriv->ss;
/*
* Check if this is the last segment.  s_cur only advances
* (with wraparound) once cur_seg has reached total_segs; the
* && short-circuit skips the increment otherwise.
*/
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
/*
* Don't allow more operations to be started
* than the QP limits allow.
*/
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
fallthrough;
case IB_WR_OPFN:
/*
* OPFN shares the atomic header construction below (as a
* COMPARE_SWAP) but is not counted in s_num_rd_atomic.
*/
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_OPFN) {
qp->s_state = OP(COMPARE_SWAP);
put_ib_ateth_swap(wqe->atomic_wr.swap,
&ohdr->u.atomic_eth);
put_ib_ateth_compare(wqe->atomic_wr.compare_add,
&ohdr->u.atomic_eth);
} else {
/* Fetch-and-add: the addend goes in the swap field. */
qp->s_state = OP(FETCH_ADD);
put_ib_ateth_swap(wqe->atomic_wr.compare_add,
&ohdr->u.atomic_eth);
put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
&ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
bth2 |= IB_BTH_REQ_ACK;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
default:
goto bail;
}
/*
* Point s_sge at the start of the WQE's SG list.  TID RDMA READ is
* excluded: it handles s_sge itself in its case above.
*/
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
qp->s_len = wqe->length;
}
/* A new request has been started: advance s_tail with wraparound. */
if (newreq) {
qp->s_tail++;
if (qp->s_tail >= qp->s_size)
qp->s_tail = 0;
}
/*
* Next PSN to send: RDMA READ and TID RDMA WRITE skip to the PSN
* after the WQE's last PSN, TID RDMA READ uses the value recorded
* in the request (req was set in its case above), everything else
* advances by one packet.
*/
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
qp->s_psn = wqe->lpsn + 1;
else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
qp->s_psn = req->s_next_psn;
else
qp->s_psn++;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
/*
* qp->s_state is normally set to the opcode of the
* last packet constructed for new requests and therefore
* is never set to RDMA read response.
* RDMA_READ_RESPONSE_FIRST is used by the ACK processing
* thread to indicate a SEND needs to be restarted from an
* earlier PSN without interfering with the sending thread.
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
fallthrough;
case OP(SEND_MIDDLE):
/* This packet uses the current PSN; s_psn then moves to the next. */
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
len = qp->s_len;
/* More than one MTU left: emit a middle packet and stay in this state. */
if (len > pmtu) {
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
/* Remaining data fits in one packet: this is the last packet. */
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_LAST);
} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
} else {
qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
/* invalidate data comes after the BTH */
ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
bth2 |= IB_BTH_REQ_ACK;
qp->s_cur++;
if (qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case OP(RDMA_READ_RESPONSE_LAST):
/*
* qp->s_state is normally set to the opcode of the
* last packet constructed for new requests and therefore
* is never set to RDMA read response.
* RDMA_READ_RESPONSE_LAST is used by the ACK processing
* thread to indicate a RDMA write needs to be restarted from
* an earlier PSN without interfering with the sending thread.
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Same middle/last handling as the SEND_MIDDLE case. */
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
qp->s_state = OP(RDMA_WRITE_LAST);
} else {
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
bth2 |= IB_BTH_REQ_ACK;
qp->s_cur++;
if (qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case OP(RDMA_READ_RESPONSE_MIDDLE):
/*
* qp->s_state is normally set to the opcode of the
* last packet constructed for new requests and therefore
* is never set to RDMA read response.
* RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
* thread to indicate a RDMA read needs to be restarted from
* an earlier PSN without interfering with the sending thread.
* See restart_rc().
*/
/*
* len temporarily holds the byte offset into the request that
* corresponds to the restart PSN; the read is reissued for the
* remainder only.
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr + len,
&ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
qp->s_psn = wqe->lpsn + 1;
/* The request packet itself carries no payload. */
ss = NULL;
len = 0;
qp->s_cur++;
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case TID_OP(WRITE_RESP):
/*
* This value for s_state is used for restarting a TID RDMA
* WRITE request. See the comment in OP(RDMA_READ_RESPONSE_MIDDLE)
* for more.
*/
req = wqe_to_tid_req(wqe);
req->state = TID_REQUEST_RESEND;
/*
* The remote TID RDMA parameters are read under the RCU read lock.
* NOTE(review): 'remote' is dereferenced without a NULL check;
* presumably it is always set while a TID RDMA WRITE can be
* restarted - confirm.
*/
rcu_read_lock();
remote = rcu_dereference(priv->tid_rdma.remote);
req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
len = wqe->length - (req->comp_seg * remote->max_len);
rcu_read_unlock();
bth2 = mask_psn(qp->s_psn);
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
&bth2, &len);
qp->s_psn = wqe->lpsn + 1;
ss = NULL;
qp->s_state = TID_OP(WRITE_REQ);
priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
priv->s_tid_cur = qp->s_cur;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
trace_hfi1_tid_req_make_req_write(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
case TID_OP(READ_RESP):
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
goto bail;
/* This is used to restart a TID read request */
req = wqe_to_tid_req(wqe);
wpriv = wqe->priv;
/*
* Back down. The field qp->s_psn has been set to the psn with
* which the request should be restarted. It's OK to use division
* as this is on the retry path.
*/
req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
/*
* The following function needs to be redefined to return the
* status to make sure that we find the flow. At the same
* time, we can use the req->state change to check if the
* call succeeds or not.
*/
req->state = TID_REQUEST_RESEND;
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
if (req->state != TID_REQUEST_ACTIVE) {
/*
* Failed to find the flow. Release all allocated tid
* resources.
*/
hfi1_kern_exp_rcv_clear_all(req);
hfi1_kern_clear_hw_flow(priv->rcd, qp);
/* Complete the WQE with an error; no packet is built. */
hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
goto bail;
}
req->state = TID_REQUEST_RESEND;
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
flow = &req->flows[req->flow_idx];
/* Exclude the flow's 'sent' byte count from the segment length. */
len -= flow->sent;
req->s_next_psn = flow->flow_state.ib_lpsn + 1;
delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
&bth2, &len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
hwords += delta;
ss = &wpriv->ss;
/* Check if this is the last segment */
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
qp->s_psn = req->s_next_psn;
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
case TID_OP(READ_REQ):
req = wqe_to_tid_req(wqe);
delta = cmp_psn(qp->s_psn, wqe->psn);
/*
* If the current WR is not TID RDMA READ, or this is the start
* of a new request, we need to change the qp->s_state so that
* the request can be set up properly.
*/
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
qp->s_cur == qp->s_tail) {
qp->s_state = OP(RDMA_READ_REQUEST);
/*
* Re-dispatch through the default case when at the start of
* a request; otherwise give up for now with s_state changed.
*/
if (delta == 0 || qp->s_cur == qp->s_tail)
goto check_s_state;
else
goto bail;
}
/* Rate limiting */
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
wpriv = wqe->priv;
/* Read one segment at a time */
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
&bth2, &len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
hwords += delta;
ss = &wpriv->ss;
/* Check if this is the last segment */
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
qp->s_psn = req->s_next_psn;
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
}
/*
* Record the PSN of the packet just built as the highest one handed
* to the send engine.
* NOTE(review): bth2 may already carry IB_BTH_REQ_ACK here; presumably
* the PSN helpers ignore that bit - confirm.
*/
qp->s_sending_hpsn = bth2;
/*
* Request an ACK whenever the packet's offset from the start of the
* WQE is a non-zero multiple of HFI1_PSN_CREDIT, except for TID RDMA
* WRITE.
*/
delta = delta_psn(bth2, wqe->psn);
if (delta && delta % HFI1_PSN_CREDIT == 0 &&
wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
bth2 |= IB_BTH_REQ_ACK;
/* RVT_S_SEND_ONE: send this single packet, then wait for its ACK. */
if (qp->s_flags & RVT_S_SEND_ONE) {
qp->s_flags &= ~RVT_S_SEND_ONE;
qp->s_flags |= RVT_S_WAIT_ACK;
bth2 |= IB_BTH_REQ_ACK;
}
/* Account for the payload carried by this packet. */
qp->s_len -= len;
ps->s_txreq->hdr_dwords = hwords;
ps->s_txreq->sde = priv->s_sde;
ps->s_txreq->ss = ss;
ps->s_txreq->s_cur_size = len;
/* The opcode (qp->s_state) occupies the top byte of BTH0. */
hfi1_make_ruc_header(
qp,
ohdr,
bth0 | (qp->s_state << 24),
bth1,
bth2,
middle,
ps);
return 1;
/* Progress was made without building a packet: release the tx request. */
done_free_tx:
hfi1_put_txreq(ps->s_txreq);
ps->s_txreq = NULL;
return 1;
/* Nothing to send right now: release the tx request and go idle. */
bail:
hfi1_put_txreq(ps->s_txreq);
bail_no_tx:
ps->s_txreq = NULL;
qp->s_flags &= ~RVT_S_BUSY;
/*
* If we didn't get a txreq, the QP will be woken up later to try
* again. Set the flags to indicate which work item to wake
* up.
*/
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
return 0;
}