static int make_rc_ack()

in hw/hfi1/rc.c [67:377]
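
make_rc_ack() builds the next response packet for an RC QP: a plain ACK or NAK, an RDMA READ response, an atomic acknowledgement, or a TID RDMA READ/WRITE response. It is called with qp->s_lock held, returns 1 when a packet was constructed (and the RUC header built), and returns 0 when there is nothing to send or the response must wait.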


static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
		       struct ib_other_headers *ohdr,
		       struct hfi1_pkt_state *ps)
{
	struct rvt_ack_entry *e;
	u32 hwords, hdrlen;
	u32 len = 0;
	u32 bth0 = 0, bth2 = 0;
	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
	int middle = 0;
	u32 pmtu = qp->pmtu;
	struct hfi1_qp_priv *qpriv = qp->priv;
	bool last_pkt;
	u32 delta;
	u8 next = qp->s_tail_ack_queue;
	struct tid_rdma_request *req;

	trace_hfi1_rsp_make_rc_ack(qp, 0);
	lockdep_assert_held(&qp->s_lock);
	/* Don't send an ACK if we aren't supposed to. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto bail;

	if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
	else
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;

	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		release_rdma_sge_mr(e);
		fallthrough;
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has actually been sent, rather than merely
		 * constructed.
		 */
		if (++next > rvt_size_atomic(&dev->rdi))
			next = 0;
		/*
		 * Only advance the s_acked_ack_queue pointer if there
		 * have been no TID RDMA requests.
		 */
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode != TID_OP(WRITE_REQ) &&
		    qp->s_acked_ack_queue == qp->s_tail_ack_queue)
			qp->s_acked_ack_queue = next;
		qp->s_tail_ack_queue = next;
		trace_hfi1_rsp_make_rc_ack(qp, e->psn);
		fallthrough;
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & RVT_S_ACK_PENDING)
				goto normal;
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		/* Check for tid write fence */
		if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
		    hfi1_tid_rdma_ack_interlock(qp, e)) {
			iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
			goto bail;
		}
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/*
			 * If an RDMA read response is being resent and
			 * we haven't seen the duplicate request yet,
			 * then stop sending the remaining responses the
			 * responder has seen until the requester re-sends it.
			 */
			len = e->rdma_sge.sge_length;
			if (len && !e->rdma_sge.mr) {
				if (qp->s_acked_ack_queue ==
				    qp->s_tail_ack_queue)
					qp->s_acked_ack_queue =
						qp->r_head_ack_queue;
				qp->s_tail_ack_queue = qp->r_head_ack_queue;
				goto bail;
			}
			/* Copy SGE state in case we need to resend */
			ps->s_txreq->mr = e->rdma_sge.mr;
			if (ps->s_txreq->mr)
				rvt_get_mr(ps->s_txreq->mr);
			qp->s_ack_rdma_sge.sge = e->rdma_sge;
			qp->s_ack_rdma_sge.num_sge = 1;
			ps->s_txreq->ss = &qp->s_ack_rdma_sge;
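			/*
			 * More than one packet is needed, so start with a
			 * FIRST response; otherwise a single ONLY response
			 * completes the read and the entry is marked sent.
			 */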
			if (len > pmtu) {
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = mask_psn(qp->s_ack_rdma_psn++);
		} else if (e->opcode == TID_OP(WRITE_REQ)) {
			/*
			 * If a TID RDMA WRITE RESP is being resent, we have to
			 * wait for the actual request. All requests that are to
			 * be resent will have their state set to
			 * TID_REQUEST_RESEND. When the new request arrives, the
			 * state will be changed to TID_REQUEST_RESEND_ACTIVE.
			 */
			req = ack_to_tid_req(e);
			if (req->state == TID_REQUEST_RESEND ||
			    req->state == TID_REQUEST_INIT_RESEND)
				goto bail;
			qp->s_ack_state = TID_OP(WRITE_RESP);
			qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
			goto write_resp;
		} else if (e->opcode == TID_OP(READ_REQ)) {
			/*
			 * If a TID RDMA read response is being resent and
			 * we haven't seen the duplicate request yet,
			 * then stop sending the remaining responses the
			 * responder has seen until the requester re-sends it.
			 */
			len = e->rdma_sge.sge_length;
			if (len && !e->rdma_sge.mr) {
				if (qp->s_acked_ack_queue ==
				    qp->s_tail_ack_queue)
					qp->s_acked_ack_queue =
						qp->r_head_ack_queue;
				qp->s_tail_ack_queue = qp->r_head_ack_queue;
				goto bail;
			}
			/* Copy SGE state in case we need to resend */
			ps->s_txreq->mr = e->rdma_sge.mr;
			if (ps->s_txreq->mr)
				rvt_get_mr(ps->s_txreq->mr);
			qp->s_ack_rdma_sge.sge = e->rdma_sge;
			qp->s_ack_rdma_sge.num_sge = 1;
			qp->s_ack_state = TID_OP(READ_RESP);
			goto read_resp;
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			ps->s_txreq->ss = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = rvt_compute_aeth(qp);
			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = mask_psn(e->psn);
			e->sent = 1;
		}
		trace_hfi1_tid_write_rsp_make_rc_ack(qp);
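		/* The response opcode goes in the top byte of BTH word 0. */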
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		fallthrough;
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
		if (ps->s_txreq->mr)
			rvt_get_mr(ps->s_txreq->mr);
		len = qp->s_ack_rdma_sge.sge.sge_length;
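		/*
		 * More data remains after this packet: stay in the MIDDLE
		 * state (and allow AHG header generation if the capability
		 * is set); otherwise append the AETH and finish with a
		 * LAST packet.
		 */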
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
		} else {
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = mask_psn(qp->s_ack_rdma_psn++);
		break;

	case TID_OP(WRITE_RESP):
write_resp:
		/*
		 * 1. Check if RVT_S_ACK_PENDING is set. If yes,
		 *    goto normal.
		 * 2. Attempt to allocate TID resources.
		 * 3. Remove RVT_S_RESP_PENDING flags from s_flags
		 * 4. If resources not available:
		 *    4.1 Set RVT_S_WAIT_TID_SPACE
		 *    4.2 Queue QP on RCD TID queue
		 *    4.3 Put QP on iowait list.
		 *    4.4 Build IB RNR NAK with appropriate timeout value
		 *    4.5 Return indication progress made.
		 * 5. If resources are available:
		 *    5.1 Program HW flow CSRs
		 *    5.2 Build TID RDMA WRITE RESP packet
		 *    5.3 If more resources needed, do 2.1 - 2.3.
		 *    5.4 Wake up next QP on RCD TID queue.
		 *    5.5 Return indication progress made.
		 */

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		req = ack_to_tid_req(e);

		/*
		 * Send scheduled RNR NAKs. RNR NAKs need to be sent at
		 * segment boundaries, not at request boundaries. Don't change
		 * s_ack_state because we are still in the middle of a request.
		 */
		if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
		    qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
		    req->cur_seg == req->alloc_seg) {
			qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
			goto normal_no_state;
		}

		bth2 = mask_psn(qp->s_ack_rdma_psn);
		hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
							bth2, &len,
							&ps->s_txreq->ss);
		if (!hdrlen)
			return 0;

		hwords += hdrlen;
		bth0 = qp->s_ack_state << 24;
		qp->s_ack_rdma_psn++;
		trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
						     e->lpsn, req);
		if (req->cur_seg != req->total_segs)
			break;

		e->sent = 1;
		/* Do not free e->rdma_sge until all data are received */
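		/*
		 * ATOMIC_ACKNOWLEDGE (rather than RDMA_READ_RESPONSE_LAST)
		 * is used here so that the tail-advance path at the top of
		 * the switch does not call release_rdma_sge_mr() on this
		 * entry.
		 */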
		qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
		break;

	case TID_OP(READ_RESP):
read_resp:
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
		delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
						      &bth1, &bth2, &len,
						      &last_pkt);
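		/* A zero return means the response could not be built. */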
		if (delta == 0)
			goto error_qp;
		hwords += delta;
		if (last_pkt) {
			e->sent = 1;
			/*
			 * Increment qp->s_tail_ack_queue through s_ack_state
			 * transition.
			 */
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
		}
		break;
	case TID_OP(READ_REQ):
		goto bail;

	default:
normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
normal_no_state:
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
					    (qp->s_nak_state <<
					     IB_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = rvt_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = mask_psn(qp->s_ack_psn);
		qp->s_flags &= ~RVT_S_ACK_PENDING;
		ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
		ps->s_txreq->ss = NULL;
	}
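	/*
	 * A response was built: account for it, fill in the txreq (send
	 * engine, payload length, header size) and construct the RUC header.
	 */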
	qp->s_rdma_ack_cnt++;
	ps->s_txreq->sde = qpriv->s_sde;
	ps->s_txreq->s_cur_size = len;
	ps->s_txreq->hdr_dwords = hwords;
	hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
	return 1;
error_qp:
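	/*
	 * rvt_error_qp() needs both r_lock and s_lock, taken in that order;
	 * drop s_lock, acquire the locks in order, flush the QP, then
	 * reacquire s_lock for the bail path below.
	 */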
	spin_unlock_irqrestore(&qp->s_lock, ps->flags);
	spin_lock_irqsave(&qp->r_lock, ps->flags);
	spin_lock(&qp->s_lock);
	rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_lock, ps->flags);
	spin_lock_irqsave(&qp->s_lock, ps->flags);
bail:
	qp->s_ack_state = OP(ACKNOWLEDGE);
	/*
	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
	 * RVT_S_RESP_PENDING
	 */
	smp_wmb();
	qp->s_flags &= ~(RVT_S_RESP_PENDING
				| RVT_S_ACK_PENDING
				| HFI1_S_AHG_VALID);
	return 0;
}
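
For orientation, here is a minimal sketch of how this function is typically consumed: the hfi1 send engine gives responses priority over new requests, so hfi1_make_rc_req() tries make_rc_ack() first whenever RVT_S_RESP_PENDING is set. The body below is illustrative only, not a verbatim copy of the driver; the real prologue also selects between 9B/16B headers and handles GRH.

int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_other_headers *ohdr;

	/* 9B, no-GRH case; the real code also handles GRH and 16B headers. */
	ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;

	/* Sending responses has higher priority over sending requests. */
	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
	    make_rc_ack(dev, qp, ohdr, ps))
		return 1;

	/* ... otherwise continue building the next request packet ... */
	return 0;
}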