static int _mlx4_ib_post_send()

in hw/mlx4/qp.c [3483:3796]


static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			      const struct ib_send_wr **bad_wr, bool drain)
{
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	struct mlx4_wqe_data_seg *dseg;
	unsigned long flags;
	int nreq;
	int err = 0;
	unsigned ind;
	int size;
	unsigned seglen;
	__be32 dummy;
	__be32 *lso_wqe;
	__be32 lso_hdr_sz;
	__be32 blh;
	int i;
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);

	if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
		struct mlx4_ib_sqp *sqp = qp->sqp;

		if (sqp->roce_v2_gsi) {
			struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
			enum ib_gid_type gid_type;
			union ib_gid gid;

			if (!fill_gid_by_hw_index(mdev, qp->port,
					   ah->av.ib.gid_index,
					   &gid, &gid_type))
				qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
						to_mqp(sqp->roce_v2_gsi) : qp;
			else
				pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
				       ah->av.ib.gid_index);
		}
	}

	spin_lock_irqsave(&qp->sq.lock, flags);
	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
	    !drain) {
		err = -EIO;
		*bad_wr = wr;
		nreq = 0;
		goto out;
	}

	ind = qp->sq_next_wqe;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		lso_wqe = &dummy;
		blh = 0;

		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->srcrb_flags =
			(wr->send_flags & IB_SEND_SIGNALED ?
			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IB_SEND_SOLICITED ?
			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
			((wr->send_flags & IB_SEND_IP_CSUM) ?
			 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
			qp->sq_signal_bits;

		ctrl->imm = send_ieth(wr);

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (qp->mlx4_ib_qp_type) {
		case MLX4_IB_QPT_RC:
		case MLX4_IB_QPT_UC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
					      atomic_wr(wr)->rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, atomic_wr(wr));
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);

				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
				set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
					      atomic_wr(wr)->rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_masked_atomic_seg(wqe, atomic_wr(wr));
				wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);

				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;

				break;

			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
					      rdma_wr(wr)->rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
				break;

			case IB_WR_LOCAL_INV:
				ctrl->srcrb_flags |=
					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
				break;

			case IB_WR_REG_MR:
				ctrl->srcrb_flags |=
					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_reg_seg(wqe, reg_wr(wr));
				wqe  += sizeof(struct mlx4_wqe_fmr_seg);
				size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case MLX4_IB_QPT_TUN_SMI_OWNER:
			err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
						     &seglen);
			if (unlikely(err)) {
				*bad_wr = wr;
				goto out;
			}
			wqe  += seglen;
			size += seglen / 16;
			break;
		case MLX4_IB_QPT_TUN_SMI:
		case MLX4_IB_QPT_TUN_GSI:
			/* this is a UD qp used in MAD responses to slaves. */
			set_datagram_seg(wqe, ud_wr(wr));
			/* set the forced-loopback bit in the data seg av */
			*(__be32 *) wqe |= cpu_to_be32(0x80000000);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			break;
		case MLX4_IB_QPT_UD:
			set_datagram_seg(wqe, ud_wr(wr));
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

			if (wr->opcode == IB_WR_LSO) {
				err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
						&lso_hdr_sz, &blh);
				if (unlikely(err)) {
					*bad_wr = wr;
					goto out;
				}
				lso_wqe = (__be32 *) wqe;
				wqe  += seglen;
				size += seglen / 16;
			}
			break;

		case MLX4_IB_QPT_PROXY_SMI_OWNER:
			err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
						     &seglen);
			if (unlikely(err)) {
				*bad_wr = wr;
				goto out;
			}
			wqe  += seglen;
			size += seglen / 16;
			/* to start tunnel header on a cache-line boundary */
			add_zero_len_inline(wqe);
			wqe += 16;
			size++;
			build_tunnel_header(ud_wr(wr), wqe, &seglen);
			wqe  += seglen;
			size += seglen / 16;
			break;
		case MLX4_IB_QPT_PROXY_SMI:
		case MLX4_IB_QPT_PROXY_GSI:
			/* If we are tunneling special qps, this is a UD qp.
			 * In this case we first add a UD segment targeting
			 * the tunnel qp, and then add a header with address
			 * information */
			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
						ud_wr(wr),
						qp->mlx4_ib_qp_type);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			build_tunnel_header(ud_wr(wr), wqe, &seglen);
			wqe  += seglen;
			size += seglen / 16;
			break;

		case MLX4_IB_QPT_SMI:
		case MLX4_IB_QPT_GSI:
			err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
			if (unlikely(err)) {
				*bad_wr = wr;
				goto out;
			}
			wqe  += seglen;
			size += seglen / 16;
			break;

		default:
			break;
		}

		/*
		 * Write data segments in reverse order, so as to
		 * overwrite cacheline stamp last within each
		 * cacheline.  This avoids issues with WQE
		 * prefetching.
		 */

		dseg = wqe;
		dseg += wr->num_sge - 1;
		size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);

		/* Add one more inline data segment for ICRC for MLX sends */
		if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
			     qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
			     qp->mlx4_ib_qp_type &
			     (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
			set_mlx_icrc_seg(dseg + 1);
			size += sizeof (struct mlx4_wqe_data_seg) / 16;
		}

		for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
			set_data_seg(dseg, wr->sg_list + i);

		/*
		 * Possibly overwrite stamping in cacheline with LSO
		 * segment only after making sure all data segments
		 * are written.
		 */
		wmb();
		*lso_wqe = lso_hdr_sz;

		ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ?
					     MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
			*bad_wr = wr;
			err = -EINVAL;
			goto out;
		}

		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, ind + qp->sq_spare_wqes);
		ind++;
	}

out:
	if (likely(nreq)) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		writel_relaxed(qp->doorbell_qpn,
			to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);

		stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1);

		qp->sq_next_wqe = ind;
	}

	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}