in hw/mlx4/qp.c [3483:3796]
static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr, bool drain)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
unsigned long flags;
int nreq;
int err = 0;
unsigned ind;
int size;
unsigned seglen;
__be32 dummy;
__be32 *lso_wqe;
__be32 lso_hdr_sz;
__be32 blh;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
struct mlx4_ib_sqp *sqp = qp->sqp;
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
enum ib_gid_type gid_type;
union ib_gid gid;
if (!fill_gid_by_hw_index(mdev, qp->port,
ah->av.ib.gid_index,
&gid, &gid_type))
qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
to_mqp(sqp->roce_v2_gsi) : qp;
else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
}
}
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
!drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
ind = qp->sq_next_wqe;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
goto out;
}
ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
ctrl->srcrb_flags =
(wr->send_flags & IB_SEND_SIGNALED ?
cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
(wr->send_flags & IB_SEND_SOLICITED ?
cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
((wr->send_flags & IB_SEND_IP_CSUM) ?
cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
qp->sq_signal_bits;
ctrl->imm = send_ieth(wr);
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
switch (qp->mlx4_ib_qp_type) {
case MLX4_IB_QPT_RC:
case MLX4_IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_atomic_seg)) / 16;
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
set_masked_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
rdma_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
case IB_WR_LOCAL_INV:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
wqe += sizeof (struct mlx4_wqe_local_inval_seg);
size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
break;
case IB_WR_REG_MR:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
set_reg_seg(wqe, reg_wr(wr));
wqe += sizeof(struct mlx4_wqe_fmr_seg);
size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
default:
/* No extra segments required for sends */
break;
}
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
&seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_TUN_SMI:
case MLX4_IB_QPT_TUN_GSI:
/* this is a UD qp used in MAD responses to slaves. */
set_datagram_seg(wqe, ud_wr(wr));
/* set the forced-loopback bit in the data seg av */
*(__be32 *) wqe |= cpu_to_be32(0x80000000);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
case MLX4_IB_QPT_UD:
set_datagram_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
&lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
lso_wqe = (__be32 *) wqe;
wqe += seglen;
size += seglen / 16;
}
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
&seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
/* to start tunnel header on a cache-line boundary */
add_zero_len_inline(wqe);
wqe += 16;
size++;
build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_PROXY_SMI:
case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
ud_wr(wr),
qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
break;
default:
break;
}
/*
* Write data segments in reverse order, so as to
* overwrite cacheline stamp last within each
* cacheline. This avoids issues with WQE
* prefetching.
*/
dseg = wqe;
dseg += wr->num_sge - 1;
size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
/* Add one more inline data segment for ICRC for MLX sends */
if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
qp->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);
/*
* Possibly overwrite stamping in cacheline with LSO
* segment only after making sure all data segments
* are written.
*/
wmb();
*lso_wqe = lso_hdr_sz;
ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
/*
* Make sure descriptor is fully written before
* setting ownership bit (because HW can start
* executing as soon as we do).
*/
wmb();
if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
*bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
/*
* We can improve latency by not stamping the last
* send queue WQE until after ringing the doorbell, so
* only stamp here if there are still more WQEs to post.
*/
if (wr->next)
stamp_send_wqe(qp, ind + qp->sq_spare_wqes);
ind++;
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
writel_relaxed(qp->doorbell_qpn,
to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1);
qp->sq_next_wqe = ind;
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}