[net-next 13/15] net/mlx5: DR, Supporting inline WQE when possible

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Itamar Gozlan <igozlan@nvidia.com>,
	Yevgeny Kliteynik <kliteyn@nvidia.com>
Subject: [net-next 13/15] net/mlx5: DR, Supporting inline WQE when possible
Date: Wed, 16 Aug 2023 14:00:47 -0700	[thread overview]
Message-ID: <20230816210049.54733-14-saeed@kernel.org> (raw)
In-Reply-To: <20230816210049.54733-1-saeed@kernel.org>

From: Itamar Gozlan <igozlan@nvidia.com>

In WQE (Work Queue Entry), the two types of data segments memories are
pointers and inline data, where inline data is passed directly as
part of the WQE.
For software steering, the maximal inline size should be less than
2*MLX5_SEND_WQE_BB, i.e., the potential data must fit with the required
inline WQE headers.

Two consecutive blocks (MLX5_SEND_WQE_BB) are not guaranteed to reside
on the same memory page. Hence, writes to MLX5_SEND_WQE_BB should be
done separately, i.e., each MLX5_SEND_WQE_BB  should be obtained using
the mlx5_wq_cyc_get_wqe macro.

Signed-off-by: Itamar Gozlan <igozlan@nvidia.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_send.c     | 115 ++++++++++++++++--
 1 file changed, 102 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 6fa06ba2d346..4e8527a724f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -52,6 +52,7 @@ struct dr_qp_init_attr {
 	u32 cqn;
 	u32 pdn;
 	u32 max_send_wr;
+	u32 max_send_sge;
 	struct mlx5_uars_page *uar;
 	u8 isolate_vl_tc:1;
 };
@@ -246,6 +247,37 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
 	return err == CQ_POLL_ERR ? err : npolled;
 }
 
+static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
+{
+	return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
+				  sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
+				  sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
+}
+
+/* We calculate for specific RC QP with the required functionality */
+static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
+{
+	int update_arg_size;
+	int inl_size = 0;
+	int tot_size;
+	int size;
+
+	update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
+
+	size = sizeof(struct mlx5_wqe_ctrl_seg) +
+	       sizeof(struct mlx5_wqe_raddr_seg);
+	inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
+				DR_STE_SIZE, 16);
+
+	size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
+
+	size = max(size, update_arg_size);
+
+	tot_size = max(size, inl_size);
+
+	return ALIGN(tot_size, MLX5_SEND_WQE_BB);
+}
+
 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 					 struct dr_qp_init_attr *attr)
 {
@@ -253,6 +285,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 	struct mlx5_wq_param wqp;
 	struct mlx5dr_qp *dr_qp;
+	int wqe_size;
 	int inlen;
 	void *qpc;
 	void *in;
@@ -332,6 +365,15 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 	if (err)
 		goto err_in;
 	dr_qp->uar = attr->uar;
+	wqe_size = dr_qp_calc_rc_send_wqe(attr);
+	dr_qp->max_inline_data = min(wqe_size -
+				     (sizeof(struct mlx5_wqe_ctrl_seg) +
+				      sizeof(struct mlx5_wqe_raddr_seg) +
+				      sizeof(struct mlx5_wqe_inline_seg)),
+				     (2 * MLX5_SEND_WQE_BB -
+				      (sizeof(struct mlx5_wqe_ctrl_seg) +
+				       sizeof(struct mlx5_wqe_raddr_seg) +
+				       sizeof(struct mlx5_wqe_inline_seg))));
 
 	return dr_qp;
 
@@ -395,8 +437,48 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
 		MLX5_SEND_WQE_DS;
 }
 
+static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
+			       struct dr_data_seg *data_seg, void *wqe)
+{
+	int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
+				sizeof(struct mlx5_wqe_raddr_seg) +
+				sizeof(struct mlx5_wqe_inline_seg);
+	struct mlx5_wqe_inline_seg *seg;
+	int left_space;
+	int inl = 0;
+	void *addr;
+	int len;
+	int idx;
+
+	seg = wqe;
+	wqe += sizeof(*seg);
+	addr = (void *)(unsigned long)(data_seg->addr);
+	len  = data_seg->length;
+	inl += len;
+	left_space = MLX5_SEND_WQE_BB - inline_header_size;
+
+	if (likely(len > left_space)) {
+		memcpy(wqe, addr, left_space);
+		len -= left_space;
+		addr += left_space;
+		idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
+		wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+	}
+
+	memcpy(wqe, addr, len);
+
+	if (likely(inl)) {
+		seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
+		return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
+				    MLX5_SEND_WQE_DS);
+	} else {
+		return 0;
+	}
+}
+
 static void
-dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
+				  struct mlx5_wqe_ctrl_seg *wq_ctrl,
 				  u64 remote_addr,
 				  u32 rkey,
 				  struct dr_data_seg *data_seg,
@@ -412,15 +494,17 @@ dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
 	wq_raddr->reserved = 0;
 
 	wq_dseg = (void *)(wq_raddr + 1);
+	/* WQE ctrl segment + WQE remote addr segment */
+	*size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
 
-	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
-	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
-	wq_dseg->addr = cpu_to_be64(data_seg->addr);
-
-	*size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
-		 sizeof(*wq_dseg) +    /* WQE data segment */
-		 sizeof(*wq_raddr)) /  /* WQE remote addr segment */
-		MLX5_SEND_WQE_DS;
+	if (data_seg->send_flags & IB_SEND_INLINE) {
+		*size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
+	} else {
+		wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+		wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+		wq_dseg->addr = cpu_to_be64(data_seg->addr);
+		*size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS;  /* WQE data segment */
+	}
 }
 
 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
@@ -451,7 +535,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 	switch (opcode) {
 	case MLX5_OPCODE_RDMA_READ:
 	case MLX5_OPCODE_RDMA_WRITE:
-		dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
+		dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
 						  rkey, data_seg, &size);
 		break;
 	case MLX5_OPCODE_FLOW_TBL_ACCESS:
@@ -572,7 +656,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
 		send_info->write.send_flags |= IB_SEND_SIGNALED;
 	else
-		send_info->write.send_flags = 0;
+		send_info->write.send_flags &= ~IB_SEND_SIGNALED;
 }
 
 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
@@ -596,9 +680,13 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
 	}
 
 	send_ring->pending_wqe++;
+	if (!send_info->write.lkey)
+		send_info->write.send_flags |= IB_SEND_INLINE;
 
 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
 		send_info->write.send_flags |= IB_SEND_SIGNALED;
+	else
+		send_info->write.send_flags &= ~IB_SEND_SIGNALED;
 
 	send_ring->pending_wqe++;
 	send_info->read.length = send_info->write.length;
@@ -608,9 +696,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
 	send_info->read.lkey = send_ring->sync_mr->mkey;
 
 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
-		send_info->read.send_flags = IB_SEND_SIGNALED;
+		send_info->read.send_flags |= IB_SEND_SIGNALED;
 	else
-		send_info->read.send_flags = 0;
+		send_info->read.send_flags &= ~IB_SEND_SIGNALED;
 }
 
 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
@@ -1257,6 +1345,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 	dmn->send_ring->cq->qp = dmn->send_ring->qp;
 
 	dmn->info.max_send_wr = QUEUE_SIZE;
+	init_attr.max_send_sge = 1;
 	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
 					DR_STE_SIZE);
 
-- 
2.41.0

next prev parent reply	other threads:[~2023-08-16 21:01 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-16 21:00 [pull request][net-next 00/15] mlx5 updates 2023-08-16 Saeed Mahameed
2023-08-16 21:00 ` [net-next 01/15] net/mlx5e: aRFS, Prevent repeated kernel rule migrations requests Saeed Mahameed
2023-08-16 21:00 ` [net-next 02/15] net/mlx5e: aRFS, Warn if aRFS table does not exist for aRFS rule Saeed Mahameed
2023-08-16 21:00 ` [net-next 03/15] net/mlx5e: aRFS, Introduce ethtool stats Saeed Mahameed
2023-08-16 21:00 ` [net-next 04/15] net/mlx5e: Fix spelling mistake "Faided" -> "Failed" Saeed Mahameed
2023-08-16 21:00 ` [net-next 05/15] net/mlx5: IRQ, consolidate irq and affinity mask allocation Saeed Mahameed
2023-08-16 21:00 ` [net-next 06/15] net/mlx5: DR, Fix code indentation Saeed Mahameed
2023-08-16 21:00 ` [net-next 07/15] net/mlx5: DR, Remove unneeded local variable Saeed Mahameed
2023-08-16 21:00 ` [net-next 08/15] net/mlx5: Remove health syndrome enum duplication Saeed Mahameed
2023-08-16 21:00 ` [net-next 09/15] net/mlx5: Update dead links in Kconfig documentation Saeed Mahameed
2023-08-16 21:00 ` [net-next 10/15] net/mlx5: Call mlx5_esw_offloads_rep_load/unload() for uplink port directly Saeed Mahameed
2023-08-16 21:00 ` [net-next 11/15] net/mlx5: Remove VPORT_UPLINK handling from devlink_port.c Saeed Mahameed
2023-08-16 21:00 ` [net-next 12/15] net/mlx5: Rename devlink port ops struct for PFs/VFs Saeed Mahameed
2023-08-16 21:00 ` Saeed Mahameed [this message]
2023-08-16 21:00 ` [net-next 14/15] net/mlx5: Convert PCI error values to generic errnos Saeed Mahameed
2023-08-18 22:28   ` Jakub Kicinski
2023-08-19  2:55     ` Jakub Kicinski
2023-08-21 17:53       ` Saeed Mahameed
2023-08-16 21:00 ` [net-next 15/15] net/mlx5: Devcom, only use devcom after NULL check in mlx5_devcom_send_event() Saeed Mahameed

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:6fa06ba2d34 dfblob:4e8527a724f )
 OR (
bs:"[net-next 13/15] net/mlx5: DR, Supporting inline WQE when possible" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230816210049.54733-14-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=igozlan@nvidia.com \
    --cc=kliteyn@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).