All of lore.kernel.org
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, Maxim Mikityanskiy <maximmi@nvidia.com>,
	Tariq Toukan <tariqt@nvidia.com>,
	Saeed Mahameed <saeedm@nvidia.com>
Subject: [net-next 03/15] net/mlx5e: Build SKB in place over the first fragment in non-linear legacy RQ
Date: Thu, 17 Mar 2022 11:54:12 -0700	[thread overview]
Message-ID: <20220317185424.287982-4-saeed@kernel.org> (raw)
In-Reply-To: <20220317185424.287982-1-saeed@kernel.org>

From: Maxim Mikityanskiy <maximmi@nvidia.com>

As a performance optimization and preparation to enabling XDP multi
buffer on non-linear legacy RQ, build the linear part of the SKB over
the first fragment, instead of allocating a new buffer and copying the
first 256 bytes there.

To achieve this, add headroom and tailroom to the first fragment.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/params.c   | 43 ++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 48 ++++++++++---------
 2 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 0f258e7a65e0..5c4711be6fae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -188,12 +188,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk)
 {
-	bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
-		mlx5e_rx_is_linear_skb(params, xsk) :
-		mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+	u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
 
-	return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
-		mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+	if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+		return linear_headroom;
+
+	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+		return linear_headroom;
+
+	if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+		return linear_headroom;
+
+	return 0;
 }
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -392,10 +398,10 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
 	};
 }
 
-static int mlx5e_max_nonlinear_mtu(int frag_size)
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size)
 {
 	/* Optimization for small packets: the last fragment is bigger than the others. */
-	return (MLX5E_MAX_RX_FRAGS - 1) * frag_size + PAGE_SIZE;
+	return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
 }
 
 #define DEFAULT_FRAG_SIZE (2048)
@@ -407,7 +413,9 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 {
 	u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	int frag_size_max = DEFAULT_FRAG_SIZE;
+	int first_frag_size_max;
 	u32 buf_size = 0;
+	u16 headroom;
 	int max_mtu;
 	int i;
 
@@ -427,11 +435,15 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 		goto out;
 	}
 
-	max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max);
+	headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+	first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+	max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max);
 	if (byte_count > max_mtu) {
 		frag_size_max = PAGE_SIZE;
+		first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
 
-		max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max);
+		max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max);
 		if (byte_count > max_mtu) {
 			mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
 				      params->sw_mtu, max_mtu);
@@ -443,13 +455,22 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 	while (buf_size < byte_count) {
 		int frag_size = byte_count - buf_size;
 
-		if (i < MLX5E_MAX_RX_FRAGS - 1)
+		if (i == 0)
+			frag_size = min(frag_size, first_frag_size_max);
+		else if (i < MLX5E_MAX_RX_FRAGS - 1)
 			frag_size = min(frag_size, frag_size_max);
 
 		info->arr[i].frag_size = frag_size;
+		buf_size += frag_size;
+
+		if (i == 0) {
+			/* Ensure that headroom and tailroom are included. */
+			frag_size += headroom;
+			frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		}
+
 		info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
 
-		buf_size += frag_size;
 		i++;
 	}
 	info->num_frags = i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 6eda906342c0..b06aac087b2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1560,43 +1560,45 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 			     struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
 {
 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
-	struct mlx5e_wqe_frag_info *head_wi = wi;
-	u16 headlen      = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
-	u16 frag_headlen = headlen;
-	u16 byte_cnt     = cqe_bcnt - headlen;
+	u16 rx_headroom = rq->buff.headroom;
+	struct mlx5e_dma_info *di = wi->di;
+	u32 frag_consumed_bytes;
+	u32 first_frag_size;
 	struct sk_buff *skb;
+	void *va;
+
+	va = page_address(di->page) + wi->offset;
+	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+	first_frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + frag_consumed_bytes);
+
+	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
+				      first_frag_size, DMA_FROM_DEVICE);
+	net_prefetch(va + rx_headroom);
 
 	/* XDP is not supported in this configuration, as incoming packets
 	 * might spread among multiple pages.
 	 */
-	skb = napi_alloc_skb(rq->cq.napi,
-			     ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
-	if (unlikely(!skb)) {
-		rq->stats->buff_alloc_err++;
+	skb = mlx5e_build_linear_skb(rq, va, first_frag_size, rx_headroom,
+				     frag_consumed_bytes, 0);
+	if (unlikely(!skb))
 		return NULL;
-	}
 
-	net_prefetchw(skb->data);
+	page_ref_inc(di->page);
 
-	while (byte_cnt) {
-		u16 frag_consumed_bytes =
-			min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+	cqe_bcnt -= frag_consumed_bytes;
+	frag_info++;
+	wi++;
 
-		mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
+	while (cqe_bcnt) {
+		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+		mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset,
 				   frag_consumed_bytes, frag_info->frag_stride);
-		byte_cnt -= frag_consumed_bytes;
-		frag_headlen = 0;
+		cqe_bcnt -= frag_consumed_bytes;
 		frag_info++;
 		wi++;
 	}
 
-	/* copy header */
-	mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
-			      headlen);
-	/* skb linear part was allocated with headlen and aligned to long */
-	skb->tail += headlen;
-	skb->len  += headlen;
-
 	return skb;
 }
 
-- 
2.35.1


  parent reply	other threads:[~2022-03-17 18:54 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-17 18:54 [pull request][net-next 00/15] mlx5 updates 2022-03-17 Saeed Mahameed
2022-03-17 18:54 ` [net-next 01/15] net/mlx5e: Validate MTU when building non-linear legacy RQ fragments info Saeed Mahameed
2022-03-18 11:00   ` patchwork-bot+netdevbpf
2022-03-17 18:54 ` [net-next 02/15] net/mlx5e: Add headroom only to the first fragment in legacy RQ Saeed Mahameed
2022-03-17 18:54 ` Saeed Mahameed [this message]
2022-03-17 18:54 ` [net-next 04/15] net/mlx5e: RX, Test the XDP program existence out of the handler Saeed Mahameed
2022-03-17 18:54 ` [net-next 05/15] net/mlx5e: Drop the len output parameter from mlx5e_xdp_handle Saeed Mahameed
2022-03-17 18:54 ` [net-next 06/15] net/mlx5e: Drop cqe_bcnt32 from mlx5e_skb_from_cqe_mpwrq_linear Saeed Mahameed
2022-03-17 18:54 ` [net-next 07/15] net/mlx5: DR, Adjust structure member to reduce memory hole Saeed Mahameed
2022-03-17 18:54 ` [net-next 08/15] net/mlx5: DR, Remove mr_addr rkey from struct mlx5dr_icm_chunk Saeed Mahameed
2022-03-17 18:54 ` [net-next 09/15] net/mlx5: DR, Remove icm_addr from mlx5dr_icm_chunk to reduce memory Saeed Mahameed
2022-03-17 18:54 ` [net-next 10/15] net/mlx5: DR, Remove num_of_entries byte_size from struct mlx5_dr_icm_chunk Saeed Mahameed
2022-03-17 18:54 ` [net-next 11/15] net/mlx5: DR, Remove 4 members from mlx5dr_ste_htbl to reduce memory Saeed Mahameed
2022-03-17 18:54 ` [net-next 12/15] net/mlx5: DR, Remove hw_ste from mlx5dr_ste " Saeed Mahameed
2022-03-17 18:54 ` [net-next 13/15] net/mlx5: CT: Remove extra rhashtable remove on tuple entries Saeed Mahameed
2022-03-17 18:54 ` [net-next 14/15] net/mlx5: Remove unused exported contiguous coherent buffer allocation API Saeed Mahameed
2022-03-17 18:54 ` [net-next 15/15] net/mlx5: Remove unused fill page array API function Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220317185424.287982-4-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=kuba@kernel.org \
    --cc=maximmi@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.