netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Amery Hung <ameryhung@gmail.com>
To: bpf@vger.kernel.org
Cc: netdev@vger.kernel.org, alexei.starovoitov@gmail.com,
	andrii@kernel.org, daniel@iogearbox.net, kuba@kernel.org,
	stfomichev@gmail.com, martin.lau@kernel.org,
	mohsin.bashr@gmail.com, noren@nvidia.com, dtatulea@nvidia.com,
	saeedm@nvidia.com, tariqt@nvidia.com, mbloch@nvidia.com,
	maciej.fijalkowski@intel.com, kernel-team@meta.com
Subject: [PATCH bpf-next v2 1/7] net/mlx5e: Fix generating skb from nonlinear xdp_buff
Date: Fri,  5 Sep 2025 10:33:45 -0700	[thread overview]
Message-ID: <20250905173352.3759457-2-ameryhung@gmail.com> (raw)
In-Reply-To: <20250905173352.3759457-1-ameryhung@gmail.com>

xdp programs can change the layout of an xdp_buff through
bpf_xdp_adjust_tail() and bpf_xdp_adjust_head(). Therefore, the driver
cannot assume the size of the linear data area nor fragments. Fix the
bug in mlx5 by generating skb according to xdp_buff after xdp programs
run.

Currently, when handling multi-buf xdp, the mlx5 driver assumes the
layout of an xdp_buff to be unchanged. That is, the linear data area
continues to be empty and fragments remains the same. This may cause
the driver to generate erroneous skb or triggering a kernel
warning. When an xdp program added linear data through
bpf_xdp_adjust_head(), the linear data will be ignored as
mlx5e_build_linear_skb() builds an skb without linear data and then
pull data from fragments to fill the linear data area. When an xdp
program has shrunk the non-linear data through bpf_xdp_adjust_tail(),
the delta passed to __pskb_pull_tail() may exceed the actual nonlinear
data size and trigger the BUG_ON in it.

To fix the issue, first record the original number of fragments. If the
number of fragments changes after the xdp program runs, rewind the end
fragment pointer by the difference and recalculate the truesize. Then,
build the skb with linear data area matching the xdp_buff. Finally, only
pull data in if there is non-linear data and fill the linear part up to
256 bytes.

Fixes: f52ac7028bec ("net/mlx5e: RX, Add XDP multi-buffer support in Striding RQ")
Signed-off-by: Amery Hung <ameryhung@gmail.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 38 +++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b8c609d91d11..6b6bb90cf003 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1729,6 +1729,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	struct mlx5e_wqe_frag_info *head_wi = wi;
 	u16 rx_headroom = rq->buff.headroom;
 	struct mlx5e_frag_page *frag_page;
+	u8 nr_frags_free, old_nr_frags;
 	struct skb_shared_info *sinfo;
 	u32 frag_consumed_bytes;
 	struct bpf_prog *prog;
@@ -1772,17 +1773,27 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 		wi++;
 	}
 
+	old_nr_frags = sinfo->nr_frags;
+
 	prog = rcu_dereference(rq->xdp_prog);
 	if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) {
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			struct mlx5e_wqe_frag_info *pwi;
 
+			wi -= old_nr_frags - sinfo->nr_frags;
+
 			for (pwi = head_wi; pwi < wi; pwi++)
 				pwi->frag_page->frags++;
 		}
 		return NULL; /* page/packet was consumed by XDP */
 	}
 
+	nr_frags_free = old_nr_frags - sinfo->nr_frags;
+	if (unlikely(nr_frags_free)) {
+		wi -= nr_frags_free;
+		truesize -= nr_frags_free * frag_info->frag_stride;
+	}
+
 	skb = mlx5e_build_linear_skb(
 		rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz,
 		mxbuf->xdp.data - mxbuf->xdp.data_hard_start,
@@ -2004,6 +2015,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	u32 byte_cnt       = cqe_bcnt;
 	struct skb_shared_info *sinfo;
 	unsigned int truesize = 0;
+	u32 pg_consumed_bytes;
 	struct bpf_prog *prog;
 	struct sk_buff *skb;
 	u32 linear_frame_sz;
@@ -2057,7 +2069,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 
 	while (byte_cnt) {
 		/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
-		u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
+		pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
 		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
 			truesize += pg_consumed_bytes;
@@ -2073,10 +2085,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	}
 
 	if (prog) {
+		u8 nr_frags_free, old_nr_frags = sinfo->nr_frags;
+		u32 len;
+
 		if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 				struct mlx5e_frag_page *pfp;
 
+				frag_page -= old_nr_frags - sinfo->nr_frags;
+
 				for (pfp = head_page; pfp < frag_page; pfp++)
 					pfp->frags++;
 
@@ -2087,9 +2104,22 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 			return NULL; /* page/packet was consumed by XDP */
 		}
 
+		len = mxbuf->xdp.data_end - mxbuf->xdp.data;
+
+		nr_frags_free = old_nr_frags - sinfo->nr_frags;
+		if (unlikely(nr_frags_free)) {
+			frag_page -= nr_frags_free;
+
+			/* the last frag is always freed first */
+			truesize -= ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+			while (--nr_frags_free)
+				truesize -= nr_frags_free *
+					    ALIGN(PAGE_SIZE, BIT(rq->mpwqe.log_stride_sz));
+		}
+
 		skb = mlx5e_build_linear_skb(
 			rq, mxbuf->xdp.data_hard_start, linear_frame_sz,
-			mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0,
+			mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len,
 			mxbuf->xdp.data - mxbuf->xdp.data_meta);
 		if (unlikely(!skb)) {
 			mlx5e_page_release_fragmented(rq->page_pool,
@@ -2114,8 +2144,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 			do
 				pagep->frags++;
 			while (++pagep < frag_page);
+
+			headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len, sinfo->xdp_frags_size);
+			__pskb_pull_tail(skb, headlen);
 		}
-		__pskb_pull_tail(skb, headlen);
 	} else {
 		dma_addr_t addr;
 
-- 
2.47.3


  reply	other threads:[~2025-09-05 17:33 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-05 17:33 [PATCH bpf-next v2 0/7] Add kfunc bpf_xdp_pull_data Amery Hung
2025-09-05 17:33 ` Amery Hung [this message]
2025-09-08 14:41   ` [PATCH bpf-next v2 1/7] net/mlx5e: Fix generating skb from nonlinear xdp_buff Dragos Tatulea
2025-09-08 17:23     ` Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 2/7] bpf: Allow bpf_xdp_shrink_data to shrink a frag from head and tail Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 3/7] bpf: Support pulling non-linear xdp data Amery Hung
2025-09-08 19:27   ` Martin KaFai Lau
2025-09-08 22:28     ` Amery Hung
2025-09-09  1:54   ` Jakub Kicinski
2025-09-10 15:17     ` Amery Hung
2025-09-10 18:04       ` Jakub Kicinski
2025-09-10 19:11         ` Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 4/7] bpf: Clear packet pointers after changing packet data in kfuncs Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 5/7] bpf: Support specifying linear xdp packet data size in test_run Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 6/7] selftests/bpf: Test bpf_xdp_pull_data Amery Hung
2025-09-05 17:33 ` [PATCH bpf-next v2 7/7] selftests: drv-net: Pull data before parsing headers Amery Hung

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250905173352.3759457-2-ameryhung@gmail.com \
    --to=ameryhung@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dtatulea@nvidia.com \
    --cc=kernel-team@meta.com \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=martin.lau@kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=mohsin.bashr@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=noren@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=stfomichev@gmail.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).