Netdev List
 help / color / mirror / Atom feed
From: "Nabil S. Alramli" <dev@nalramli.com>
To: saeedm@nvidia.com, tariqt@nvidia.com, mbloch@nvidia.com,
	dtatulea@nvidia.com
Cc: dev@nalramli.com, nalramli@fastly.com, leon@kernel.org,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [mellanox/mlx5-next RFC 1/1] net/mlx5: RX, Fix refcount warning on frag page release
Date: Thu, 25 Jun 2026 13:40:59 -0400	[thread overview]
Message-ID: <20260625174059.2879717-2-dev@nalramli.com> (raw)
In-Reply-To: <20260625174059.2879717-1-dev@nalramli.com>

Under memory pressure, the mlx5 driver triggers a WARNING during fragmented
page release. This happens because there is a discrepancy between what mlx5
thinks the page fragment counter is and what the page_pool actually says it
is.

The cause of the issue is page allocations on concurrent CPUs, which
increment the non-atomic u16 page counter mlx5e_frag_page.frags, while at
the same time the page reference counter net_iov.pp_ref_count is atomically
incremented. That sometimes leads to a difference in the counts and
therefore triggers the warning in page_pool_unref_netmem:

```
	ret = atomic_long_sub_return(nr, pp_ref_count);
	WARN_ON(ret < 0);
```

The actual stack trace looks like this:

```
WARNING: CPU: 37 PID: 447795 at include/net/page_pool/helpers.h:277 mlx5e_page_release_fragmented.isra.0+0x51/0x60 [mlx5_core]
Tainted: [S]=CPU_OUT_OF_SPEC, [O]=OOT_MODULE
Hardware name: *
RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x51/0x60 [mlx5_core]
RSP: 0018:ffffc90019814d98 EFLAGS: 00010293
RAX: 000000000000003f RBX: ffff88c0993d0a10 RCX: ffffea02424592c0
RDX: 0000000000000001 RSI: ffffea02424592c0 RDI: ffff88c090e20000
RBP: 000000000000000a R08: 0000000000001409 R09: 0000000000000006
R10: 0000000000000000 R11: ffff88c095fbc040 R12: 000000000000141f
R13: 0000000000000009 R14: ffff88c090e20000 R15: 0000000000000001
FS:  00007f34149fa6c0(0000) GS:ffff89200fa40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ed0265eb000 CR3: 0000005091cbe000 CR4: 0000000000350ef0
Call Trace:
 <IRQ>
 mlx5e_free_rx_wqes+0x7b/0xa0 [mlx5_core]
 mlx5e_post_rx_wqes+0x1ac/0x5a0 [mlx5_core]
 mlx5e_napi_poll+0x5e5/0x6f0 [mlx5_core]
 __napi_poll+0x2b/0x1a0
 net_rx_action+0x30e/0x370
 ? sched_clock+0x9/0x10
 ? sched_clock_cpu+0xf/0x170
 handle_softirqs+0xe2/0x2a0
 common_interrupt+0x85/0xa0
 </IRQ>
 <TASK>
 asm_common_interrupt+0x26/0x40
RIP: 0010:page_counter_uncharge+0x34/0x90
RSP: 0018:ffffc900e728bb00 EFLAGS: 00000213
RAX: ffff88aff4762000 RBX: ffff88aff4762100 RCX: 0000000000000304
RDX: 0000000000000001 RSI: 00000000004e9e1a RDI: ffff88aff4762100
RBP: 0000000000000001 R08: ffff891ea0560048 R09: 00007ffffffff000
R10: 0000000000001000 R11: ffff891ae8061b00 R12: ffffffffffffffff
R13: ffff89107fcfd4c0 R14: ffff891ae8061b00 R15: ffff892002fe1400
 uncharge_batch+0x40/0xd0
```

The fix is to use an atomic page fragment counter, so it will always match
the number of references held in the page_pool.

Signed-off-by: Nabil S. Alramli <dev@nalramli.com>
Fixes: 6f5742846053 ("net/mlx5e: RX, Enable skb page recycling through the page_pool")
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 39 ++++++++++---------
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2270e2e550dd..c164106eb85d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -568,7 +568,7 @@ struct mlx5e_icosq {
 
 struct mlx5e_frag_page {
 	netmem_ref netmem;
-	u16 frags;
+	atomic_long_t frags;
 };
 
 enum mlx5e_wqe_frag_flag {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5a46870c4b74..571a0df9f604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -400,7 +400,7 @@ static int mlx5e_rq_alloc_mpwqe_linear_info(struct mlx5e_rq *rq, int node,
 	rq->mpwqe.linear_info = li;
 
 	/* Set to max to force allocation on first run. */
-	li->frag_page.frags = li->max_frags;
+	atomic_long_set(&li->frag_page.frags, li->max_frags);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b60aa47c75b..ee360fa0c316 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -284,7 +284,7 @@ static int mlx5e_page_alloc_fragmented(struct page_pool *pp,
 
 	*frag_page = (struct mlx5e_frag_page) {
 		.netmem	= netmem,
-		.frags	= 0,
+		.frags	= ATOMIC_LONG_INIT(0),
 	};
 
 	return 0;
@@ -293,7 +293,7 @@ static int mlx5e_page_alloc_fragmented(struct page_pool *pp,
 static void mlx5e_page_release_fragmented(struct page_pool *pp,
 					  struct mlx5e_frag_page *frag_page)
 {
-	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
+	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - atomic_long_read(&frag_page->frags);
 	netmem_ref netmem = frag_page->netmem;
 
 	if (page_pool_unref_netmem(netmem, drain_count) == 0)
@@ -304,7 +304,7 @@ static int mlx5e_mpwqe_linear_page_refill(struct mlx5e_rq *rq)
 {
 	struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info;
 
-	if (likely(li->frag_page.frags < li->max_frags))
+	if (likely(atomic_long_read(&li->frag_page.frags) < li->max_frags))
 		return 0;
 
 	if (likely(li->frag_page.netmem)) {
@@ -323,7 +323,8 @@ static void *mlx5e_mpwqe_get_linear_page_frag(struct mlx5e_rq *rq)
 	if (unlikely(mlx5e_mpwqe_linear_page_refill(rq)))
 		return NULL;
 
-	frag_offset = li->frag_page.frags << MLX5E_XDP_LOG_MAX_LINEAR_SZ;
+	frag_offset = atomic_long_read(&li->frag_page.frags) <<
+		      MLX5E_XDP_LOG_MAX_LINEAR_SZ;
 	WARN_ON(frag_offset >= BIT(rq->mpwqe.page_shift));
 
 	return netmem_address(li->frag_page.netmem) + frag_offset;
@@ -568,7 +569,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
 		return;
 	}
 
-	frag_page->frags++;
+	atomic_long_inc(&frag_page->frags);
 	skb_add_rx_frag_netmem(skb, next_frag, netmem,
 			       frag_offset, len, truesize);
 }
@@ -744,7 +745,7 @@ void mlx5e_mpwqe_dealloc_linear_page(struct mlx5e_rq *rq)
 	 * things in a good state for re-allocation.
 	 */
 	li->frag_page.netmem = 0;
-	li->frag_page.frags = li->max_frags;
+	atomic_long_set(&li->frag_page.frags, li->max_frags);
 }
 
 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -1615,7 +1616,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 
 	/* queue up for recycling/reuse */
 	skb_mark_for_recycle(skb);
-	frag_page->frags++;
+	atomic_long_inc(&frag_page->frags);
 
 	return skb;
 }
@@ -1683,7 +1684,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 				struct mlx5e_wqe_frag_info *pwi;
 
 				for (pwi = head_wi; pwi < wi; pwi++)
-					pwi->frag_page->frags++;
+					atomic_long_inc(&pwi->frag_page->frags);
 			}
 			return NULL; /* page/packet was consumed by XDP */
 		}
@@ -1702,7 +1703,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 		return NULL;
 
 	skb_mark_for_recycle(skb);
-	head_wi->frag_page->frags++;
+	atomic_long_inc(&head_wi->frag_page->frags);
 
 	if (xdp_buff_has_frags(&mxbuf->xdp)) {
 		/* sinfo->nr_frags is reset by build_skb, calculate again. */
@@ -1711,7 +1712,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 					  xdp_buff_get_skb_flags(&mxbuf->xdp));
 
 		for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
-			pwi->frag_page->frags++;
+			atomic_long_inc(&pwi->frag_page->frags);
 	}
 
 	return skb;
@@ -1760,7 +1761,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (!skb) {
 		/* probably for XDP */
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-			wi->frag_page->frags++;
+			atomic_long_inc(&wi->frag_page->frags);
 		goto wq_cyc_pop;
 	}
 
@@ -1808,7 +1809,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (!skb) {
 		/* probably for XDP */
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-			wi->frag_page->frags++;
+			atomic_long_inc(&wi->frag_page->frags);
 		goto wq_cyc_pop;
 	}
 
@@ -2011,9 +2012,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 				struct mlx5e_frag_page *pfp;
 
 				for (pfp = head_page; pfp < frag_page; pfp++)
-					pfp->frags++;
+					atomic_long_inc(&pfp->frags);
 
-				linear_page->frags++;
+				atomic_long_inc(&linear_page->frags);
 			}
 			return NULL; /* page/packet was consumed by XDP */
 		}
@@ -2035,7 +2036,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 			return NULL;
 
 		skb_mark_for_recycle(skb);
-		linear_page->frags++;
+		atomic_long_inc(&linear_page->frags);
 
 		if (xdp_buff_has_frags(&mxbuf->xdp)) {
 			struct mlx5e_frag_page *pagep;
@@ -2048,7 +2049,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 
 			pagep = head_page;
 			do
-				pagep->frags++;
+				atomic_long_inc(&pagep->frags);
 			while (++pagep < frag_page);
 
 			headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len,
@@ -2068,7 +2069,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 
 			pagep = frag_page - sinfo->nr_frags;
 			do
-				pagep->frags++;
+				atomic_long_inc(&pagep->frags);
 			while (++pagep < frag_page);
 		}
 		/* copy header */
@@ -2121,7 +2122,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 				 cqe_bcnt, mxbuf);
 		if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-				frag_page->frags++;
+				atomic_long_inc(&frag_page->frags);
 			return NULL; /* page/packet was consumed by XDP */
 		}
 
@@ -2136,7 +2137,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
 	/* queue up for recycling/reuse */
 	skb_mark_for_recycle(skb);
-	frag_page->frags++;
+	atomic_long_inc(&frag_page->frags);
 
 	return skb;
 }
-- 
2.43.0


  reply	other threads:[~2026-06-25 18:42 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-25 17:40 [mellanox/mlx5-next RFC 1/1] net/mlx5: RX, Fix refcount warning on frag page release Nabil S. Alramli
2026-06-25 17:40 ` Nabil S. Alramli [this message]
2026-06-26 13:12   ` Dragos Tatulea
2026-06-26 18:02     ` Nabil S. Alramli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260625174059.2879717-2-dev@nalramli.com \
    --to=dev@nalramli.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dtatulea@nvidia.com \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=nalramli@fastly.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox