All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
	maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
	sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Jason Xing <kernelxing@tencent.com>
Subject: [PATCH net v2 8/8] xsk: fix u64 descriptor address truncation on 32-bit architectures
Date: Mon, 20 Apr 2026 16:28:05 +0800	[thread overview]
Message-ID: <20260420082805.14844-9-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20260420082805.14844-1-kerneljasonxing@gmail.com>

From: Jason Xing <kernelxing@tencent.com>

In copy mode TX, xsk_skb_destructor_set_addr() stores the 64-bit
descriptor address into skb_shinfo(skb)->destructor_arg (void *) via a
uintptr_t cast:

    skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);

On 32-bit architectures uintptr_t is 32 bits, so the upper 32 bits of
the descriptor address are silently dropped. In XDP_ZEROCOPY unaligned
mode the chunk offset is encoded in bits 48-63 of the descriptor
address (XSK_UNALIGNED_BUF_OFFSET_SHIFT = 48), meaning the offset is
lost entirely. The completion queue then returns a truncated address to
userspace, making buffer recycling impossible.

Fix this by handling the 32-bit case directly in
xsk_skb_destructor_set_addr(): when !CONFIG_64BIT, allocate an xsk_addrs
struct (the same path already used for multi-descriptor SKBs) to store
the full u64 address.

Extend xsk_drop_untrans_skb() to free the xsk_addrs allocation on 32-bit
when the skb is dropped before transmission. Note that the 0x1UL tag bit
cannot be used to detect this case on 32-bit, because destructor_arg then
holds a plain pointer to the xsk_addrs struct; a NULL check is used instead.

Also extend xsk_skb_destructor_is_addr() to cover the 32-bit case in the
same way, returning true only when destructor_arg is NULL.

The overhead is one extra kmem_cache_zalloc per first descriptor on
32-bit only; 64-bit builds are completely unchanged.

Closes: https://lore.kernel.org/all/20260419045824.D9E5EC2BCAF@smtp.kernel.org/
Fixes: 0ebc27a4c67d ("xsk: avoid data corruption on cq descriptor number")
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 net/xdp/xsk.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6b17974ca825..bd49dbd9875b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -556,9 +556,23 @@ static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
 	return ret;
 }
 
+/*
+ * On 64-bit, destructor_arg can store an inline address directly
+ * (tagged with bit 0 set). On 32-bit, all addresses go through an
+ * allocated xsk_addrs struct instead. In that case this function
+ * returns true only when destructor_arg is NULL (set_addr has not
+ * yet been called or has failed).
+ *
+ * For all callers:
+ *   return true: no xsk_addrs struct to handle
+ *   return false: destructor_arg points to an xsk_addrs struct
+ */
 static bool xsk_skb_destructor_is_addr(struct sk_buff *skb)
 {
-	return (uintptr_t)skb_shinfo(skb)->destructor_arg & 0x1UL;
+	if (IS_ENABLED(CONFIG_64BIT))
+		return (uintptr_t)skb_shinfo(skb)->destructor_arg & 0x1UL;
+	else
+		return !skb_shinfo(skb)->destructor_arg;
 }
 
 static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb)
@@ -566,9 +580,21 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb)
 	return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL);
 }
 
-static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
+static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
 {
+	if (!IS_ENABLED(CONFIG_64BIT)) {
+		struct xsk_addrs *xsk_addr;
+
+		xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+		if (!xsk_addr)
+			return -ENOMEM;
+		xsk_addr->addrs[0] = addr;
+		skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
+		return 0;
+	}
+
 	skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
+	return 0;
 }
 
 static void xsk_inc_num_desc(struct sk_buff *skb)
@@ -644,14 +670,14 @@ void xsk_destruct_skb(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
-			      u64 addr)
+static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
+			     u64 addr)
 {
 	skb->dev = xs->dev;
 	skb->priority = READ_ONCE(xs->sk.sk_priority);
 	skb->mark = READ_ONCE(xs->sk.sk_mark);
 	skb->destructor = xsk_destruct_skb;
-	xsk_skb_destructor_set_addr(skb, addr);
+	return xsk_skb_destructor_set_addr(skb, addr);
 }
 
 static void xsk_consume_skb(struct sk_buff *skb)
@@ -719,6 +745,12 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer,
 
 static void xsk_drop_untrans_skb(struct sk_buff *skb)
 {
+	if (!IS_ENABLED(CONFIG_64BIT) && !xsk_skb_destructor_is_addr(skb)) {
+		struct xsk_addrs *xsk_addr;
+
+		xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+		kmem_cache_free(xsk_tx_generic_cache, xsk_addr);
+	}
 	skb->destructor = sock_wfree;
 	kfree_skb(skb);
 }
@@ -746,7 +778,12 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 
 		skb_reserve(skb, hr);
 
-		xsk_skb_init_misc(skb, xs, desc->addr);
+		err = xsk_skb_init_misc(skb, xs, desc->addr);
+		if (unlikely(err)) {
+			xsk_drop_untrans_skb(skb);
+			return ERR_PTR(err);
+		}
+
 		if (desc->options & XDP_TX_METADATA) {
 			err = xsk_skb_metadata(skb, buffer, desc, pool, hr);
 			if (unlikely(err)) {
@@ -845,7 +882,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 			if (unlikely(err))
 				goto free_err;
 
-			xsk_skb_init_misc(skb, xs, desc->addr);
+			err = xsk_skb_init_misc(skb, xs, desc->addr);
+			if (unlikely(err))
+				goto free_err;
+
 			if (desc->options & XDP_TX_METADATA) {
 				err = xsk_skb_metadata(skb, buffer, desc,
 						       xs->pool, hr);
-- 
2.41.3


  parent reply	other threads:[~2026-04-20  8:28 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-20  8:27 [PATCH net v2 0/8] xsk: fix bugs around xsk skb allocation Jason Xing
2026-04-20  8:27 ` [PATCH net v2 1/8] xsk: reject sw-csum UMEM binding to IFF_TX_SKB_NO_LINEAR devices Jason Xing
2026-04-20 19:34   ` Stanislav Fomichev
2026-04-20 23:51     ` Jason Xing
2026-04-21 22:20       ` Stanislav Fomichev
2026-04-21  9:40   ` sashiko-bot
2026-04-21 12:39     ` Jason Xing
2026-04-20  8:27 ` [PATCH net v2 2/8] xsk: handle NULL dereference of the skb without frags issue Jason Xing
2026-04-20 19:34   ` Stanislav Fomichev
2026-04-21  9:40   ` sashiko-bot
2026-04-21 12:46     ` Jason Xing
2026-04-20  8:28 ` [PATCH net v2 3/8] xsk: fix use-after-free of xs->skb in xsk_build_skb() free_err path Jason Xing
2026-04-20 19:34   ` Stanislav Fomichev
2026-04-21  0:01     ` Jason Xing
2026-04-21  9:40   ` sashiko-bot
2026-04-21 12:51     ` Jason Xing
2026-04-20  8:28 ` [PATCH net v2 4/8] xsk: prevent CQ desync when freeing half-built skbs in xsk_build_skb() Jason Xing
2026-04-20 19:34   ` Stanislav Fomichev
2026-04-21  0:51     ` Jason Xing
2026-04-20  8:28 ` [PATCH net v2 5/8] xsk: avoid skb leak in XDP_TX_METADATA case Jason Xing
2026-04-21  9:40   ` sashiko-bot
2026-04-21 12:58     ` Jason Xing
2026-04-20  8:28 ` [PATCH net v2 6/8] xsk: free the skb when hitting the upper bound MAX_SKB_FRAGS Jason Xing
2026-04-20  8:28 ` [PATCH net v2 7/8] xsk: fix xsk_addrs slab leak on multi-buffer error path Jason Xing
2026-04-20 19:58   ` Stanislav Fomichev
2026-04-20  8:28 ` Jason Xing [this message]
2026-04-20 19:49   ` [PATCH net v2 8/8] xsk: fix u64 descriptor address truncation on 32-bit architectures Stanislav Fomichev
2026-04-21  0:49     ` Jason Xing
2026-04-21 22:23       ` Stanislav Fomichev
2026-04-22  2:54         ` Jason Xing
2026-04-21  9:40   ` sashiko-bot
2026-04-21 13:01     ` Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260420082805.14844-9-kerneljasonxing@gmail.com \
    --to=kerneljasonxing@gmail.com \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kernelxing@tencent.com \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.