From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com, horms@kernel.org,
andrew+netdev@lunn.ch
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kernelxing@tencent.com>
Subject: [PATCH net v5 8/8] xsk: fix u64 descriptor address truncation on 32-bit architectures
Date: Sat, 2 May 2026 23:07:22 +0300 [thread overview]
Message-ID: <20260502200722.53960-9-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20260502200722.53960-1-kerneljasonxing@gmail.com>
From: Jason Xing <kernelxing@tencent.com>
In copy mode TX, xsk_skb_destructor_set_addr() stores the 64-bit
descriptor address into skb_shinfo(skb)->destructor_arg (void *) via a
uintptr_t cast:
skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
On 32-bit architectures uintptr_t is 32 bits, so the upper 32 bits of
the descriptor address are silently dropped. In unaligned chunk
mode the chunk offset is encoded in bits 48-63 of the descriptor
address (XSK_UNALIGNED_BUF_OFFSET_SHIFT = 48), meaning the offset is
lost entirely. The completion queue then returns a truncated address to
userspace, making buffer recycling impossible.
Fix this by handling the 32-bit case directly in
xsk_skb_destructor_set_addr(): when !CONFIG_64BIT, allocate an
xsk_addrs struct (the same path already used for multi-descriptor
SKBs) to store the full u64 address. The existing tagged-pointer logic
in xsk_skb_destructor_is_addr() stays unchanged: slab pointers returned
from kmem_cache_zalloc() are always word-aligned and therefore have
bit 0 clear, which correctly identifies them as a struct pointer
rather than an inline tagged address on every architecture.
Factor the shared kmem_cache_zalloc + destructor_arg assignment into
__xsk_addrs_alloc() and add a wrapper xsk_addrs_alloc() that handles
the inline-to-list upgrade (is_addr check + get_addr + num_descs = 1).
The three former open-coded kmem_cache_zalloc call sites now reduce to
a single call each.
Propagate the -ENOMEM from xsk_skb_destructor_set_addr() through
xsk_skb_init_misc() so the caller can clean up the skb via kfree_skb()
before skb->destructor is installed.
The overhead is one extra kmem_cache_zalloc for the first descriptor of
each skb, on 32-bit builds only; 64-bit builds are completely unchanged.
Closes: https://lore.kernel.org/all/20260419045824.D9E5EC2BCAF@smtp.kernel.org/
Fixes: 0ebc27a4c67d ("xsk: avoid data corruption on cq descriptor number")
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
net/xdp/xsk.c | 88 ++++++++++++++++++++++++++++++++-------------------
1 file changed, 56 insertions(+), 32 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ed96f6ec8ff2..6bcd77068e52 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -566,9 +566,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb)
return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL);
}
-static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
+static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr)
{
- skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
+ struct xsk_addrs *xsk_addr;
+
+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (unlikely(!xsk_addr))
+ return NULL;
+
+ xsk_addr->addrs[0] = addr;
+ skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
+ return xsk_addr;
+}
+
+static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb)
+{
+ struct xsk_addrs *xsk_addr;
+
+ if (!xsk_skb_destructor_is_addr(skb))
+ return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+
+ xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb));
+ if (likely(xsk_addr))
+ xsk_addr->num_descs = 1;
+ return xsk_addr;
+}
+
+static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
+{
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
+ return 0;
+ }
+
+ if (unlikely(!__xsk_addrs_alloc(skb, addr)))
+ return -ENOMEM;
+ return 0;
}
static void xsk_inc_num_desc(struct sk_buff *skb)
@@ -644,14 +677,20 @@ void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
- u64 addr)
+static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
+ u64 addr)
{
+ int err;
+
+ err = xsk_skb_destructor_set_addr(skb, addr);
+ if (unlikely(err))
+ return err;
+
skb->dev = xs->dev;
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
- xsk_skb_destructor_set_addr(skb, addr);
+ return 0;
}
static void xsk_consume_skb(struct sk_buff *skb)
@@ -749,18 +788,9 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
} else {
struct xsk_addrs *xsk_addr;
- if (xsk_skb_destructor_is_addr(skb)) {
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache,
- GFP_KERNEL);
- if (!xsk_addr)
- return ERR_PTR(-ENOMEM);
-
- xsk_addr->num_descs = 1;
- xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb);
- skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
- } else {
- xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
- }
+ xsk_addr = xsk_addrs_alloc(skb);
+ if (!xsk_addr)
+ return ERR_PTR(-ENOMEM);
/* in case of -EOVERFLOW that could happen below,
* xsk_consume_skb() will release this node as whole skb
@@ -849,19 +879,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct page *page;
u8 *vaddr;
- if (xsk_skb_destructor_is_addr(skb)) {
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache,
- GFP_KERNEL);
- if (!xsk_addr) {
- err = -ENOMEM;
- goto free_err;
- }
-
- xsk_addr->num_descs = 1;
- xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb);
- skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
- } else {
- xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ xsk_addr = xsk_addrs_alloc(skb);
+ if (!xsk_addr) {
+ err = -ENOMEM;
+ goto free_err;
}
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
@@ -886,8 +907,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
}
}
- if (!xs->skb)
- xsk_skb_init_misc(skb, xs, desc->addr);
+ if (!xs->skb) {
+ err = xsk_skb_init_misc(skb, xs, desc->addr);
+ if (unlikely(err))
+ goto free_err;
+ }
xsk_inc_num_desc(skb);
return skb;
--
2.41.3
next prev parent reply other threads:[~2026-05-02 20:07 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-02 20:07 [PATCH net v5 0/8] xsk: fix bugs around xsk skb allocation Jason Xing
2026-05-02 20:07 ` [PATCH net v5 1/8] xsk: reject sw-csum UMEM binding to IFF_TX_SKB_NO_LINEAR devices Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:18 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 2/8] xsk: free the skb when hitting the upper bound MAX_SKB_FRAGS Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:26 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 3/8] xsk: handle NULL dereference of the skb without frags issue Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:28 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 4/8] xsk: fix use-after-free of xs->skb in xsk_build_skb() free_err path Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:32 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 5/8] xsk: prevent CQ desync when freeing half-built skbs in xsk_build_skb() Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:36 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 6/8] xsk: avoid skb leak in XDP_TX_METADATA case Jason Xing
2026-05-03 20:09 ` sashiko-bot
2026-05-05 19:43 ` Jason Xing
2026-05-02 20:07 ` [PATCH net v5 7/8] xsk: fix xsk_addrs slab leak on multi-buffer error path Jason Xing
2026-05-02 20:07 ` Jason Xing [this message]
2026-05-03 20:09 ` [PATCH net v5 8/8] xsk: fix u64 descriptor address truncation on 32-bit architectures sashiko-bot
2026-05-05 19:46 ` Jason Xing
2026-05-04 14:59 ` Stanislav Fomichev
2026-05-05 15:44 ` [PATCH net v5 0/8] xsk: fix bugs around xsk skb allocation Alexander Lobakin
2026-05-05 19:09 ` Jason Xing
2026-05-06 2:40 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260502200722.53960-9-kerneljasonxing@gmail.com \
--to=kerneljasonxing@gmail.com \
--cc=andrew+netdev@lunn.ch \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=horms@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jonathan.lemon@gmail.com \
--cc=kernelxing@tencent.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.