From: Lorenz Brun <lorenz@monogon.tech>
To: Tony Nguyen <anthony.l.nguyen@intel.com>,
Przemek Kitszel <przemyslaw.kitszel@intel.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Alexander Lobakin <aleksander.lobakin@intel.com>,
Simon Horman <horms@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Jesper Dangaard Brouer <hawk@kernel.org>,
John Fastabend <john.fastabend@gmail.com>,
Stanislav Fomichev <sdf@fomichev.me>,
Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Cc: stable@vger.kernel.org, intel-wired-lan@lists.osuosl.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
bpf@vger.kernel.org
Subject: [PATCH] xsk: switch xdp_build_skb_from_zc() to napi_alloc_skb()
Date: Tue, 12 May 2026 17:26:56 +0200 [thread overview]
Message-ID: <20260512152658.2818805-1-lorenz@monogon.tech> (raw)
xdp_build_skb_from_zc() allocated xdp->frame_sz bytes from the per-cpu
system_page_pool and built the skb head with napi_build_skb(). The
latter places skb_shared_info at the tail of the buffer, but the
helper sized the allocation as if the whole frame_sz were usable for
data. Whenever the packet plus reserved headroom approached frame_sz,
the head memcpy overran shinfo with packet content, corrupting
->flags (SKBFL_ZEROCOPY_ENABLE) and ->nr_frags, which then drove
skb_copy_ubufs() off the end of frags[] on the RX path:
UBSAN: array-index-out-of-bounds in include/linux/skbuff.h:2541
index 113 is out of range for type 'skb_frag_t [17]'
skb_copy_ubufs+0x7da/0x960
ip_local_deliver_finish+0xcd/0x110
ice_napi_poll+0xe4/0x2a0 [ice]
The overrun bytes come from the packet, so an on-wire sender can
corrupt kernel memory remotely whenever the XDP program returns
XDP_PASS.
Rather than patch the sizing math, switch to the pattern used by other
in-tree AF_XDP zero-copy drivers such as mlx5 and i40e, which use
napi_alloc_skb() sized to the actual packet plus skb_put_data().
This sizes the head exactly for the data being copied, drops the
system_page_pool local_lock from this path, and removes the
structural mismatch between frame_sz and the skb head buffer. Frags
are allocated with alloc_page() per frag, matching the other drivers.
Fixes: 560d958c6c68 ("xsk: add generic XSk &xdp_buff -> skb conversion")
Cc: stable@vger.kernel.org
Signed-off-by: Lorenz Brun <lorenz@monogon.tech>
---
drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +-
include/net/libeth/xsk.h | 2 +-
include/net/xdp.h | 3 +-
net/core/xdp.c | 72 ++++++++----------------
4 files changed, 29 insertions(+), 50 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 0643017541c35..6c01a14fde150 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -653,7 +653,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
construct_skb:
/* XDP_PASS path */
- skb = xdp_build_skb_from_zc(first);
+ skb = xdp_build_skb_from_zc(&rx_ring->q_vector->napi, first);
if (!skb) {
xsk_buff_free(first);
first = NULL;
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 82b5d21aae878..922b4587acd3f 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -468,7 +468,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
if (act != LIBETH_XDP_PASS)
return act != LIBETH_XDP_ABORTED;
- skb = xdp_build_skb_from_zc(&xdp->base);
+ skb = xdp_build_skb_from_zc(napi, &xdp->base);
if (unlikely(!skb)) {
libeth_xsk_buff_free_slow(xdp);
return true;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa742f413c358..fb2452243fd36 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -375,7 +375,8 @@ void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+ struct xdp_buff *xdp);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9890a30584ba7..54005b64e6cbb 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -677,16 +677,14 @@ EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
* xdp_copy_frags_from_zc - copy frags from XSk buff to skb
* @skb: skb to copy frags to
* @xdp: XSk &xdp_buff from which the frags will be copied
- * @pp: &page_pool backing page allocation, if available
*
* Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
- * Allocate a new buffer for each frag, copy it and attach to the skb.
+ * Allocate a new page for each frag, copy it and attach to the skb.
*
- * Return: true on success, false on netmem allocation fail.
+ * Return: true on success, false on page allocation fail.
*/
static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
- const struct xdp_buff *xdp,
- struct page_pool *pp)
+ const struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo = skb_shinfo(skb);
const struct skb_shared_info *xinfo;
@@ -699,20 +697,18 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
for (u32 i = 0; i < nr_frags; i++) {
const skb_frag_t *frag = &xinfo->frags[i];
u32 len = skb_frag_size(frag);
- u32 offset, truesize = len;
struct page *page;
- page = page_pool_dev_alloc(pp, &offset, &truesize);
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!page)) {
sinfo->nr_frags = i;
return false;
}
- memcpy(page_address(page) + offset, skb_frag_address(frag),
- LARGEST_ALIGN(len));
- __skb_fill_page_desc_noacc(sinfo, i, page, offset, len);
+ memcpy(page_address(page), skb_frag_address(frag), len);
+ __skb_fill_page_desc_noacc(sinfo, i, page, 0, len);
- tsize += truesize;
+ tsize += PAGE_SIZE;
if (page_is_pfmemalloc(page))
flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
@@ -725,49 +721,34 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
/**
* xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
+ * @napi: NAPI instance the buffer was received on (provides the skb cache)
* @xdp: source XSk buff
*
* Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
- * head, new buffer for the head, copy the data and initialize the skb fields.
- * If there are frags, allocate new buffers for them and copy.
- * Buffers are allocated from the system percpu pools to try recycling them.
- * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
- * On error, it remains untouched and the caller must take care of this.
+ * sized to the packet from the NAPI cache, copy the head data, and copy
+ * any frags into freshly allocated pages.
+ *
+ * If a new skb was built successfully, @xdp is returned to the XSk pool's
+ * freelist. On error, it remains untouched and the caller must take care
+ * of this.
*
* Return: new &sk_buff on success, %NULL on error.
*/
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+ struct xdp_buff *xdp)
{
const struct xdp_rxq_info *rxq = xdp->rxq;
- u32 len = xdp->data_end - xdp->data_meta;
- u32 truesize = xdp->frame_sz;
- struct sk_buff *skb = NULL;
- struct page_pool *pp;
- int metalen;
- void *data;
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
- if (!IS_ENABLED(CONFIG_PAGE_POOL))
+ skb = napi_alloc_skb(napi, totallen);
+ if (unlikely(!skb))
return NULL;
- local_lock_nested_bh(&system_page_pool.bh_lock);
- pp = this_cpu_read(system_page_pool.pool);
- data = page_pool_dev_alloc_va(pp, &truesize);
- if (unlikely(!data))
- goto out;
-
- skb = napi_build_skb(data, truesize);
- if (unlikely(!skb)) {
- page_pool_free_va(pp, data, true);
- goto out;
- }
-
- skb_mark_for_recycle(skb);
- skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+ skb_put_data(skb, xdp->data_meta, totallen);
- memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
-
- metalen = xdp->data - xdp->data_meta;
- if (metalen > 0) {
+ if (metalen) {
skb_metadata_set(skb, metalen);
__skb_pull(skb, metalen);
}
@@ -775,18 +756,15 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
skb_record_rx_queue(skb, rxq->queue_index);
if (unlikely(xdp_buff_has_frags(xdp)) &&
- unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+ unlikely(!xdp_copy_frags_from_zc(skb, xdp))) {
napi_consume_skb(skb, true);
- skb = NULL;
- goto out;
+ return NULL;
}
xsk_buff_free(xdp);
skb->protocol = eth_type_trans(skb, rxq->dev);
-out:
- local_unlock_nested_bh(&system_page_pool.bh_lock);
return skb;
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
--
2.51.2
reply other threads:[~2026-05-12 15:27 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260512152658.2818805-1-lorenz@monogon.tech \
--to=lorenz@monogon.tech \
--cc=aleksander.lobakin@intel.com \
--cc=andrew+netdev@lunn.ch \
--cc=anthony.l.nguyen@intel.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=horms@kernel.org \
--cc=intel-wired-lan@lists.osuosl.org \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=przemyslaw.kitszel@intel.com \
--cc=sdf@fomichev.me \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox