From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kernelxing@tencent.com>
Subject: [PATCH RFC net-next v4 04/14] xsk: cache data buffers to avoid frequently calling kmalloc_reserve
Date: Wed, 15 Apr 2026 16:26:44 +0800 [thread overview]
Message-ID: <20260415082654.21026-5-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20260415082654.21026-1-kerneljasonxing@gmail.com>
From: Jason Xing <kernelxing@tencent.com>
Caching data buffers is beneficial for small-packet transmission.
Replace per-SKB kmalloc_reserve() with on-demand bulk allocation from
skb_small_head_cache for small packets.
Add a persistent per-socket data buffer cache (batch.data_cache /
batch.data_count) that survives across batch cycles, similar to how
batch.send_queue caches built SKBs.
Just before the Phase-1 per-descriptor loop, when the cache holds fewer
buffers than nb_pkts, a single kmem_cache_alloc_bulk() tops it up
towards generic_xmit_batch objects. Inside the loop, small packets pop
directly from the cache. Large packets bypass the cache entirely
and fall back to kmalloc_reserve(), as do small packets once the cache
is empty. Unused buffers remain in the cache for the next batch.
I observed that kmalloc_reserve() consumes nearly 40%, which seems
unavoidable at first glance, and figured that adding a bulk mechanism
should improve performance. That is the motivation for this
patch. With it, the feature now gives us around a 10% improvement.
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
include/net/xdp_sock.h | 2 ++
net/core/skbuff.c | 27 ++++++++++++++++++++++-----
net/xdp/xsk.c | 24 ++++++++++++++++++++----
3 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 84f0aee3fb10..2151aab8f0a1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -51,6 +51,8 @@ struct xsk_batch {
struct sk_buff **skb_cache;
struct xdp_desc *desc_cache;
struct sk_buff_head send_queue;
+ unsigned int data_count;
+ void **data_cache;
};
struct xdp_sock {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f29cecacd8bb..5726b1566b2b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -661,9 +661,11 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err
unsigned int total_truesize = 0;
struct sk_buff *skb = NULL;
int node = NUMA_NO_NODE;
+ void **dc = batch->data_cache;
+ unsigned int dc_count = batch->data_count;
u32 i = 0, j, k = 0;
bool need_alloc;
- u8 *data;
+ void *data;
base_len = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
if (!(dev->priv_flags & IFF_TX_SKB_NO_LINEAR))
@@ -683,6 +685,13 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err
nb_pkts = skb_count;
alloc_data:
+ if (dc_count < nb_pkts && !(gfp_mask & KMALLOC_NOT_NORMAL_BITS))
+ dc_count += kmem_cache_alloc_bulk(
+ net_hotdata.skb_small_head_cache,
+ gfp_mask | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ batch->generic_xmit_batch - dc_count,
+ &dc[dc_count]);
+
/*
* Phase 1: Allocate data buffers and initialize SKBs.
* Pre-scan descriptors to determine packet boundaries, so we can
@@ -710,10 +719,17 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err
skb = skbs[skb_count - 1 - i];
skbuff_clear(skb);
- data = kmalloc_reserve(&size, gfp_mask, node, skb);
- if (unlikely(!data)) {
- *err = -ENOBUFS;
- break;
+ if (dc_count &&
+ SKB_HEAD_ALIGN(size) <= SKB_SMALL_HEAD_CACHE_SIZE) {
+ data = dc[--dc_count];
+ size = SKB_SMALL_HEAD_CACHE_SIZE;
+ } else {
+ data = kmalloc_reserve(&size, gfp_mask,
+ node, skb);
+ if (unlikely(!data)) {
+ *err = -ENOBUFS;
+ break;
+ }
}
__finalize_skb_around(skb, data, size);
/* Replace skb_set_owner_w() with the following */
@@ -762,6 +778,7 @@ int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err
while (k < i)
kfree_skb(skbs[skb_count - 1 - k++]);
+ batch->data_count = dc_count;
batch->skb_count = skb_count - i;
return j;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f97bc9cf9b9a..7a6991bc19a8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1229,14 +1229,22 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
}
static void xsk_batch_reset(struct xsk_batch *batch, struct sk_buff **skbs,
- struct xdp_desc *descs, unsigned int size)
-{
+ struct xdp_desc *descs, void **data,
+ unsigned int size)
+{
+ if (batch->data_count)
+ kmem_cache_free_bulk(net_hotdata.skb_small_head_cache,
+ batch->data_count,
+ batch->data_cache);
+ kfree(batch->data_cache);
if (batch->skb_count)
kmem_cache_free_bulk(net_hotdata.skbuff_cache,
batch->skb_count,
(void **)batch->skb_cache);
kfree(batch->skb_cache);
kvfree(batch->desc_cache);
+ batch->data_cache = data;
+ batch->data_count = 0;
batch->skb_cache = skbs;
batch->desc_cache = descs;
batch->skb_count = 0;
@@ -1272,7 +1280,7 @@ static int xsk_release(struct socket *sock)
xskq_destroy(xs->tx);
xskq_destroy(xs->fq_tmp);
xskq_destroy(xs->cq_tmp);
- xsk_batch_reset(&xs->batch, NULL, NULL, 0);
+ xsk_batch_reset(&xs->batch, NULL, NULL, NULL, 0);
sock_orphan(sk);
sock->sk = NULL;
@@ -1620,6 +1628,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_batch *batch = &xs->batch;
struct xdp_desc *descs;
struct sk_buff **skbs;
+ void **data;
unsigned int size;
int ret = 0;
@@ -1638,14 +1647,21 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
ret = -ENOMEM;
goto out;
}
+ data = kmalloc_array(size, sizeof(void *), GFP_KERNEL);
+ if (!data) {
+ kfree(skbs);
+ ret = -ENOMEM;
+ goto out;
+ }
descs = kvcalloc(size, sizeof(struct xdp_desc), GFP_KERNEL);
if (!descs) {
+ kfree(data);
kfree(skbs);
ret = -ENOMEM;
goto out;
}
- xsk_batch_reset(batch, skbs, descs, size);
+ xsk_batch_reset(batch, skbs, descs, data, size);
out:
mutex_unlock(&xs->mutex);
return ret;
--
2.41.3
next prev parent reply other threads:[~2026-04-15 8:27 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-15 8:26 [PATCH RFC net-next v4 00/14] xsk: batch xmit in copy mode Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 01/14] xsk: introduce XDP_GENERIC_XMIT_BATCH setsockopt Jason Xing
2026-04-15 8:51 ` sashiko-bot
2026-04-15 12:46 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 02/14] xsk: extend xsk_build_skb() to support passing an already allocated skb Jason Xing
2026-04-15 8:52 ` sashiko-bot
2026-04-15 13:19 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 03/14] xsk: add xsk_alloc_batch_skb() to build skbs in batch Jason Xing
2026-04-15 9:17 ` sashiko-bot
2026-04-16 1:18 ` Jason Xing
2026-04-15 8:26 ` Jason Xing [this message]
2026-04-15 9:38 ` [PATCH RFC net-next v4 04/14] xsk: cache data buffers to avoid frequently calling kmalloc_reserve sashiko-bot
2026-04-16 2:45 ` Jason Xing
2026-04-16 12:18 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 05/14] xsk: add direct xmit in batch function Jason Xing
2026-04-15 9:11 ` sashiko-bot
2026-04-16 3:04 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 06/14] xsk: support dynamic xmit.more control for batch xmit Jason Xing
2026-04-15 9:35 ` sashiko-bot
2026-04-16 3:43 ` Jason Xing
2026-04-16 4:50 ` Dmitry Torokhov
2026-04-16 4:51 ` Dmitry Torokhov
2026-04-15 8:26 ` [PATCH RFC net-next v4 07/14] xsk: try to skip validating skb list in xmit path Jason Xing
2026-04-15 9:33 ` sashiko-bot
2026-04-16 5:55 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 08/14] xsk: rename nb_pkts to nb_descs in xsk_tx_peek_release_desc_batch Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 09/14] xsk: extend xskq_cons_read_desc_batch to count nb_pkts Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 10/14] xsk: extend xsk_cq_reserve_locked() to reserve n slots Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 11/14] xsk: support batch xmit main logic Jason Xing
2026-04-15 9:38 ` sashiko-bot
2026-04-16 9:58 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 12/14] xsk: separate read-mostly and write-heavy fields in xsk_buff_pool Jason Xing
2026-04-15 9:20 ` sashiko-bot
2026-04-16 10:09 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 13/14] xsk: retire old xmit path in copy mode Jason Xing
2026-04-15 9:18 ` sashiko-bot
2026-04-16 10:33 ` Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 14/14] xsk: optimize xsk_build_skb for batch copy-mode fast path Jason Xing
2026-04-15 9:47 ` sashiko-bot
2026-04-16 13:12 ` Jason Xing
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260415082654.21026-5-kerneljasonxing@gmail.com \
--to=kerneljasonxing@gmail.com \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jonathan.lemon@gmail.com \
--cc=kernelxing@tencent.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.