All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
	maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
	sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com, joe@dama.to,
	willemdebruijn.kernel@gmail.com
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Jason Xing <kernelxing@tencent.com>
Subject: [PATCH net-next v3 3/9] xsk: add xsk_alloc_batch_skb() to build skbs in batch
Date: Tue, 21 Oct 2025 21:12:03 +0800	[thread overview]
Message-ID: <20251021131209.41491-4-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20251021131209.41491-1-kerneljasonxing@gmail.com>

From: Jason Xing <kernelxing@tencent.com>

Support allocating and building skbs in batch.

This patch uses kmem_cache_alloc_bulk() to complete the batch allocation
which relies on the global common cache 'net_hotdata.skbuff_cache'. Use
a xsk standalone skb cache (namely, xs->skb_cache) to store allocated
skbs instead of resorting to napi_alloc_cache that was designed for
softirq condition.

After allocating memory for each of skbs, in a 'for' loop, the patch
borrows part of __allocate_skb() to initialize skb and then calls
xsk_build_skb() to complete the rest of initialization process, like
copying data and stuff.

Add batch.send_queue and use the skb->list to make skbs into one chain
so that they can be easily sent which is shown in the subsequent patches.

In terms of freeing skbs process, napi_consume_skb() in the tx completion
would put the skb into global cache 'net_hotdata.skbuff_cache' that
implements the deferred freeing skb feature to avoid freeing skb one
by one to improve the performance.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/net/xdp_sock.h |   3 ++
 net/core/skbuff.c      | 101 +++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk.c          |   1 +
 3 files changed, 105 insertions(+)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 8944f4782eb6..cb5aa8a314fe 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -47,8 +47,10 @@ struct xsk_map {
 
 struct xsk_batch {
 	u32 generic_xmit_batch;
+	unsigned int skb_count;
 	struct sk_buff **skb_cache;
 	struct xdp_desc *desc_cache;
+	struct sk_buff_head send_queue;
 };
 
 struct xdp_sock {
@@ -130,6 +132,7 @@ struct xsk_tx_metadata_ops {
 struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 			      struct sk_buff *allocated_skb,
 			      struct xdp_desc *desc);
+int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err);
 #ifdef CONFIG_XDP_SOCKETS
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bc12790017b0..5b6d3b4fa895 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -81,6 +81,8 @@
 #include <net/page_pool/helpers.h>
 #include <net/psp/types.h>
 #include <net/dropreason.h>
+#include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
@@ -615,6 +617,105 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 	return obj;
 }
 
+int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err)
+{
+	struct xsk_batch *batch = &xs->batch;
+	struct xdp_desc *descs = batch->desc_cache;
+	struct sk_buff **skbs = batch->skb_cache;
+	gfp_t gfp_mask = xs->sk.sk_allocation;
+	struct net_device *dev = xs->dev;
+	int node = NUMA_NO_NODE;
+	struct sk_buff *skb;
+	u32 i = 0, j = 0;
+	bool pfmemalloc;
+	u32 base_len;
+	u8 *data;
+
+	base_len = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+	if (!(dev->priv_flags & IFF_TX_SKB_NO_LINEAR))
+		base_len += dev->needed_tailroom;
+
+	if (batch->skb_count >= nb_pkts)
+		goto build;
+
+	if (xs->skb) {
+		i = 1;
+		batch->skb_count++;
+	}
+
+	batch->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+						  gfp_mask, nb_pkts - batch->skb_count,
+						  (void **)&skbs[batch->skb_count]);
+	if (batch->skb_count < nb_pkts)
+		nb_pkts = batch->skb_count;
+
+build:
+	for (i = 0, j = 0; j < nb_descs; j++) {
+		if (!xs->skb) {
+			u32 size = base_len + descs[j].len;
+
+			/* In case we don't have enough allocated skbs */
+			if (i >= nb_pkts) {
+				*err = -EAGAIN;
+				break;
+			}
+
+			if (sk_wmem_alloc_get(&xs->sk) > READ_ONCE(xs->sk.sk_sndbuf)) {
+				*err = -EAGAIN;
+				break;
+			}
+
+			skb = skbs[batch->skb_count - 1 - i];
+
+			prefetchw(skb);
+			/* We do our best to align skb_shared_info on a separate cache
+			 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+			 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+			 * Both skb->head and skb_shared_info are cache line aligned.
+			 */
+			data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
+			if (unlikely(!data)) {
+				*err = -ENOBUFS;
+				break;
+			}
+			/* kmalloc_size_roundup() might give us more room than requested.
+			 * Put skb_shared_info exactly at the end of allocated zone,
+			 * to allow max possible filling before reallocation.
+			 */
+			prefetchw(data + SKB_WITH_OVERHEAD(size));
+
+			memset(skb, 0, offsetof(struct sk_buff, tail));
+			__build_skb_around(skb, data, size);
+			skb->pfmemalloc = pfmemalloc;
+			skb_set_owner_w(skb, &xs->sk);
+		} else if (unlikely(i == 0)) {
+			/* We have a skb in cache that is left last time */
+			kmem_cache_free(net_hotdata.skbuff_cache,
+					skbs[batch->skb_count - 1]);
+			skbs[batch->skb_count - 1] = xs->skb;
+		}
+
+		skb = xsk_build_skb(xs, skb, &descs[j]);
+		if (IS_ERR(skb)) {
+			*err = PTR_ERR(skb);
+			break;
+		}
+
+		if (xp_mb_desc(&descs[j])) {
+			xs->skb = skb;
+			continue;
+		}
+
+		xs->skb = NULL;
+		i++;
+		__skb_queue_tail(&batch->send_queue, skb);
+	}
+
+	batch->skb_count -= i;
+
+	return j;
+}
+
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
  *	[BEEP] leaks.
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f9458347ff7b..cf45c7545124 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1906,6 +1906,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 
 	INIT_LIST_HEAD(&xs->map_list);
 	spin_lock_init(&xs->map_list_lock);
+	__skb_queue_head_init(&xs->batch.send_queue);
 
 	mutex_lock(&net->xdp.lock);
 	sk_add_node_rcu(sk, &net->xdp.list);
-- 
2.41.3


  parent reply	other threads:[~2025-10-21 13:12 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-21 13:12 [PATCH net-next v3 0/9] xsk: batch xmit in copy mode Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 1/9] xsk: introduce XDP_GENERIC_XMIT_BATCH setsockopt Jason Xing
2025-10-24 13:30   ` Simon Horman
2025-10-25  9:08     ` Jason Xing
2025-10-28 14:44       ` Simon Horman
2025-10-29  0:00         ` Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 2/9] xsk: extend xsk_build_skb() to support passing an already allocated skb Jason Xing
2025-10-21 13:12 ` Jason Xing [this message]
2025-10-23 17:30   ` [PATCH net-next v3 3/9] xsk: add xsk_alloc_batch_skb() to build skbs in batch kernel test robot
2025-10-23 18:25   ` kernel test robot
2025-10-24 13:33   ` Simon Horman
2025-10-25  9:26     ` Jason Xing
2025-10-24 18:49   ` Stanislav Fomichev
2025-10-25  9:11     ` Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 4/9] xsk: add direct xmit in batch function Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 5/9] xsk: rename nb_pkts to nb_descs in xsk_tx_peek_release_desc_batch Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 6/9] xsk: extend xskq_cons_read_desc_batch to count nb_pkts Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 7/9] xsk: support batch xmit main logic Jason Xing
2025-10-24 13:32   ` Simon Horman
2025-10-25  9:09     ` Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 8/9] xsk: support generic batch xmit in copy mode Jason Xing
2025-10-24 18:52   ` Stanislav Fomichev
2025-10-25  9:28     ` Jason Xing
2025-10-21 13:12 ` [PATCH net-next v3 9/9] xsk: support dynamic xmit.more control for batch xmit Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251021131209.41491-4-kerneljasonxing@gmail.com \
    --to=kerneljasonxing@gmail.com \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=joe@dama.to \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kernelxing@tencent.com \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.