All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
	maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
	sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com, horms@kernel.org,
	andrew+netdev@lunn.ch
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Jason Xing <kernelxing@tencent.com>
Subject: [PATCH net-next v2 1/9] xsk: introduce XDP_GENERIC_XMIT_BATCH setsockopt
Date: Mon, 25 Aug 2025 21:53:34 +0800	[thread overview]
Message-ID: <20250825135342.53110-2-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20250825135342.53110-1-kerneljasonxing@gmail.com>

From: Jason Xing <kernelxing@tencent.com>

Add a new socket option that provides an alternative way to achieve higher
overall throughput once the rest of the series is applied.

Initialize skb_cache and desc_batch in setsockopt under the protection of
xs->mutex.

skb_cache will be used to store the skbs that are allocated in one go in
the xmit path. desc_batch will be used to temporarily store descriptors of
the pool.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 Documentation/networking/af_xdp.rst | 11 +++++++
 include/net/xdp_sock.h              |  3 ++
 include/uapi/linux/if_xdp.h         |  1 +
 net/xdp/xsk.c                       | 47 +++++++++++++++++++++++++++++
 tools/include/uapi/linux/if_xdp.h   |  1 +
 5 files changed, 63 insertions(+)

diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 50d92084a49c..decb4da80db4 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -447,6 +447,17 @@ mode to allow application to tune the per-socket maximum iteration for
 better throughput and less frequency of send syscall.
 Allowed range is [32, xs->tx->nentries].
 
+XDP_GENERIC_XMIT_BATCH
+----------------------
+
+It provides an option that allows applications to use batch xmit in copy
+mode. Batch processing first tries to allocate a certain number of skbs
+through the bulk mechanism and then sends them out at one time, reducing
+how often a few locks (like the cache lock and the queue lock) are grabbed
+and released. It normally improves overall performance, as observed with
+the xdpsock benchmark, but it might increase per-packet latency.
+The value must not be larger than xs->max_tx_budget.
+
 XDP_STATISTICS getsockopt
 -------------------------
 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index ce587a225661..c2b05268b8ad 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -70,6 +70,7 @@ struct xdp_sock {
 	 * preventing other XSKs from being starved.
 	 */
 	u32 tx_budget_spent;
+	u32 generic_xmit_batch;
 
 	/* Statistics */
 	u64 rx_dropped;
@@ -89,6 +90,8 @@ struct xdp_sock {
 	struct mutex mutex;
 	struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
 	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
+	struct sk_buff **skb_cache;
+	struct xdp_desc *desc_batch;
 };
 
 /*
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 23a062781468..44cb72cd328e 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -80,6 +80,7 @@ struct xdp_mmap_offsets {
 #define XDP_STATISTICS			7
 #define XDP_OPTIONS			8
 #define XDP_MAX_TX_SKB_BUDGET		9
+#define XDP_GENERIC_XMIT_BATCH		10
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9c3acecc14b1..e75a6e2bab83 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1122,6 +1122,8 @@ static int xsk_release(struct socket *sock)
 	xskq_destroy(xs->tx);
 	xskq_destroy(xs->fq_tmp);
 	xskq_destroy(xs->cq_tmp);
+	kfree(xs->skb_cache);
+	kvfree(xs->desc_batch);
 
 	sock_orphan(sk);
 	sock->sk = NULL;
@@ -1456,6 +1458,51 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		WRITE_ONCE(xs->max_tx_budget, budget);
 		return 0;
 	}
+	case XDP_GENERIC_XMIT_BATCH:
+	{
+		struct xdp_desc *descs;
+		struct sk_buff **skbs;
+		unsigned int batch;
+		int ret = 0;
+
+		if (optlen != sizeof(batch))
+			return -EINVAL;
+		if (copy_from_sockptr(&batch, optval, sizeof(batch)))
+			return -EFAULT;
+		if (batch > xs->max_tx_budget)
+			return -EACCES;
+
+		mutex_lock(&xs->mutex);
+		if (!batch) {
+			kfree(xs->skb_cache);
+			kvfree(xs->desc_batch);
+			xs->generic_xmit_batch = 0;
+			goto out;
+		}
+
+		skbs = kmalloc(batch * sizeof(struct sk_buff *), GFP_KERNEL);
+		if (!skbs) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		descs = kvcalloc(batch, sizeof(*xs->desc_batch), GFP_KERNEL);
+		if (!skbs) {
+			kfree(skbs);
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (xs->skb_cache)
+			kfree(xs->skb_cache);
+		if (xs->desc_batch)
+			kvfree(xs->desc_batch);
+
+		xs->skb_cache = skbs;
+		xs->desc_batch = descs;
+		xs->generic_xmit_batch = batch;
+out:
+		mutex_unlock(&xs->mutex);
+		return ret;
+	}
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 23a062781468..44cb72cd328e 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -80,6 +80,7 @@ struct xdp_mmap_offsets {
 #define XDP_STATISTICS			7
 #define XDP_OPTIONS			8
 #define XDP_MAX_TX_SKB_BUDGET		9
+#define XDP_GENERIC_XMIT_BATCH		10
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
-- 
2.41.3


  reply	other threads:[~2025-08-25 13:54 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-25 13:53 [PATCH net-next v2 0/9] xsk: improvement performance in copy mode Jason Xing
2025-08-25 13:53 ` Jason Xing [this message]
2025-08-25 13:53 ` [PATCH net-next v2 2/9] xsk: add descs parameter in xskq_cons_read_desc_batch() Jason Xing
2025-08-25 21:18   ` Maciej Fijalkowski
2025-08-26  0:10     ` Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 3/9] xsk: introduce locked version of xskq_prod_write_addr_batch Jason Xing
2025-08-25 21:42   ` Maciej Fijalkowski
2025-08-26  0:13     ` Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 4/9] xsk: extend xsk_build_skb() to support passing an already allocated skb Jason Xing
2025-08-25 21:49   ` Maciej Fijalkowski
2025-08-26  0:26     ` Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 5/9] xsk: add xsk_alloc_batch_skb() to build skbs in batch Jason Xing
2025-08-25 16:56   ` kernel test robot
2025-08-27 14:32   ` Alexander Lobakin
2025-08-28  0:38     ` Jason Xing
2025-08-28 15:28       ` Alexander Lobakin
2025-08-29  0:31         ` Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 6/9] xsk: add direct xmit in batch function Jason Xing
2025-08-25 17:34   ` Stanislav Fomichev
2025-08-26  0:27     ` Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 7/9] xsk: support batch xmit main logic Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 8/9] xsk: support generic batch xmit in copy mode Jason Xing
2025-08-25 13:53 ` [PATCH net-next v2 9/9] xsk: support dynamic xmit.more control for batch xmit Jason Xing
2025-08-25 17:44 ` [PATCH net-next v2 0/9] xsk: improvement performance in copy mode Jakub Kicinski
2025-08-26  0:01   ` Jason Xing
2025-08-26  0:29     ` Jakub Kicinski
2025-08-26  0:51       ` Jason Xing
2025-08-26  1:15         ` Jakub Kicinski
2025-08-26  1:49           ` Jason Xing
2025-08-25 21:15 ` Maciej Fijalkowski
2025-08-26  0:06   ` Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250825135342.53110-2-kerneljasonxing@gmail.com \
    --to=kerneljasonxing@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=horms@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kernelxing@tencent.com \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.