bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v3] net: xsk: introduce XDP_MAX_TX_BUDGET set/getsockopt
@ 2025-06-19  9:04 Jason Xing
  2025-06-19 13:53 ` Willem de Bruijn
  2025-06-19 15:09 ` Jakub Kicinski
  0 siblings, 2 replies; 26+ messages in thread
From: Jason Xing @ 2025-06-19  9:04 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, bjorn, magnus.karlsson,
	maciej.fijalkowski, jonathan.lemon, sdf, ast, daniel, hawk,
	john.fastabend, joe, willemdebruijn.kernel
  Cc: bpf, netdev, Jason Xing

From: Jason Xing <kernelxing@tencent.com>

The patch does the following things:
- Add XDP_MAX_TX_BUDGET socket option.
- Unify TX_BATCH_SIZE and MAX_PER_SOCKET_BUDGET into a single per-socket
  field, max_tx_budget.
- max_tx_budget is set to 32 by default when the socket is created.
  It provides per-socket granular control.

The idea behind this comes from real production workloads. We use a
user-level stack with xsk support to accelerate sending packets and
minimize the number of syscalls. When packets are aggregated, it's not
hard to hit the upper bound (namely, 32). As soon as the user-space stack
receives the -EAGAIN error returned from sendto(), it loops and tries
again until all the expected descs from the tx ring are sent out to the
driver. Enlarging the XDP_MAX_TX_BUDGET value results in fewer sendto()
calls. Besides, applications using this setsockopt can adjust the value
promptly after noticing that the upper bound is being hit.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
V3
Link: https://lore.kernel.org/all/20250618065553.96822-1-kerneljasonxing@gmail.com/
1. use a per-socket control (suggested by Stanislav)
2. unify both definitions into one
3. support setsockopt and getsockopt
4. add more description in commit message

V2
Link: https://lore.kernel.org/all/20250617002236.30557-1-kerneljasonxing@gmail.com/
1. use a per-netns sysctl knob
2. use sysctl_xsk_max_tx_budget to unify both definitions.
---
 include/net/xdp_sock.h            |  3 ++-
 include/uapi/linux/if_xdp.h       |  1 +
 net/xdp/xsk.c                     | 36 +++++++++++++++++++++++++------
 tools/include/uapi/linux/if_xdp.h |  1 +
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e8bd6ddb7b12..8eecafad92c0 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -65,11 +65,12 @@ struct xdp_sock {
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head tx_list;
 	/* record the number of tx descriptors sent by this xsk and
-	 * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
+	 * when it exceeds max_tx_budget, an opportunity needs
 	 * to be given to other xsks for sending tx descriptors, thereby
 	 * preventing other XSKs from being starved.
 	 */
 	u32 tx_budget_spent;
+	u32 max_tx_budget;
 
 	/* Statistics */
 	u64 rx_dropped;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 44f2bb93e7e6..07c6d21c2f1c 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -79,6 +79,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
 #define XDP_OPTIONS			8
+#define XDP_MAX_TX_BUDGET		9
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72c000c0ae5f..7c47f665e9d1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,9 +33,6 @@
 #include "xdp_umem.h"
 #include "xsk.h"
 
-#define TX_BATCH_SIZE 32
-#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -424,7 +421,9 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
 	rcu_read_lock();
 again:
 	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
-		if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) {
+		int max_budget = READ_ONCE(xs->max_tx_budget);
+
+		if (xs->tx_budget_spent >= max_budget) {
 			budget_exhausted = true;
 			continue;
 		}
@@ -779,7 +778,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 static int __xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
-	u32 max_batch = TX_BATCH_SIZE;
+	u32 max_budget = READ_ONCE(xs->max_tx_budget);
 	bool sent_frame = false;
 	struct xdp_desc desc;
 	struct sk_buff *skb;
@@ -797,7 +796,7 @@ static int __xsk_generic_xmit(struct sock *sk)
 		goto out;
 
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-		if (max_batch-- == 0) {
+		if (max_budget-- == 0) {
 			err = -EAGAIN;
 			goto out;
 		}
@@ -1437,6 +1436,18 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
+	case XDP_MAX_TX_BUDGET:
+	{
+		unsigned int budget;
+
+		if (optlen < sizeof(budget))
+			return -EINVAL;
+		if (copy_from_sockptr(&budget, optval, sizeof(budget)))
+			return -EFAULT;
+
+		WRITE_ONCE(xs->max_tx_budget, budget);
+		return 0;
+	}
 	default:
 		break;
 	}
@@ -1588,6 +1599,18 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
 
 		return 0;
 	}
+	case XDP_MAX_TX_BUDGET:
+	{
+		unsigned int budget = READ_ONCE(xs->max_tx_budget);
+
+		if (copy_to_user(optval, &budget, sizeof(budget)))
+			return -EFAULT;
+		if (put_user(sizeof(budget), optlen))
+			return -EFAULT;
+
+		return 0;
+	}
+
 	default:
 		break;
 	}
@@ -1734,6 +1757,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 
 	xs = xdp_sk(sk);
 	xs->state = XSK_READY;
+	xs->max_tx_budget = 32;
 	mutex_init(&xs->mutex);
 
 	INIT_LIST_HEAD(&xs->map_list);
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 44f2bb93e7e6..07c6d21c2f1c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -79,6 +79,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
 #define XDP_OPTIONS			8
+#define XDP_MAX_TX_BUDGET		9
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2025-06-24  2:48 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-19  9:04 [PATCH net-next v3] net: xsk: introduce XDP_MAX_TX_BUDGET set/getsockopt Jason Xing
2025-06-19 13:53 ` Willem de Bruijn
2025-06-19 23:53   ` Jason Xing
2025-06-20  0:02     ` Jason Xing
2025-06-20 13:43     ` Willem de Bruijn
2025-06-20 13:58       ` Willem de Bruijn
2025-06-20 14:37         ` Jason Xing
2025-06-20 22:21           ` Willem de Bruijn
2025-06-19 15:09 ` Jakub Kicinski
2025-06-20  0:17   ` Jason Xing
2025-06-20 13:50     ` Willem de Bruijn
2025-06-20 15:03       ` Jason Xing
2025-06-20 22:24         ` Willem de Bruijn
2025-06-21  0:40           ` Jason Xing
2025-06-21 14:43     ` Jakub Kicinski
2025-06-22  0:05       ` Jason Xing
2025-06-20 14:25   ` Stanislav Fomichev
2025-06-20 16:30     ` Jason Xing
2025-06-20 16:47       ` Stanislav Fomichev
2025-06-20 17:46         ` Jason Xing
2025-06-23 14:18           ` Stanislav Fomichev
2025-06-23 23:54             ` Jason Xing
2025-06-24  0:48               ` Stanislav Fomichev
2025-06-24  2:47                 ` Jason Xing
2025-06-20 22:20     ` Willem de Bruijn
2025-06-21  1:06       ` Jason Xing

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).