From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com, joe@dama.to
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kernelxing@tencent.com>
Subject: [PATCH net-next v2] net: xsk: add sysctl_xsk_max_tx_budget in the xmit path
Date: Wed, 18 Jun 2025 14:55:53 +0800 [thread overview]
Message-ID: <20250618065553.96822-1-kerneljasonxing@gmail.com> (raw)
From: Jason Xing <kernelxing@tencent.com>
For some applications, it's quite useful to let users tune the max tx
budget, e.g. to accelerate transmission, when an xsk is sending packets.
Exposing such a knob also helps automated/AI tuning in the long run.
The patch unifies the two definitions (TX_BATCH_SIZE and
MAX_PER_SOCKET_BUDGET) into a single value that is 32 by default and
makes the sysctl knob per network namespace (per-netns).
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
v2
Link: https://lore.kernel.org/all/20250617002236.30557-1-kerneljasonxing@gmail.com/
1. use a per-netns sysctl knob
2. use sysctl_xsk_max_tx_budget to unify both definitions.
---
include/net/netns/core.h | 1 +
include/net/xdp_sock.h | 2 +-
net/core/net_namespace.c | 1 +
net/core/sysctl_net_core.c | 8 ++++++++
net/xdp/xsk.c | 12 ++++++------
5 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 9b36f0ff0c20..f1ff15fd0032 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -14,6 +14,7 @@ struct netns_core {
int sysctl_somaxconn;
int sysctl_optmem_max;
+ int sysctl_xsk_max_tx_budget;
u8 sysctl_txrehash;
u8 sysctl_tstamp_allow_data;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e8bd6ddb7b12..57b26ad12aa1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -65,7 +65,7 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
/* record the number of tx descriptors sent by this xsk and
- * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
+ * when it exceeds sysctl_xsk_max_tx_budget, an opportunity needs
* to be given to other xsks for sending tx descriptors, thereby
* preventing other XSKs from being starved.
*/
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae54f26709ca..890f8dc28690 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -396,6 +396,7 @@ static __net_init void preinit_net_sysctl(struct net *net)
net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
net->core.sysctl_tstamp_allow_data = 1;
+ net->core.sysctl_xsk_max_tx_budget = 32;
}
/* init code that must occur even if setup_net() is not called. */
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5dbb2c6f371d..a51d9c7246ee 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -667,6 +667,14 @@ static struct ctl_table netns_core_table[] = {
.extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
+ {
+ .procname = "xsk_max_tx_budget",
+ .data = &init_net.core.sysctl_xsk_max_tx_budget,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .extra1 = SYSCTL_ONE,
+ .proc_handler = proc_dointvec_minmax
+ },
{
.procname = "txrehash",
.data = &init_net.core.sysctl_txrehash,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72c000c0ae5f..15df133b50d7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,9 +33,6 @@
#include "xdp_umem.h"
#include "xsk.h"
-#define TX_BATCH_SIZE 32
-#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -424,7 +421,10 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
rcu_read_lock();
again:
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
- if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) {
+ struct sock *sk = (struct sock *)xs;
+ int max_budget = READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget);
+
+ if (xs->tx_budget_spent >= max_budget) {
budget_exhausted = true;
continue;
}
@@ -778,8 +778,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
static int __xsk_generic_xmit(struct sock *sk)
{
+ u32 max_budget = READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget);
struct xdp_sock *xs = xdp_sk(sk);
- u32 max_batch = TX_BATCH_SIZE;
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
@@ -797,7 +797,7 @@ static int __xsk_generic_xmit(struct sock *sk)
goto out;
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
- if (max_batch-- == 0) {
+ if (max_budget-- == 0) {
err = -EAGAIN;
goto out;
}
--
2.43.5
next reply other threads:[~2025-06-18 6:56 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-18 6:55 Jason Xing [this message]
2025-06-18 15:09 ` [PATCH net-next v2] net: xsk: add sysctl_xsk_max_tx_budget in the xmit path Stanislav Fomichev
2025-06-18 19:21 ` Jason Xing
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250618065553.96822-1-kerneljasonxing@gmail.com \
--to=kerneljasonxing@gmail.com \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=joe@dama.to \
--cc=john.fastabend@gmail.com \
--cc=jonathan.lemon@gmail.com \
--cc=kernelxing@tencent.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).