From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
hawk@kernel.org, john.fastabend@gmail.com
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kernelxing@tencent.com>
Subject: [PATCH RFC net-next v4 13/14] xsk: retire old xmit path in copy mode
Date: Wed, 15 Apr 2026 16:26:53 +0800 [thread overview]
Message-ID: <20260415082654.21026-14-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20260415082654.21026-1-kerneljasonxing@gmail.com>
From: Jason Xing <kernelxing@tencent.com>
Add a new helper, xsk_init_batch(), that allocates the per-socket batch
buffers, and call it from xsk_create() with a default batch size of 1.
Remove the now-obsolete __xsk_generic_xmit() and route the copy-mode
xmit path through __xsk_generic_xmit_batch() instead.
Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
net/xdp/xsk.c | 151 +++++++++++++-------------------------------------
1 file changed, 37 insertions(+), 114 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index e1ad2ac2b39a..be341290e42c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1036,101 +1036,14 @@ static int __xsk_generic_xmit_batch(struct xdp_sock *xs)
return err;
}
-static int __xsk_generic_xmit(struct sock *sk)
-{
- struct xdp_sock *xs = xdp_sk(sk);
- bool sent_frame = false;
- struct xdp_desc desc;
- struct sk_buff *skb;
- u32 max_batch;
- int err = 0;
-
- mutex_lock(&xs->mutex);
-
- /* Since we dropped the RCU read lock, the socket state might have changed. */
- if (unlikely(!xsk_is_bound(xs))) {
- err = -ENXIO;
- goto out;
- }
-
- if (xs->queue_id >= xs->dev->real_num_tx_queues)
- goto out;
-
- max_batch = READ_ONCE(xs->max_tx_budget);
- while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
- if (max_batch-- == 0) {
- err = -EAGAIN;
- goto out;
- }
-
- /* This is the backpressure mechanism for the Tx path.
- * Reserve space in the completion queue and only proceed
- * if there is space in it. This avoids having to implement
- * any buffering in the Tx path.
- */
- if (!xsk_cq_reserve_locked(xs->pool, 1)) {
- err = -EAGAIN;
- goto out;
- }
-
- skb = xsk_build_skb(xs, NULL, &desc);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- if (err != -EOVERFLOW)
- goto out;
- err = 0;
- continue;
- }
-
- xskq_cons_release(xs->tx);
-
- if (xp_mb_desc(&desc)) {
- xs->skb = skb;
- continue;
- }
-
- err = __dev_direct_xmit(skb, xs->queue_id);
- if (err == NETDEV_TX_BUSY) {
- /* Tell user-space to retry the send */
- xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
- xsk_consume_skb(skb);
- err = -EAGAIN;
- goto out;
- }
-
- /* Ignore NET_XMIT_CN as packet might have been sent */
- if (err == NET_XMIT_DROP) {
- /* SKB completed but not sent */
- err = -EBUSY;
- xs->skb = NULL;
- goto out;
- }
-
- sent_frame = true;
- xs->skb = NULL;
- }
-
- if (xskq_has_descs(xs->tx)) {
- if (xs->skb)
- xsk_drop_skb(xs->skb);
- xskq_cons_release(xs->tx);
- }
-
-out:
- if (sent_frame)
- __xsk_tx_release(xs);
-
- mutex_unlock(&xs->mutex);
- return err;
-}
-
static int xsk_generic_xmit(struct sock *sk)
{
+ struct xdp_sock *xs = xdp_sk(sk);
int ret;
/* Drop the RCU lock since the SKB path might sleep. */
rcu_read_unlock();
- ret = __xsk_generic_xmit(sk);
+ ret = __xsk_generic_xmit_batch(xs);
/* Reaquire RCU lock before going into common code. */
rcu_read_lock();
@@ -1626,6 +1539,34 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+static int xsk_init_batch(struct xsk_batch *batch, unsigned int size)
+{
+ struct xdp_desc *descs;
+ struct sk_buff **skbs;
+ void **data;
+
+ skbs = kmalloc(size * sizeof(struct sk_buff *), GFP_KERNEL);
+ if (!skbs)
+ return -ENOMEM;
+
+ data = kmalloc_array(size, sizeof(void *), GFP_KERNEL);
+ if (!data) {
+ kfree(skbs);
+ return -ENOMEM;
+ }
+
+ descs = kvcalloc(size, sizeof(struct xdp_desc), GFP_KERNEL);
+ if (!descs) {
+ kfree(data);
+ kfree(skbs);
+ return -ENOMEM;
+ }
+
+ xsk_batch_reset(batch, skbs, descs, data, size);
+
+ return 0;
+}
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1746,9 +1687,6 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
{
struct xsk_buff_pool *pool = xs->pool;
struct xsk_batch *batch = &xs->batch;
- struct xdp_desc *descs;
- struct sk_buff **skbs;
- void **data;
unsigned int size;
int ret = 0;
@@ -1762,27 +1700,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -EACCES;
mutex_lock(&xs->mutex);
- skbs = kmalloc(size * sizeof(struct sk_buff *), GFP_KERNEL);
- if (!skbs) {
- ret = -ENOMEM;
- goto out;
- }
- data = kmalloc_array(size, sizeof(void *), GFP_KERNEL);
- if (!data) {
- kfree(skbs);
- ret = -ENOMEM;
- goto out;
- }
- descs = kvcalloc(size, sizeof(struct xdp_desc), GFP_KERNEL);
- if (!descs) {
- kfree(data);
- kfree(skbs);
- ret = -ENOMEM;
- goto out;
- }
-
- xsk_batch_reset(batch, skbs, descs, data, size);
-out:
+ ret = xsk_init_batch(batch, size);
mutex_unlock(&xs->mutex);
return ret;
}
@@ -2056,6 +1974,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
{
struct xdp_sock *xs;
struct sock *sk;
+ int ret;
if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
@@ -2071,6 +1990,11 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
if (!sk)
return -ENOBUFS;
+ xs = xdp_sk(sk);
+ ret = xsk_init_batch(&xs->batch, 1);
+ if (ret)
+ return ret;
+
sock->ops = &xsk_proto_ops;
sock_init_data(sock, sk);
@@ -2081,7 +2005,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sock_set_flag(sk, SOCK_RCU_FREE);
- xs = xdp_sk(sk);
xs->state = XSK_READY;
xs->max_tx_budget = TX_BATCH_SIZE;
mutex_init(&xs->mutex);
--
2.41.3
next prev parent reply other threads:[~2026-04-15 8:28 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-15 8:26 [PATCH RFC net-next v4 00/14] xsk: batch xmit in copy mode Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 01/14] xsk: introduce XDP_GENERIC_XMIT_BATCH setsockopt Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 02/14] xsk: extend xsk_build_skb() to support passing an already allocated skb Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 03/14] xsk: add xsk_alloc_batch_skb() to build skbs in batch Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 04/14] xsk: cache data buffers to avoid frequently calling kmalloc_reserve Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 05/14] xsk: add direct xmit in batch function Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 06/14] xsk: support dynamic xmit.more control for batch xmit Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 07/14] xsk: try to skip validating skb list in xmit path Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 08/14] xsk: rename nb_pkts to nb_descs in xsk_tx_peek_release_desc_batch Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 09/14] xsk: extend xskq_cons_read_desc_batch to count nb_pkts Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 10/14] xsk: extend xsk_cq_reserve_locked() to reserve n slots Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 11/14] xsk: support batch xmit main logic Jason Xing
2026-04-15 8:26 ` [PATCH RFC net-next v4 12/14] xsk: separate read-mostly and write-heavy fields in xsk_buff_pool Jason Xing
2026-04-15 8:26 ` Jason Xing [this message]
2026-04-15 8:26 ` [PATCH RFC net-next v4 14/14] xsk: optimize xsk_build_skb for batch copy-mode fast path Jason Xing
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260415082654.21026-14-kerneljasonxing@gmail.com \
--to=kerneljasonxing@gmail.com \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jonathan.lemon@gmail.com \
--cc=kernelxing@tencent.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox