public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
	maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
	sdf@fomichev.me, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Jason Xing <kernelxing@tencent.com>
Subject: [PATCH RFC net-next v4 11/14] xsk: support batch xmit main logic
Date: Wed, 15 Apr 2026 16:26:51 +0800	[thread overview]
Message-ID: <20260415082654.21026-12-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20260415082654.21026-1-kerneljasonxing@gmail.com>

From: Jason Xing <kernelxing@tencent.com>

__xsk_generic_xmit_batch() is the core function of batch xmit: it
implements a batch version of __xsk_generic_xmit().

The whole logic is divided into sections:
1. check if we have enough available slots in tx ring and completion
   ring.
2. read descriptors from the tx ring into batch->desc_cache in batches
3. reserve enough slots in completion ring to avoid backpressure
4. allocate and build skbs in batches
5. send all the possible packets in batches at one time

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 net/xdp/xsk.c       | 116 ++++++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk_queue.h |   8 +++
 2 files changed, 124 insertions(+)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c26e26cb4dda..e1ad2ac2b39a 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -920,6 +920,122 @@ struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	return ERR_PTR(err);
 }
 
+static int __xsk_generic_xmit_batch(struct xdp_sock *xs)
+{
+	struct xsk_buff_pool *pool = xs->pool;
+	struct xsk_batch *batch = &xs->batch;
+	struct xdp_desc *descs = batch->desc_cache;
+	struct net_device *dev = xs->dev;
+	u32 max_batch, max_budget;
+	bool sent_frame = false;
+	struct sk_buff *skb;
+	u32 cons_descs;
+	int err = 0;
+	u32 i = 0;
+
+	mutex_lock(&xs->mutex);
+
+	/* Since we dropped the RCU read lock, the socket state might have changed. */
+	if (unlikely(!xsk_is_bound(xs))) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	if (xs->queue_id >= dev->real_num_tx_queues) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	max_budget = READ_ONCE(xs->max_tx_budget);
+	max_batch = batch->generic_xmit_batch;
+
+	for (i = 0; i < max_budget; i += cons_descs) {
+		u32 nb_pkts = 0;
+		u32 nb_descs;
+
+		nb_descs = min(max_batch, max_budget - i);
+		nb_descs = xskq_cons_nb_entries(xs->tx, nb_descs);
+		if (!nb_descs)
+			goto out;
+
+		/* This is the backpressure mechanism for the Tx path. Try to
+		 * reserve space in the completion queue for all packets, but
+		 * if there are fewer slots available, just process that many
+		 * packets. This avoids having to implement any buffering in
+		 * the Tx path.
+		 */
+		nb_descs = xsk_cq_reserve_locked(pool, nb_descs);
+		if (!nb_descs) {
+			err = -EAGAIN;
+			goto out;
+		}
+
+		cons_descs = xskq_cons_read_desc_batch_copy(xs->tx, pool, descs,
+							    nb_descs, &nb_pkts);
+		if (cons_descs < nb_descs) {
+			u32 delta = nb_descs - cons_descs;
+
+			xsk_cq_cancel_locked(pool, delta);
+			xs->tx->queue_empty_descs += delta;
+			if (!cons_descs) {
+				err = -EAGAIN;
+				goto out;
+			}
+			nb_descs = cons_descs;
+		}
+
+		cons_descs = xsk_alloc_batch_skb(xs, nb_pkts, nb_descs, &err);
+		/* Return 'nb_descs - cons_descs' number of descs to the
+		 * pool if the batch allocation partially fails
+		 */
+		if (cons_descs < nb_descs) {
+			xskq_cons_cancel_n(xs->tx, nb_descs - cons_descs);
+			xsk_cq_cancel_locked(pool, nb_descs - cons_descs);
+		}
+
+		if (!skb_queue_empty(&batch->send_queue)) {
+			int err_xmit;
+
+			err_xmit = xsk_direct_xmit_batch(xs, dev);
+			if (err_xmit == NETDEV_TX_BUSY)
+				err = -EAGAIN;
+			else if (err_xmit == NET_XMIT_DROP)
+				err = -EBUSY;
+
+			sent_frame = true;
+		}
+
+		if (err)
+			goto out;
+	}
+
+	/* Maximum budget of descriptors have been consumed */
+	if (xskq_has_descs(xs->tx))
+		err = -EAGAIN;
+
+out:
+	if (xs->skb)
+		xsk_drop_skb(xs->skb);
+
+	/* If send_queue still has pending skbs, we must clear
+	 * the rest of them.
+	 */
+	while ((skb = __skb_dequeue(&batch->send_queue)) != NULL) {
+		xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
+		xsk_consume_skb(skb);
+	}
+	if (sent_frame)
+		__xsk_tx_release(xs);
+
+	mutex_unlock(&xs->mutex);
+	return err;
+}
+
 static int __xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 34cc07d6115e..c3b97c6f2910 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -314,6 +314,14 @@ xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
 					   NULL, pool->xdp_zc_max_segs);
 }
 
+static inline u32
+xskq_cons_read_desc_batch_copy(struct xsk_queue *q, struct xsk_buff_pool *pool,
+			       struct xdp_desc *descs, u32 max, u32 *nb_pkts)
+{
+	return __xskq_cons_read_desc_batch(q, pool, descs, max,
+					   nb_pkts, MAX_SKB_FRAGS);
+}
+
 /* Functions for consumers */
 
 static inline void __xskq_cons_release(struct xsk_queue *q)
-- 
2.41.3


  parent reply	other threads:[~2026-04-15  8:28 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-15  8:26 [PATCH RFC net-next v4 00/14] xsk: batch xmit in copy mode Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 01/14] xsk: introduce XDP_GENERIC_XMIT_BATCH setsockopt Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 02/14] xsk: extend xsk_build_skb() to support passing an already allocated skb Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 03/14] xsk: add xsk_alloc_batch_skb() to build skbs in batch Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 04/14] xsk: cache data buffers to avoid frequently calling kmalloc_reserve Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 05/14] xsk: add direct xmit in batch function Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 06/14] xsk: support dynamic xmit.more control for batch xmit Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 07/14] xsk: try to skip validating skb list in xmit path Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 08/14] xsk: rename nb_pkts to nb_descs in xsk_tx_peek_release_desc_batch Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 09/14] xsk: extend xskq_cons_read_desc_batch to count nb_pkts Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 10/14] xsk: extend xsk_cq_reserve_locked() to reserve n slots Jason Xing
2026-04-15  8:26 ` Jason Xing [this message]
2026-04-15  8:26 ` [PATCH RFC net-next v4 12/14] xsk: separate read-mostly and write-heavy fields in xsk_buff_pool Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 13/14] xsk: retire old xmit path in copy mode Jason Xing
2026-04-15  8:26 ` [PATCH RFC net-next v4 14/14] xsk: optimize xsk_build_skb for batch copy-mode fast path Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260415082654.21026-12-kerneljasonxing@gmail.com \
    --to=kerneljasonxing@gmail.com \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kernelxing@tencent.com \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox