Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] xsk: support tx napi busy_poll
@ 2026-06-11  7:12 menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11  7:12 UTC (permalink / raw)
  To: jasowang
  Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
	pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
	daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
	virtualization, linux-kernel, bpf

From: Menglong Dong <dongml2@chinatelecom.cn>

Currently, __xsk_sendmsg() uses sk_busy_loop() to send the data in the tx
ring. sk_busy_loop() polls the target NAPI. However, for NIC drivers that
support tx NAPI, such as virtio-net, it can only schedule the rx NAPI, not
the tx NAPI. If we enable busy_poll for xsk on virtio-net, we can't send
data, as the rx NAPI in virtio-net doesn't handle packet sending.

Fix this by introducing sk_tx_busy_loop(), which polls the tx NAPI if
available. To get the tx NAPI from the napi_id, we add a "tx_napi" field
to napi_struct, which is ugly :/

Another option is to call virtnet_xsk_xmit() in virtnet_poll() as well,
but this somewhat contradicts the design of tx NAPI.

Menglong Dong (3):
  net: busy-poll: introduce sk_tx_busy_loop()
  virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
  xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()

 drivers/net/virtio_net.c  |  1 +
 include/linux/netdevice.h |  1 +
 include/net/busy_poll.h   | 41 ++++++++++++++++++++++++++++++++++++---
 net/core/dev.c            | 23 +++++-----------------
 net/xdp/xsk.c             |  2 +-
 5 files changed, 46 insertions(+), 22 deletions(-)

-- 
2.54.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop()
  2026-06-11  7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
@ 2026-06-11  7:12 ` menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11  7:12 UTC (permalink / raw)
  To: jasowang
  Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
	pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
	daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
	virtualization, linux-kernel, bpf

From: Menglong Dong <dongml2@chinatelecom.cn>

Currently, sk_busy_loop() is used for both the rx and tx paths.
sk_busy_loop() calls napi_busy_loop() for the specified napi_id. However,
some NIC drivers, such as virtio-net, have a tx NAPI. In this case,
sk_busy_loop() doesn't work, as it can only schedule the NAPI for the rx
queue.

Therefore, introduce sk_tx_busy_loop() for NIC drivers that support tx
NAPI; it schedules the tx NAPI if one is available.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
 include/linux/netdevice.h |  1 +
 include/net/busy_poll.h   | 41 ++++++++++++++++++++++++++++++++++++---
 net/core/dev.c            | 26 +++++++------------------
 3 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1e581efc5a..8a771b014d54 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -416,6 +416,7 @@ struct napi_struct {
 	int			napi_rmap_idx;
 	int			index;
 	struct napi_config	*config;
+	struct napi_struct	*tx_napi;
 };
 
 enum {
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 6e172d0f6ef5..0959e80272c7 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,6 +33,12 @@ static inline bool napi_id_valid(unsigned int napi_id)
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 
+enum {
+	NAPI_F_PREFER_BUSY_POLL	= 1,
+	NAPI_F_END_ON_RESCHED	= 2,
+	NAPI_F_TX_NAPI		= 4,
+};
+
 struct napi_struct;
 extern unsigned int sysctl_net_busy_read __read_mostly;
 extern unsigned int sysctl_net_busy_poll __read_mostly;
@@ -49,9 +55,9 @@ static inline bool sk_can_busy_loop(const struct sock *sk)
 
 bool sk_busy_loop_end(void *p, unsigned long start_time);
 
-void napi_busy_loop(unsigned int napi_id,
-		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+void __napi_busy_loop(unsigned int napi_id,
+		      bool (*loop_end)(void *, unsigned long),
+		      void *loop_end_arg, unsigned int flags, u16 budget);
 
 void napi_busy_loop_rcu(unsigned int napi_id,
 			bool (*loop_end)(void *, unsigned long),
@@ -60,6 +66,17 @@ void napi_busy_loop_rcu(unsigned int napi_id,
 void napi_suspend_irqs(unsigned int napi_id);
 void napi_resume_irqs(unsigned int napi_id);
 
+static inline void napi_busy_loop(unsigned int napi_id,
+				  bool (*loop_end)(void *, unsigned long),
+				  void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+	unsigned int flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+	rcu_read_lock();
+	__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+	rcu_read_unlock();
+}
+
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
 {
@@ -126,6 +143,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
+static inline void sk_tx_busy_loop(struct sock *sk, int nonblock)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
+	unsigned int flags = NAPI_F_TX_NAPI;
+
+	if (READ_ONCE(sk->sk_prefer_busy_poll))
+		flags |= NAPI_F_PREFER_BUSY_POLL;
+
+	if (napi_id_valid(napi_id)) {
+		rcu_read_lock();
+		__napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk, flags,
+				 READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
+		rcu_read_unlock();
+	}
+#endif
+}
+
 /* used in the NIC receive handler to mark the skb */
 static inline void __skb_mark_napi_id(struct sk_buff *skb,
 				      const struct gro_node *gro)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0c6c270d9f7d..645a2e851918 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6878,11 +6878,6 @@ static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
 		      HRTIMER_MODE_REL_PINNED);
 }
 
-enum {
-	NAPI_F_PREFER_BUSY_POLL	= 1,
-	NAPI_F_END_ON_RESCHED	= 2,
-};
-
 static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
 			   unsigned flags, u16 budget)
 {
@@ -6932,9 +6927,9 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
 	local_bh_enable();
 }
 
-static void __napi_busy_loop(unsigned int napi_id,
+void __napi_busy_loop(unsigned int napi_id,
 		      bool (*loop_end)(void *, unsigned long),
-		      void *loop_end_arg, unsigned flags, u16 budget)
+		      void *loop_end_arg, unsigned int flags, u16 budget)
 {
 	unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
 	int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6951,6 +6946,9 @@ static void __napi_busy_loop(unsigned int napi_id,
 	if (!napi)
 		return;
 
+	if ((flags & NAPI_F_TX_NAPI) && napi->tx_napi)
+		napi = napi->tx_napi;
+
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		preempt_disable();
 	for (;;) {
@@ -7015,6 +7013,7 @@ static void __napi_busy_loop(unsigned int napi_id,
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		preempt_enable();
 }
+EXPORT_SYMBOL(__napi_busy_loop);
 
 void napi_busy_loop_rcu(unsigned int napi_id,
 			bool (*loop_end)(void *, unsigned long),
@@ -7028,18 +7027,6 @@ void napi_busy_loop_rcu(unsigned int napi_id,
 	__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
 }
 
-void napi_busy_loop(unsigned int napi_id,
-		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg, bool prefer_busy_poll, u16 budget)
-{
-	unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
-
-	rcu_read_lock();
-	__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(napi_busy_loop);
-
 void napi_suspend_irqs(unsigned int napi_id)
 {
 	struct napi_struct *napi;
@@ -7579,6 +7566,7 @@ void netif_napi_add_weight_locked(struct net_device *dev,
 	napi->poll_owner = -1;
 #endif
 	napi->list_owner = -1;
+	napi->tx_napi = NULL;
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 	netif_napi_dev_list_add(dev, napi);
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
  2026-06-11  7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
@ 2026-06-11  7:12 ` menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
  2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski
  3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11  7:12 UTC (permalink / raw)
  To: jasowang
  Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
	pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
	daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
	virtualization, linux-kernel, bpf

From: Menglong Dong <dongml2@chinatelecom.cn>

Initialize tx_napi for the rx queue's NAPI, which allows us to get the tx
NAPI from the rx NAPI.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
 drivers/net/virtio_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 86b5c1ca568c..d72c124c9760 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -6543,6 +6543,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 					 virtnet_poll_tx,
 					 napi_tx ? napi_weight : 0);
 
+		vi->rq[i].napi.tx_napi = &vi->sq[i].napi;
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
 		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()
  2026-06-11  7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
  2026-06-11  7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
@ 2026-06-11  7:12 ` menglong8.dong
  2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski
  3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11  7:12 UTC (permalink / raw)
  To: jasowang
  Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
	pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
	daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
	virtualization, linux-kernel, bpf

From: Menglong Dong <dongml2@chinatelecom.cn>

Replace sk_busy_loop with sk_tx_busy_loop to support tx napi in
__xsk_sendmsg().

Fixes: a0731952d9cd ("xsk: Add busy-poll support for {recv,send}msg()")
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
 net/xdp/xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5e5786cd9af5..2bf9a7313ac4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1158,7 +1158,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
 		return -ENOBUFS;
 
 	if (sk_can_busy_loop(sk))
-		sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+		sk_tx_busy_loop(sk, 1); /* only support non-blocking sockets */
 
 	if (xs->zc && xsk_no_wakeup(sk))
 		return 0;
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next 0/3] xsk: support tx napi busy_poll
  2026-06-11  7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
                   ` (2 preceding siblings ...)
  2026-06-11  7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
@ 2026-06-11 18:40 ` Maciej Fijalkowski
  3 siblings, 0 replies; 5+ messages in thread
From: Maciej Fijalkowski @ 2026-06-11 18:40 UTC (permalink / raw)
  To: menglong8.dong
  Cc: jasowang, mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet,
	kuba, pabeni, magnus.karlsson, sdf, horms, ast, daniel, hawk,
	john.fastabend, bjorn, kerneljasonxing, netdev, virtualization,
	linux-kernel, bpf

On Thu, Jun 11, 2026 at 03:12:39PM +0800, menglong8.dong@gmail.com wrote:
> From: Menglong Dong <dongml2@chinatelecom.cn>
> 
> For now, we use sk_busy_loop() in __xsk_sendmsg() to send the data in tx
> ring. The sk_busy_loop() will poll on the target NAPI. However, for the
> nic driver that support the tx napi, such as virtio-net, it can't schedule
> the tx NAPI, but only the rx NAPI. If we enable the busy_poll for xsk and
> use virtio-net, we can't send data, as the rx NAPI in virtio-net doesn't
> handle the packet sending.

Am I reading this right that you decided to break busy-poll support for
zero-copy drivers that happen to handle transmit side from Rx NAPI context
in favor of supporting virtio-net?

> 
> Fix this by introduce the sk_tx_busy_loop(), which will poll on the tx
> NAPI if available. To get the tx NAPI from the napi_id, we add the
> "tx_napi" field to napi_struct, which is ugly :/
> 
> Another choice is to call virtnet_xsk_xmit() in virtnet_poll() too. But
> this a little contradict the design of tx NAPI.
> 
> Menglong Dong (3):
>   net: busy-poll: introduce sk_tx_busy_loop()
>   virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
>   xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()
> 
>  drivers/net/virtio_net.c  |  1 +
>  include/linux/netdevice.h |  1 +
>  include/net/busy_poll.h   | 41 ++++++++++++++++++++++++++++++++++++---
>  net/core/dev.c            | 23 +++++-----------------
>  net/xdp/xsk.c             |  2 +-
>  5 files changed, 46 insertions(+), 22 deletions(-)
> 
> -- 
> 2.54.0
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-11 18:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-11  7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
2026-06-11  7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
2026-06-11  7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
2026-06-11  7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox