* [PATCH net-next 0/3] xsk: support tx napi busy_poll
@ 2026-06-11 7:12 menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11 7:12 UTC (permalink / raw)
To: jasowang
Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
virtualization, linux-kernel, bpf
From: Menglong Dong <dongml2@chinatelecom.cn>
For now, we use sk_busy_loop() in __xsk_sendmsg() to send the data in the
tx ring. sk_busy_loop() polls the target NAPI. However, for NIC drivers
that support a tx NAPI, such as virtio-net, it can't schedule the tx NAPI,
only the rx NAPI. If we enable busy_poll for xsk and use virtio-net, we
can't send data, as the rx NAPI in virtio-net doesn't handle packet
sending.
Fix this by introducing sk_tx_busy_loop(), which will poll the tx NAPI
if available. To get the tx NAPI from the napi_id, we add the
"tx_napi" field to napi_struct, which is ugly :/
Another choice is to call virtnet_xsk_xmit() in virtnet_poll() too, but
this somewhat contradicts the design of the tx NAPI.
Menglong Dong (3):
net: busy-poll: introduce sk_tx_busy_loop()
virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()
drivers/net/virtio_net.c | 1 +
include/linux/netdevice.h | 1 +
include/net/busy_poll.h | 41 ++++++++++++++++++++++++++++++++++++---
net/core/dev.c | 26 +++++++------------------
net/xdp/xsk.c | 2 +-
5 files changed, 48 insertions(+), 23 deletions(-)
--
2.54.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop()
2026-06-11 7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
@ 2026-06-11 7:12 ` menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11 7:12 UTC (permalink / raw)
To: jasowang
Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
virtualization, linux-kernel, bpf
From: Menglong Dong <dongml2@chinatelecom.cn>
For now, we use sk_busy_loop() for both the rx and tx paths. sk_busy_loop()
calls napi_busy_loop() for the specified napi_id. However, some NIC
drivers, such as virtio-net, have a separate tx NAPI. In this case,
sk_busy_loop() doesn't work for tx, as it can only schedule the NAPI for
the rx queue.
Therefore, introduce sk_tx_busy_loop() for NIC drivers that support a tx
NAPI, which will schedule the tx NAPI if available.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/linux/netdevice.h | 1 +
include/net/busy_poll.h | 41 ++++++++++++++++++++++++++++++++++++---
net/core/dev.c | 26 +++++++------------------
3 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1e581efc5a..8a771b014d54 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -416,6 +416,7 @@ struct napi_struct {
int napi_rmap_idx;
int index;
struct napi_config *config;
+ struct napi_struct *tx_napi;
};
enum {
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 6e172d0f6ef5..0959e80272c7 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,6 +33,12 @@ static inline bool napi_id_valid(unsigned int napi_id)
#ifdef CONFIG_NET_RX_BUSY_POLL
+enum {
+ NAPI_F_PREFER_BUSY_POLL = 1,
+ NAPI_F_END_ON_RESCHED = 2,
+ NAPI_F_TX_NAPI = 4,
+};
+
struct napi_struct;
extern unsigned int sysctl_net_busy_read __read_mostly;
extern unsigned int sysctl_net_busy_poll __read_mostly;
@@ -49,9 +55,9 @@ static inline bool sk_can_busy_loop(const struct sock *sk)
bool sk_busy_loop_end(void *p, unsigned long start_time);
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+void __napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, unsigned int flags, u16 budget);
void napi_busy_loop_rcu(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
@@ -60,6 +66,17 @@ void napi_busy_loop_rcu(unsigned int napi_id,
void napi_suspend_irqs(unsigned int napi_id);
void napi_resume_irqs(unsigned int napi_id);
+static inline void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned int flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+ rcu_read_unlock();
+}
+
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
{
@@ -126,6 +143,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#endif
}
+static inline void sk_tx_busy_loop(struct sock *sk, int nonblock)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
+ unsigned int flags = NAPI_F_TX_NAPI;
+
+ if (READ_ONCE(sk->sk_prefer_busy_poll))
+ flags |= NAPI_F_PREFER_BUSY_POLL;
+
+ if (napi_id_valid(napi_id)) {
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk, flags,
+ READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
+ rcu_read_unlock();
+ }
+#endif
+}
+
/* used in the NIC receive handler to mark the skb */
static inline void __skb_mark_napi_id(struct sk_buff *skb,
const struct gro_node *gro)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0c6c270d9f7d..645a2e851918 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6878,11 +6878,6 @@ static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
HRTIMER_MODE_REL_PINNED);
}
-enum {
- NAPI_F_PREFER_BUSY_POLL = 1,
- NAPI_F_END_ON_RESCHED = 2,
-};
-
static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
@@ -6932,9 +6927,9 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
local_bh_enable();
}
-static void __napi_busy_loop(unsigned int napi_id,
+void __napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, unsigned flags, u16 budget)
+ void *loop_end_arg, unsigned int flags, u16 budget)
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6951,6 +6946,9 @@ static void __napi_busy_loop(unsigned int napi_id,
if (!napi)
return;
+ if ((flags & NAPI_F_TX_NAPI) && napi->tx_napi)
+ napi = napi->tx_napi;
+
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
for (;;) {
@@ -7015,6 +7013,7 @@ static void __napi_busy_loop(unsigned int napi_id,
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
}
+EXPORT_SYMBOL(__napi_busy_loop);
void napi_busy_loop_rcu(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
@@ -7028,18 +7027,6 @@ void napi_busy_loop_rcu(unsigned int napi_id,
__napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
}
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget)
-{
- unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
-
- rcu_read_lock();
- __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
- rcu_read_unlock();
-}
-EXPORT_SYMBOL(napi_busy_loop);
-
void napi_suspend_irqs(unsigned int napi_id)
{
struct napi_struct *napi;
@@ -7579,6 +7566,7 @@ void netif_napi_add_weight_locked(struct net_device *dev,
napi->poll_owner = -1;
#endif
napi->list_owner = -1;
+ napi->tx_napi = NULL;
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
netif_napi_dev_list_add(dev, napi);
--
2.54.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
2026-06-11 7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
@ 2026-06-11 7:12 ` menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski
3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11 7:12 UTC (permalink / raw)
To: jasowang
Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
virtualization, linux-kernel, bpf
From: Menglong Dong <dongml2@chinatelecom.cn>
Initialize the tx_napi for the rx queue, which will allow us to get the tx
napi from the rx napi.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
drivers/net/virtio_net.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 86b5c1ca568c..d72c124c9760 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -6543,6 +6543,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
virtnet_poll_tx,
napi_tx ? napi_weight : 0);
+ vi->rq[i].napi.tx_napi = &vi->sq[i].napi;
sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
--
2.54.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()
2026-06-11 7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
@ 2026-06-11 7:12 ` menglong8.dong
2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski
3 siblings, 0 replies; 5+ messages in thread
From: menglong8.dong @ 2026-06-11 7:12 UTC (permalink / raw)
To: jasowang
Cc: mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet, kuba,
pabeni, magnus.karlsson, maciej.fijalkowski, sdf, horms, ast,
daniel, hawk, john.fastabend, bjorn, kerneljasonxing, netdev,
virtualization, linux-kernel, bpf
From: Menglong Dong <dongml2@chinatelecom.cn>
Replace sk_busy_loop with sk_tx_busy_loop to support tx napi in
__xsk_sendmsg().
Fixes: a0731952d9cd ("xsk: Add busy-poll support for {recv,send}msg()")
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
net/xdp/xsk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5e5786cd9af5..2bf9a7313ac4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1158,7 +1158,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
return -ENOBUFS;
if (sk_can_busy_loop(sk))
- sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+ sk_tx_busy_loop(sk, 1); /* only support non-blocking sockets */
if (xs->zc && xsk_no_wakeup(sk))
return 0;
--
2.54.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next 0/3] xsk: support tx napi busy_poll
2026-06-11 7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
` (2 preceding siblings ...)
2026-06-11 7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
@ 2026-06-11 18:40 ` Maciej Fijalkowski
3 siblings, 0 replies; 5+ messages in thread
From: Maciej Fijalkowski @ 2026-06-11 18:40 UTC (permalink / raw)
To: menglong8.dong
Cc: jasowang, mst, xuanzhuo, eperezma, andrew+netdev, davem, edumazet,
kuba, pabeni, magnus.karlsson, sdf, horms, ast, daniel, hawk,
john.fastabend, bjorn, kerneljasonxing, netdev, virtualization,
linux-kernel, bpf
On Thu, Jun 11, 2026 at 03:12:39PM +0800, menglong8.dong@gmail.com wrote:
> From: Menglong Dong <dongml2@chinatelecom.cn>
>
> For now, we use sk_busy_loop() in __xsk_sendmsg() to send the data in tx
> ring. The sk_busy_loop() will poll on the target NAPI. However, for the
> nic driver that support the tx napi, such as virtio-net, it can't schedule
> the tx NAPI, but only the rx NAPI. If we enable the busy_poll for xsk and
> use virtio-net, we can't send data, as the rx NAPI in virtio-net doesn't
> handle the packet sending.
Am I reading this right that you decided to break busy-poll support for
zero-copy drivers that happen to handle transmit side from Rx NAPI context
in favor of supporting virtio-net?
>
> Fix this by introduce the sk_tx_busy_loop(), which will poll on the tx
> NAPI if available. To get the tx NAPI from the napi_id, we add the
> "tx_napi" field to napi_struct, which is ugly :/
>
> Another choice is to call virtnet_xsk_xmit() in virtnet_poll() too. But
> this a little contradict the design of tx NAPI.
>
> Menglong Dong (3):
> net: busy-poll: introduce sk_tx_busy_loop()
> virtio_net: initialize napi.tx_napi in virtnet_alloc_queues()
> xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg()
>
> drivers/net/virtio_net.c | 1 +
> include/linux/netdevice.h | 1 +
> include/net/busy_poll.h | 41 ++++++++++++++++++++++++++++++++++++---
> net/core/dev.c | 23 +++++-----------------
> net/xdp/xsk.c | 2 +-
> 5 files changed, 46 insertions(+), 22 deletions(-)
>
> --
> 2.54.0
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-06-11 18:40 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-11 7:12 [PATCH net-next 0/3] xsk: support tx napi busy_poll menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 1/3] net: busy-poll: introduce sk_tx_busy_loop() menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 2/3] virtio_net: initialize napi.tx_napi in virtnet_alloc_queues() menglong8.dong
2026-06-11 7:12 ` [PATCH net-next 3/3] xsk: replace sk_busy_loop with sk_tx_busy_loop in __xsk_sendmsg() menglong8.dong
2026-06-11 18:40 ` [PATCH net-next 0/3] xsk: support tx napi busy_poll Maciej Fijalkowski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox