* [PATCH] virtio-net: disable delayed refill when pausing rx
From: Bui Quang Minh @ 2025-04-04 9:39 UTC
To: virtualization
Cc: Michael S . Tsirkin, Jason Wang, Xuan Zhuo, Andrew Lunn,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
Eugenio Pérez, David S . Miller, netdev, linux-kernel, bpf,
Bui Quang Minh
When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
call napi_disable() on the receive queue's napi. The delayed refill_work
also calls napi_disable() on the receive queue's napi. This can lead to a
deadlock when napi_disable() is called on an already disabled napi. The
scenario can be reproduced by binding an XDP socket to a virtio-net
interface without setting up the fill ring. As a result, try_fill_recv
will fail until the fill ring is set up and refill_work is scheduled.
This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up
virtnet_rx_pause/resume to disable future delayed refill_work, and cancel
any in-flight refill_work, before calling napi_disable() to pause the rx.
Fixes: 6a4763e26803 ("virtio_net: support rx queue resize")
Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
Fixes: 09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++++++++++++------
1 file changed, 51 insertions(+), 9 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..4361b91ccc64 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3342,10 +3342,53 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static void virtnet_rx_pause_all(struct virtnet_info *vi)
+{
+ bool running = netif_running(vi->dev);
+
+ /*
+ * Make sure refill_work does not run concurrently to
+ * avoid napi_disable race which leads to deadlock.
+ */
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
+ if (running) {
+ int i;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_disable(&vi->rq[i]);
+ virtnet_cancel_dim(vi, &vi->rq[i].dim);
+ }
+ }
+}
+
+static void virtnet_rx_resume_all(struct virtnet_info *vi)
+{
+ bool running = netif_running(vi->dev);
+ int i;
+
+ enable_delayed_refill(vi);
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs) {
+ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+ schedule_delayed_work(&vi->refill, 0);
+ }
+
+ if (running)
+ virtnet_napi_enable(&vi->rq[i]);
+ }
+}
+
static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
+ /*
+ * Make sure refill_work does not run concurrently to
+ * avoid napi_disable race which leads to deadlock.
+ */
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
if (running) {
virtnet_napi_disable(rq);
virtnet_cancel_dim(vi, &rq->dim);
@@ -3356,6 +3399,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
+ enable_delayed_refill(vi);
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
@@ -5959,12 +6003,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (prog)
bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ virtnet_rx_pause_all(vi);
+
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
+ for (i = 0; i < vi->max_queue_pairs; i++)
virtnet_napi_tx_disable(&vi->sq[i]);
- }
}
if (!prog) {
@@ -5996,13 +6040,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
vi->xdp_enabled = false;
}
+ virtnet_rx_resume_all(vi);
for (i = 0; i < vi->max_queue_pairs; i++) {
if (old_prog)
bpf_prog_put(old_prog);
- if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
+ if (netif_running(dev))
virtnet_napi_tx_enable(&vi->sq[i]);
- }
}
return 0;
@@ -6014,11 +6057,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
}
+ virtnet_rx_resume_all(vi);
if (netif_running(dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
+ for (i = 0; i < vi->max_queue_pairs; i++)
virtnet_napi_tx_enable(&vi->sq[i]);
- }
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
--
2.43.0
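For context, enable/disable_delayed_refill() are pre-existing helpers in
virtio_net.c and are not shown in the diff; they only gate whether
refill_work may be (re)scheduled. Roughly (paraphrased from the driver,
not part of this patch):

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

With refill_enabled false, the rx paths skip scheduling &vi->refill, so
the cancel_delayed_work_sync() above can reliably drain any in-flight work.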
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Xuan Zhuo @ 2025-04-07 1:03 UTC
To: Bui Quang Minh
Cc: Michael S . Tsirkin, Jason Wang, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, Bui Quang Minh,
virtualization
On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
> call napi_disable() on the receive queue's napi. The delayed refill_work
> also calls napi_disable() on the receive queue's napi. This can lead to a
> deadlock when napi_disable() is called on an already disabled napi. The
> scenario can be reproduced by binding an XDP socket to a virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
So, what is the problem? Is refill_work just waiting? As far as I know,
that thread will sleep for some time, so the CPU can do other work.
Thanks.
>
> This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up
> virtnet_rx_pause/resume to disable future delayed refill_work, and cancel
> any in-flight refill_work, before calling napi_disable() to pause the rx.
>
> Fixes: 6a4763e26803 ("virtio_net: support rx queue resize")
> Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
> Fixes: 09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
> drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++++++++++++------
> 1 file changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..4361b91ccc64 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3342,10 +3342,53 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> return NETDEV_TX_OK;
> }
>
> +static void virtnet_rx_pause_all(struct virtnet_info *vi)
> +{
> + bool running = netif_running(vi->dev);
> +
> + /*
> + * Make sure refill_work does not run concurrently to
> + * avoid napi_disable race which leads to deadlock.
> + */
> + disable_delayed_refill(vi);
> + cancel_delayed_work_sync(&vi->refill);
> + if (running) {
> + int i;
> +
> + for (i = 0; i < vi->max_queue_pairs; i++) {
> + virtnet_napi_disable(&vi->rq[i]);
> + virtnet_cancel_dim(vi, &vi->rq[i].dim);
> + }
> + }
> +}
> +
> +static void virtnet_rx_resume_all(struct virtnet_info *vi)
> +{
> + bool running = netif_running(vi->dev);
> + int i;
> +
> + enable_delayed_refill(vi);
> + for (i = 0; i < vi->max_queue_pairs; i++) {
> + if (i < vi->curr_queue_pairs) {
> + if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> + schedule_delayed_work(&vi->refill, 0);
> + }
> +
> + if (running)
> + virtnet_napi_enable(&vi->rq[i]);
> + }
> +}
> +
> static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> {
> bool running = netif_running(vi->dev);
>
> + /*
> + * Make sure refill_work does not run concurrently to
> + * avoid napi_disable race which leads to deadlock.
> + */
> + disable_delayed_refill(vi);
> + cancel_delayed_work_sync(&vi->refill);
> if (running) {
> virtnet_napi_disable(rq);
> virtnet_cancel_dim(vi, &rq->dim);
> @@ -3356,6 +3399,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
> {
> bool running = netif_running(vi->dev);
>
> + enable_delayed_refill(vi);
> if (!try_fill_recv(vi, rq, GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> @@ -5959,12 +6003,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> if (prog)
> bpf_prog_add(prog, vi->max_queue_pairs - 1);
>
> + virtnet_rx_pause_all(vi);
> +
> /* Make sure NAPI is not using any XDP TX queues for RX. */
> if (netif_running(dev)) {
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_disable(&vi->rq[i]);
> + for (i = 0; i < vi->max_queue_pairs; i++)
> virtnet_napi_tx_disable(&vi->sq[i]);
> - }
> }
>
> if (!prog) {
> @@ -5996,13 +6040,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> vi->xdp_enabled = false;
> }
>
> + virtnet_rx_resume_all(vi);
> for (i = 0; i < vi->max_queue_pairs; i++) {
> if (old_prog)
> bpf_prog_put(old_prog);
> - if (netif_running(dev)) {
> - virtnet_napi_enable(&vi->rq[i]);
> + if (netif_running(dev))
> virtnet_napi_tx_enable(&vi->sq[i]);
> - }
> }
>
> return 0;
> @@ -6014,11 +6057,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
> }
>
> + virtnet_rx_resume_all(vi);
> if (netif_running(dev)) {
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(&vi->rq[i]);
> + for (i = 0; i < vi->max_queue_pairs; i++)
> virtnet_napi_tx_enable(&vi->sq[i]);
> - }
> }
> if (prog)
> bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> --
> 2.43.0
>
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Bui Quang Minh @ 2025-04-07 2:27 UTC
To: Xuan Zhuo
Cc: Michael S . Tsirkin, Jason Wang, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/7/25 08:03, Xuan Zhuo wrote:
> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
>> call napi_disable() on the receive queue's napi. The delayed refill_work
>> also calls napi_disable() on the receive queue's napi. This can lead to a
>> deadlock when napi_disable() is called on an already disabled napi. The
>> scenario can be reproduced by binding an XDP socket to a virtio-net
>> interface without setting up the fill ring. As a result, try_fill_recv
>> will fail until the fill ring is set up and refill_work is scheduled.
>
> So, what is the problem? Is refill_work just waiting? As far as I know,
> that thread will sleep for some time, so the CPU can do other work.
When napi_disable() is called on an already disabled napi, it will sleep
in napi_disable_locked() while still holding the netdev_lock. As a result,
a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
This leaves refill_work and the pause-then-resume path stuck altogether.
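Roughly, the core paths involved look like this (a simplified sketch of
net/core/dev.c after the netdev instance lock changes; paraphrased,
not the exact upstream source):

void napi_disable(struct napi_struct *n)
{
	netdev_lock(n->dev);
	napi_disable_locked(n);	/* waits for NAPI_STATE_SCHED to clear */
	netdev_unlock(n->dev);
}

void napi_enable(struct napi_struct *n)
{
	netdev_lock(n->dev);	/* blocks while a stuck disabler holds it */
	napi_enable_locked(n);
	netdev_unlock(n->dev);
}

A disabled napi keeps NAPI_STATE_SCHED set until napi_enable(), so a
second napi_disable() sleeps in napi_disable_locked() with the lock held,
and the napi_enable() that would let it finish can never take the lock.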
Thanks,
Quang Minh.
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Jason Wang @ 2025-04-08 7:34 UTC
To: Bui Quang Minh
Cc: Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>
> On 4/7/25 08:03, Xuan Zhuo wrote:
> > On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
> >> call napi_disable() on the receive queue's napi. The delayed refill_work
> >> also calls napi_disable() on the receive queue's napi. This can lead to a
> >> deadlock when napi_disable() is called on an already disabled napi. The
> >> scenario can be reproduced by binding an XDP socket to a virtio-net
> >> interface without setting up the fill ring. As a result, try_fill_recv
> >> will fail until the fill ring is set up and refill_work is scheduled.
> >
> > So, what is the problem? Is refill_work just waiting? As far as I know,
> > that thread will sleep for some time, so the CPU can do other work.
>
> When napi_disable() is called on an already disabled napi, it will sleep
> in napi_disable_locked() while still holding the netdev_lock. As a result,
> a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
> This leaves refill_work and the pause-then-resume path stuck altogether.
This needs to be added to the changelog. And it looks like this is a fix for
commit 413f0271f3966e0c73d4937963f19335af19e628
Author: Jakub Kicinski <kuba@kernel.org>
Date: Tue Jan 14 19:53:14 2025 -0800
net: protect NAPI enablement with netdev_lock()
?
I wonder if it's simpler to just hold the netdev lock in resize or xsk
binding instead of this.
Thanks
>
> Thanks,
> Quang Minh.
>
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Paolo Abeni @ 2025-04-08 9:28 UTC
To: Jason Wang, Bui Quang Minh
Cc: Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/8/25 9:34 AM, Jason Wang wrote:
> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
>>>> call napi_disable() on the receive queue's napi. The delayed refill_work
>>>> also calls napi_disable() on the receive queue's napi. This can lead to a
>>>> deadlock when napi_disable() is called on an already disabled napi. The
>>>> scenario can be reproduced by binding an XDP socket to a virtio-net
>>>> interface without setting up the fill ring. As a result, try_fill_recv
>>>> will fail until the fill ring is set up and refill_work is scheduled.
>>>
>>> So, what is the problem? Is refill_work just waiting? As far as I know,
>>> that thread will sleep for some time, so the CPU can do other work.
>>
>> When napi_disable() is called on an already disabled napi, it will sleep
>> in napi_disable_locked() while still holding the netdev_lock. As a result,
>> a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
>> This leaves refill_work and the pause-then-resume path stuck altogether.
>
> This needs to be added to the changelog. And it looks like this is a fix for
>
> commit 413f0271f3966e0c73d4937963f19335af19e628
> Author: Jakub Kicinski <kuba@kernel.org>
> Date: Tue Jan 14 19:53:14 2025 -0800
>
> net: protect NAPI enablement with netdev_lock()
>
> ?
>
> I wonder if it's simpler to just hold the netdev lock in resize or xsk
> binding instead of this.
Setting:
dev->request_ops_lock = true;
in virtnet_probe() before calling register_netdevice() should achieve
the above. Could you please have a try?
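i.e. something like this (sketch, untested):

	/* in virtnet_probe(), before registration: make the core take the
	 * instance lock around ndo/queue ops, including XDP/xsk setup
	 */
	dev->request_ops_lock = true;
	...
	err = register_netdevice(dev);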
Thanks,
Paolo
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Jakub Kicinski @ 2025-04-08 14:37 UTC
To: Paolo Abeni
Cc: Jason Wang, Bui Quang Minh, Xuan Zhuo, Michael S . Tsirkin,
Andrew Lunn, Eric Dumazet, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On Tue, 8 Apr 2025 11:28:54 +0200 Paolo Abeni wrote:
> >> When napi_disable() is called on an already disabled napi, it will sleep
> >> in napi_disable_locked() while still holding the netdev_lock. As a result,
> >> a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
> >> This leaves refill_work and the pause-then-resume path stuck altogether.
> >
> > This needs to be added to the changelog. And it looks like this is a fix for
> >
> > commit 413f0271f3966e0c73d4937963f19335af19e628
> > Author: Jakub Kicinski <kuba@kernel.org>
> > Date: Tue Jan 14 19:53:14 2025 -0800
> >
> > net: protect NAPI enablement with netdev_lock()
> >
> > ?
> >
> > I wonder if it's simpler to just hold the netdev lock in resize or xsk
> > binding instead of this.
>
> Setting:
>
> dev->request_ops_lock = true;
>
> in virtnet_probe() before calling register_netdevice() should achieve
> the above. Could you please have a try?
Can we do that, or do we need a more tailored fix? request_ops_lock only
appeared in 6.15 and the bug AFAIU dates back to 6.14. We don't normally
worry about that, given the stream of fixes, but request_ops_lock is a bit
risky. Jason's suggestion AFAIU is just to wrap the disable/enable pairs
in the lock. We can try request_ops_lock in -next?
Bui Quang Minh, could you add a selftest for this problem?
See tools/testing/selftests/drivers/net/virtio_net/
You can re-use / extend the XSK helper from
tools/testing/selftests/drivers/net/xdp_helper.c ?
(move it to tools/testing/selftests/net/lib for easier access)
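Something like the following might do as a starting point for the helper
(a hypothetical sketch, not the actual selftest; error handling trimmed,
interface name and sizes are placeholders). It sizes the rings and binds,
but never mmaps or populates the fill ring, which is what leaves
try_fill_recv() failing and refill_work scheduled on virtio-net:

#include <linux/if_xdp.h>
#include <net/if.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct xdp_umem_reg mr = {};
	struct sockaddr_xdp sxdp = {};
	int ring_sz = 64;
	int fd = socket(AF_XDP, SOCK_RAW, 0);

	/* page-aligned umem backing store */
	void *umem = mmap(NULL, 64 * 4096, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	mr.addr = (unsigned long)umem;
	mr.len = 64 * 4096;
	mr.chunk_size = 4096;
	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_sz, sizeof(ring_sz));
	setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_sz, sizeof(ring_sz));
	setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_sz, sizeof(ring_sz));

	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = if_nametoindex("enp0s2");	/* placeholder */
	sxdp.sxdp_queue_id = 0;
	sxdp.sxdp_flags = XDP_ZEROCOPY;	/* reach the xsk pool bind path */
	bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));

	pause();	/* hold the binding while refill_work fires */
	return 0;
}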
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Bui Quang Minh @ 2025-04-09 6:44 UTC
To: Jason Wang
Cc: Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/8/25 14:34, Jason Wang wrote:
> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
>>>> call napi_disable() on the receive queue's napi. The delayed refill_work
>>>> also calls napi_disable() on the receive queue's napi. This can lead to a
>>>> deadlock when napi_disable() is called on an already disabled napi. The
>>>> scenario can be reproduced by binding an XDP socket to a virtio-net
>>>> interface without setting up the fill ring. As a result, try_fill_recv
>>>> will fail until the fill ring is set up and refill_work is scheduled.
>>> So, what is the problem? Is refill_work just waiting? As far as I know,
>>> that thread will sleep for some time, so the CPU can do other work.
>> When napi_disable() is called on an already disabled napi, it will sleep
>> in napi_disable_locked() while still holding the netdev_lock. As a result,
>> a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
>> This leaves refill_work and the pause-then-resume path stuck altogether.
> This needs to be added to the changelog. And it looks like this is a fix for
>
> commit 413f0271f3966e0c73d4937963f19335af19e628
> Author: Jakub Kicinski <kuba@kernel.org>
> Date: Tue Jan 14 19:53:14 2025 -0800
>
> net: protect NAPI enablement with netdev_lock()
>
> ?
I wasn't aware of this; I will update the Fixes tags in the next patch.
> I wonder if it's simpler to just hold the netdev lock in resize or xsk
> binding instead of this.
That looks cleaner, let me try that approach.
Thanks,
Quang Minh
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Bui Quang Minh @ 2025-04-09 6:55 UTC
To: Jakub Kicinski, Paolo Abeni
Cc: Jason Wang, Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn,
Eric Dumazet, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/8/25 21:37, Jakub Kicinski wrote:
> On Tue, 8 Apr 2025 11:28:54 +0200 Paolo Abeni wrote:
>>>> When napi_disable() is called on an already disabled napi, it will sleep
>>>> in napi_disable_locked() while still holding the netdev_lock. As a result,
>>>> a later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
>>>> This leaves refill_work and the pause-then-resume path stuck altogether.
>>> This needs to be added to the changelog. And it looks like this is a fix for
>>>
>>> commit 413f0271f3966e0c73d4937963f19335af19e628
>>> Author: Jakub Kicinski <kuba@kernel.org>
>>> Date: Tue Jan 14 19:53:14 2025 -0800
>>>
>>> net: protect NAPI enablement with netdev_lock()
>>>
>>> ?
>>>
>>> I wonder if it's simpler to just hold the netdev lock in resize or xsk
>>> binding instead of this.
>> Setting:
>>
>> dev->request_ops_lock = true;
>>
>> in virtnet_probe() before calling register_netdevice() should achieve
>> the above. Could you please have a try?
> Can we do that, or do we need a more tailored fix? request_ops_lock only
> appeared in 6.15 and the bug AFAIU dates back to 6.14. We don't normally
> worry about that, given the stream of fixes, but request_ops_lock is a bit
> risky. Jason's suggestion AFAIU is just to wrap the disable/enable pairs
> in the lock. We can try request_ops_lock in -next?
>
> Bui Quang Minh, could you add a selftest for this problem?
> See tools/testing/selftests/drivers/net/virtio_net/
> You can re-use / extend the XSK helper from
> tools/testing/selftests/drivers/net/xdp_helper.c ?
> (move it to tools/testing/selftests/net/lib for easier access)
I've just tried the current selftests and found that queues.py can
trigger the deadlock. Running this a few times (the probability is quite
high) on a guest with a virtio-net interface:
~ NETIF=enp0s2 ./ksft-net-drv/run_kselftest.sh -t drivers/net:queues.py
The test hangs.
I've got the hung task warning:
[ 363.841549] #0: ffff88800015c758 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x1a9/0x3c9
[ 363.841560] #1: ffffc90000043e28 ((work_completion)(&pool->work)){+.+.}-{0:0}, at: process_scheduled_works+0x1c2/0x3c9
[ 363.841570] #2: ffffffff81f5a150 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1b/0x1d
[ 363.841579] #3: ffff8880035e49f0 (&dev->lock){+.+.}-{4:4}, at: netdev_lock+0x12/0x14
[ 363.841587] 3 locks held by kworker/2:0/26:
[ 363.841589] #0: ffff88800015c758 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x1a9/0x3c9
[ 363.841598] #1: ffffc900000f7e28 ((work_completion)(&(&vi->refill)->work)){+.+.}-{0:0}, at: process_scheduled_works+0x1c2/0x3c9
[ 363.841608] #2: ffff8880035e49f0 (&dev->lock){+.+.}-{4:4}, at: netdev_lock+0x12/0x14
~ cat /proc/7/stack
[<0>] netdev_lock+0x12/0x14
[<0>] napi_enable+0x1a/0x35
[<0>] virtnet_napi_do_enable+0x1a/0x40
[<0>] virtnet_napi_enable+0x32/0x4a
[<0>] virtnet_rx_resume+0x4f/0x56
[<0>] virtnet_rq_bind_xsk_pool+0xd8/0xfd
[<0>] virtnet_xdp+0x6d3/0x72d
[<0>] xp_disable_drv_zc+0x5a/0x63
[<0>] xp_clear_dev+0x1d/0x4a
[<0>] xp_release_deferred+0x24/0x75
[<0>] process_scheduled_works+0x23e/0x3c9
[<0>] worker_thread+0x13f/0x1ca
[<0>] kthread+0x1b2/0x1ba
[<0>] ret_from_fork+0x2b/0x40
[<0>] ret_from_fork_asm+0x11/0x20
~ cat /proc/26/stack
[<0>] napi_disable_locked+0x41/0xaf
[<0>] napi_disable+0x22/0x35
[<0>] refill_work+0x4b/0x8f
[<0>] process_scheduled_works+0x23e/0x3c9
[<0>] worker_thread+0x13f/0x1ca
[<0>] kthread+0x1b2/0x1ba
[<0>] ret_from_fork+0x2b/0x40
[<0>] ret_from_fork_asm+0x11/0x20
The deadlock was caused by a race between xsk unbind and refill_work.
Thanks,
Quang Minh.
* Re: [PATCH] virtio-net: disable delayed refill when pausing rx
From: Bui Quang Minh @ 2025-04-10 7:02 UTC
To: Jason Wang
Cc: Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/9/25 13:44, Bui Quang Minh wrote:
> On 4/8/25 14:34, Jason Wang wrote:
>> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh
>> <minhquangbui99@gmail.com> wrote:
>>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh
>>>> <minhquangbui99@gmail.com> wrote:
>>>>> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize),
>>>>> we call napi_disable() on the receive queue's napi. The delayed
>>>>> refill_work also calls napi_disable() on the receive queue's napi.
>>>>> This can lead to a deadlock when napi_disable() is called on an
>>>>> already disabled napi. The scenario can be reproduced by binding an
>>>>> XDP socket to a virtio-net interface without setting up the fill
>>>>> ring. As a result, try_fill_recv will fail until the fill ring is
>>>>> set up and refill_work is scheduled.
>>>> So, what is the problem? Is refill_work just waiting? As far as I
>>>> know, that thread will sleep for some time, so the CPU can do other
>>>> work.
>>> When napi_disable() is called on an already disabled napi, it will
>>> sleep in napi_disable_locked() while still holding the netdev_lock.
>>> As a result, a later napi_enable() gets stuck too, as it cannot
>>> acquire the netdev_lock. This leaves refill_work and the
>>> pause-then-resume path stuck altogether.
>> This needs to be added to the changelog. And it looks like this is a
>> fix for
>>
>> commit 413f0271f3966e0c73d4937963f19335af19e628
>> Author: Jakub Kicinski <kuba@kernel.org>
>> Date: Tue Jan 14 19:53:14 2025 -0800
>>
>> net: protect NAPI enablement with netdev_lock()
>>
>> ?
>
> I'm not aware of this, will update the fix tags in the next patch.
>
>> I wonder if it's simpler to just hold the netdev lock in resize or xsk
>> binding instead of this.
>
> That looks cleaner, let me try that approach.
We need to hold the netdev_lock in the refill work too. Otherwise, we
get this race
    refill_work                           xdp bind
    -----------                           --------
    napi_disable()
                                          netdev_lock(dev)
    napi_disable_locked() <- hang
                                          napi_enable() <- cannot acquire the lock
I don't like the idea of holding the netdev_lock in refill_work. I will reply
with the patch for you to review.
Thanks,
Quang Minh.
* [PATCH] virtio-net: hold netdev_lock when pausing rx
From: Bui Quang Minh @ 2025-04-10 7:05 UTC
To: Jason Wang
Cc: Xuan Zhuo, Michael S . Tsirkin, Andrew Lunn, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
call napi_disable() on the receive queue's napi. The delayed refill_work
also calls napi_disable() on the receive queue's napi. When napi_disable()
is called on an already disabled napi, it will sleep in
napi_disable_locked() while still holding the netdev_lock. As a result, a
later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
This leaves refill_work and the pause-then-resume path stuck altogether.
This scenario can be reproduced by binding an XDP socket to a virtio-net
interface without setting up the fill ring. As a result, try_fill_recv
will fail until the fill ring is set up and refill_work is scheduled.
This commit makes the rx pause path hold the netdev_lock until resuming,
preventing any napi_disable() from being called on a temporarily disabled
napi.
Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
1 file changed, 47 insertions(+), 27 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..74bd1065c586 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
}
static void virtnet_napi_do_enable(struct virtqueue *vq,
- struct napi_struct *napi)
+ struct napi_struct *napi,
+ bool netdev_locked)
{
- napi_enable(napi);
+ if (netdev_locked)
+ napi_enable_locked(napi);
+ else
+ napi_enable(napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets now.
@@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
local_bh_enable();
}
-static void virtnet_napi_enable(struct receive_queue *rq)
+static void virtnet_napi_enable(struct receive_queue *rq, bool netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
int qidx = vq2rxq(rq->vq);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
}
-static void virtnet_napi_tx_enable(struct send_queue *sq)
+static void virtnet_napi_tx_enable(struct send_queue *sq, bool netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
return;
}
- virtnet_napi_do_enable(sq->vq, napi);
+ virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
}
-static void virtnet_napi_tx_disable(struct send_queue *sq)
+static void virtnet_napi_tx_disable(struct send_queue *sq, bool netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct send_queue *sq)
if (napi->weight) {
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
}
-static void virtnet_napi_disable(struct receive_queue *rq)
+static void virtnet_napi_disable(struct receive_queue *rq, bool netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct napi_struct *napi = &rq->napi;
int qidx = vq2rxq(rq->vq);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
static void refill_work(struct work_struct *work)
@@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
* instance lock)
* - check netif_running() and return early to avoid a race
*/
- napi_disable(&rq->napi);
+ netdev_lock(vi->dev);
+ napi_disable_locked(&rq->napi);
still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, true);
+ netdev_unlock(vi->dev);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again.
@@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
{
- virtnet_napi_tx_disable(&vi->sq[qp_index]);
- virtnet_napi_disable(&vi->rq[qp_index]);
+ virtnet_napi_tx_disable(&vi->sq[qp_index], false);
+ virtnet_napi_disable(&vi->rq[qp_index], false);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
@@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(&vi->rq[qp_index]);
- virtnet_napi_tx_enable(&vi->sq[qp_index]);
+ virtnet_napi_enable(&vi->rq[qp_index], false);
+ virtnet_napi_tx_enable(&vi->sq[qp_index], false);
return 0;
@@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
bool running = netif_running(vi->dev);
if (running) {
- virtnet_napi_disable(rq);
+ netdev_lock(vi->dev);
+ virtnet_napi_disable(rq, true);
virtnet_cancel_dim(vi, &rq->dim);
}
}
@@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- if (running)
- virtnet_napi_enable(rq);
+ if (running) {
+ virtnet_napi_enable(rq, true);
+ netdev_unlock(vi->dev);
+ }
}
static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
qindex = sq - vi->sq;
if (running)
- virtnet_napi_tx_disable(sq);
+ virtnet_napi_tx_disable(sq, false);
txq = netdev_get_tx_queue(vi->dev, qindex);
@@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
__netif_tx_unlock_bh(txq);
if (running)
- virtnet_napi_tx_enable(sq);
+ virtnet_napi_tx_enable(sq, false);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
@@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
+ netdev_lock(dev);
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
- virtnet_napi_tx_disable(&vi->sq[i]);
+ virtnet_napi_disable(&vi->rq[i], true);
+ virtnet_napi_tx_disable(&vi->sq[i], true);
}
}
@@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (old_prog)
bpf_prog_put(old_prog);
if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
}
+ if (netif_running(dev))
+ netdev_unlock(dev);
+
return 0;
err:
@@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
+ netdev_unlock(dev);
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
--
2.43.0
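The net effect is that a pause/resume cycle now holds the instance lock
across the whole window, roughly (a sketch of the resulting pattern, not
a verbatim excerpt of the diff):

	netdev_lock(vi->dev);			/* virtnet_rx_pause() */
	virtnet_napi_disable(rq, true);		/* napi_disable_locked() */
	virtnet_cancel_dim(vi, &rq->dim);

	/* ... resize / xsk (un)bind work, lock still held ... */

	if (!try_fill_recv(vi, rq, GFP_KERNEL))	/* virtnet_rx_resume() */
		schedule_delayed_work(&vi->refill, 0);
	virtnet_napi_enable(rq, true);		/* napi_enable_locked() */
	netdev_unlock(vi->dev);

so a concurrent napi_disable() from refill_work simply waits for the lock
instead of tripping over a temporarily disabled napi.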
* Re: [PATCH] virtio-net: hold netdev_lock when pausing rx
From: Michael S. Tsirkin @ 2025-04-10 7:58 UTC
To: Bui Quang Minh
Cc: Jason Wang, Xuan Zhuo, Andrew Lunn, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
> call napi_disable() on the receive queue's napi. The delayed refill_work
> also calls napi_disable() on the receive queue's napi. When napi_disable()
> is called on an already disabled napi, it will sleep in
> napi_disable_locked() while still holding the netdev_lock. As a result, a
> later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
> This leaves refill_work and the pause-then-resume path stuck altogether.
>
> This scenario can be reproduced by binding an XDP socket to a virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
>
> This commit makes the rx pause path hold the netdev_lock until resuming,
> preventing any napi_disable() from being called on a temporarily disabled
> napi.
>
> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
> drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
> 1 file changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..74bd1065c586 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
> }
>
> static void virtnet_napi_do_enable(struct virtqueue *vq,
> - struct napi_struct *napi)
> + struct napi_struct *napi,
> + bool netdev_locked)
> {
> - napi_enable(napi);
> + if (netdev_locked)
> + napi_enable_locked(napi);
> + else
> + napi_enable(napi);
>
> /* If all buffers were filled by other side before we napi_enabled, we
> * won't get another interrupt, so process any outstanding packets now.
> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
> *vq,
Your patch is line-wrapped, unfortunately. Here and elsewhere.
> local_bh_enable();
> }
>
> -static void virtnet_napi_enable(struct receive_queue *rq)
> +static void virtnet_napi_enable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> int qidx = vq2rxq(rq->vq);
>
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
> }
>
> -static void virtnet_napi_tx_enable(struct send_queue *sq)
> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
> *sq)
> return;
> }
>
> - virtnet_napi_do_enable(sq->vq, napi);
> + virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
> }
>
> -static void virtnet_napi_tx_disable(struct send_queue *sq)
> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
> send_queue *sq)
>
> if (napi->weight) {
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
> }
>
> -static void virtnet_napi_disable(struct receive_queue *rq)
> +static void virtnet_napi_disable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> struct napi_struct *napi = &rq->napi;
> int qidx = vq2rxq(rq->vq);
>
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
>
> static void refill_work(struct work_struct *work)
> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
> * instance lock)
> * - check netif_running() and return early to avoid a race
> */
> - napi_disable(&rq->napi);
> + netdev_lock(vi->dev);
> + napi_disable_locked(&rq->napi);
> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
This does mean netdev_lock is potentially held for a long while, until
try_fill_recv and the processing inside virtnet_napi_do_enable finish.
Better ideas?
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, true);
> + netdev_unlock(vi->dev);
>
> /* In theory, this can happen: if we don't get any buffers in
> * we will *never* try to fill again.
> @@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int
> budget)
>
> static void virtnet_disable_queue_pair(struct virtnet_info *vi, int
> qp_index)
> {
> - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> - virtnet_napi_disable(&vi->rq[qp_index]);
> + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> + virtnet_napi_disable(&vi->rq[qp_index], false);
> xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> }
>
> @@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct
> virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(&vi->rq[qp_index]);
> - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> + virtnet_napi_enable(&vi->rq[qp_index], false);
> + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
>
> return 0;
>
> @@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi,
> struct receive_queue *rq)
> bool running = netif_running(vi->dev);
>
> if (running) {
> - virtnet_napi_disable(rq);
> + netdev_lock(vi->dev);
> + virtnet_napi_disable(rq, true);
> virtnet_cancel_dim(vi, &rq->dim);
> }
> }
> @@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info
> *vi, struct receive_queue *rq)
> if (!try_fill_recv(vi, rq, GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> - if (running)
> - virtnet_napi_enable(rq);
> + if (running) {
> + virtnet_napi_enable(rq, true);
> + netdev_unlock(vi->dev);
> + }
> }
>
> static int virtnet_rx_resize(struct virtnet_info *vi,
> @@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi,
> struct send_queue *sq)
> qindex = sq - vi->sq;
>
> if (running)
> - virtnet_napi_tx_disable(sq);
> + virtnet_napi_tx_disable(sq, false);
>
> txq = netdev_get_tx_queue(vi->dev, qindex);
>
> @@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi,
> struct send_queue *sq)
> __netif_tx_unlock_bh(txq);
>
> if (running)
> - virtnet_napi_tx_enable(sq);
> + virtnet_napi_tx_enable(sq, false);
> }
>
> static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue
> *sq,
> @@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> /* Make sure NAPI is not using any XDP TX queues for RX. */
> if (netif_running(dev)) {
> + netdev_lock(dev);
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_disable(&vi->rq[i]);
> - virtnet_napi_tx_disable(&vi->sq[i]);
> + virtnet_napi_disable(&vi->rq[i], true);
> + virtnet_napi_tx_disable(&vi->sq[i], true);
> }
> }
>
> @@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
> if (old_prog)
> bpf_prog_put(old_prog);
> if (netif_running(dev)) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> }
>
> + if (netif_running(dev))
> + netdev_unlock(dev);
> +
> return 0;
>
> err:
> @@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> if (netif_running(dev)) {
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> + netdev_unlock(dev);
> }
> if (prog)
> bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> --
> 2.43.0
>
* Re: [PATCH] virtio-net: hold netdev_lock when pausing rx
From: Bui Quang Minh @ 2025-04-10 8:09 UTC
To: Michael S. Tsirkin
Cc: Jason Wang, Xuan Zhuo, Andrew Lunn, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Eugenio Pérez,
David S . Miller, netdev, linux-kernel, bpf, virtualization
On 4/10/25 14:58, Michael S. Tsirkin wrote:
> On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
>> When pausing rx (e.g. to set up xdp, an xsk pool, or an rx resize), we
>> call napi_disable() on the receive queue's napi. The delayed refill_work
>> also calls napi_disable() on the receive queue's napi. When napi_disable()
>> is called on an already disabled napi, it will sleep in
>> napi_disable_locked() while still holding the netdev_lock. As a result, a
>> later napi_enable() gets stuck too, as it cannot acquire the netdev_lock.
>> This leaves refill_work and the pause-then-resume path stuck altogether.
>>
>> This scenario can be reproduced by binding an XDP socket to a virtio-net
>> interface without setting up the fill ring. As a result, try_fill_recv
>> will fail until the fill ring is set up and refill_work is scheduled.
>>
>> This commit makes the rx pause path hold the netdev_lock until resuming,
>> preventing any napi_disable() from being called on a temporarily disabled
>> napi.
>>
>> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>> ---
>> drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
>> 1 file changed, 47 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 7e4617216a4b..74bd1065c586 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
>> }
>>
>> static void virtnet_napi_do_enable(struct virtqueue *vq,
>> - struct napi_struct *napi)
>> + struct napi_struct *napi,
>> + bool netdev_locked)
>> {
>> - napi_enable(napi);
>> + if (netdev_locked)
>> + napi_enable_locked(napi);
>> + else
>> + napi_enable(napi);
>>
>> /* If all buffers were filled by other side before we napi_enabled, we
>> * won't get another interrupt, so process any outstanding packets now.
>> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
>> *vq,
>
>
>
> Your patch is line-wrapped, unfortunately. Here and elsewhere.
>
>
>
>
>> local_bh_enable();
>> }
>>
>> -static void virtnet_napi_enable(struct receive_queue *rq)
>> +static void virtnet_napi_enable(struct receive_queue *rq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = rq->vq->vdev->priv;
>> int qidx = vq2rxq(rq->vq);
>>
>> - virtnet_napi_do_enable(rq->vq, &rq->napi);
>> + virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
>> }
>>
>> -static void virtnet_napi_tx_enable(struct send_queue *sq)
>> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = sq->vq->vdev->priv;
>> struct napi_struct *napi = &sq->napi;
>> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
>> *sq)
>> return;
>> }
>>
>> - virtnet_napi_do_enable(sq->vq, napi);
>> + virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
>> }
>>
>> -static void virtnet_napi_tx_disable(struct send_queue *sq)
>> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = sq->vq->vdev->priv;
>> struct napi_struct *napi = &sq->napi;
>> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
>> send_queue *sq)
>>
>> if (napi->weight) {
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
>> - napi_disable(napi);
>> + if (netdev_locked)
>> + napi_disable_locked(napi);
>> + else
>> + napi_disable(napi);
>> }
>> }
>>
>> -static void virtnet_napi_disable(struct receive_queue *rq)
>> +static void virtnet_napi_disable(struct receive_queue *rq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = rq->vq->vdev->priv;
>> struct napi_struct *napi = &rq->napi;
>> int qidx = vq2rxq(rq->vq);
>>
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
>> - napi_disable(napi);
>> + if (netdev_locked)
>> + napi_disable_locked(napi);
>> + else
>> + napi_disable(napi);
>> }
>>
>> static void refill_work(struct work_struct *work)
>> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>> * instance lock)
>> * - check netif_running() and return early to avoid a race
>> */
>> - napi_disable(&rq->napi);
>> + netdev_lock(vi->dev);
>> + napi_disable_locked(&rq->napi);
>> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>
> This does mean netdev_lock is potentially held for a long while, until
> try_fill_recv and the processing inside virtnet_napi_do_enable finish.
> Better ideas?
I prefer the first patch in this thread, where we disable delayed refill
and cancel all in-flight refill_work before pausing rx.
Thanks,
Quang Minh.