All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: "Jason Wang" <jasowang@redhat.com>,
	"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
	"Andrew Lunn" <andrew+netdev@lunn.ch>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	"John Fastabend" <john.fastabend@gmail.com>,
	"Eugenio Pérez" <eperezma@redhat.com>,
	"David S . Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	bpf@vger.kernel.org, virtualization@lists.linux.dev
Subject: Re: [PATCH] virtio-net: hold netdev_lock when pausing rx
Date: Thu, 10 Apr 2025 03:58:54 -0400	[thread overview]
Message-ID: <20250410035158-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <4d3a1478-b6fc-47a3-8d77-7eca6a973a06@gmail.com>

On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> napi_disable() on the receive queue's napi. In delayed refill_work, it
> also calls napi_disable() on the receive queue's napi. When
> napi_disable() is called on an already disabled napi, it will sleep in
> napi_disable_locked while still holding the netdev_lock. As a result,
> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
> This leads to refill_work and the pause-then-resume tx are stuck
> altogether.
> 
> This scenario can be reproducible by binding a XDP socket to virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
> 
> This commit makes the pausing rx path hold the netdev_lock until
> resuming, prevent any napi_disable() to be called on a temporarily
> disabled napi.
> 
> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>  drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
>  1 file changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..74bd1065c586 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
>  }
> 
>  static void virtnet_napi_do_enable(struct virtqueue *vq,
> -                   struct napi_struct *napi)
> +                   struct napi_struct *napi,
> +                   bool netdev_locked)
>  {
> -    napi_enable(napi);
> +    if (netdev_locked)
> +        napi_enable_locked(napi);
> +    else
> +        napi_enable(napi);
> 
>      /* If all buffers were filled by other side before we napi_enabled, we
>       * won't get another interrupt, so process any outstanding packets now.
> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
> *vq,




Your patch is line-wrapped, unfortunately. Here and elsewhere.




>      local_bh_enable();
>  }
> 
> -static void virtnet_napi_enable(struct receive_queue *rq)
> +static void virtnet_napi_enable(struct receive_queue *rq, bool
> netdev_locked)
>  {
>      struct virtnet_info *vi = rq->vq->vdev->priv;
>      int qidx = vq2rxq(rq->vq);
> 
> -    virtnet_napi_do_enable(rq->vq, &rq->napi);
> +    virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
>      netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
>  }
> 
> -static void virtnet_napi_tx_enable(struct send_queue *sq)
> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
> netdev_locked)
>  {
>      struct virtnet_info *vi = sq->vq->vdev->priv;
>      struct napi_struct *napi = &sq->napi;
> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
> *sq)
>          return;
>      }
> 
> -    virtnet_napi_do_enable(sq->vq, napi);
> +    virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
>      netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
>  }
> 
> -static void virtnet_napi_tx_disable(struct send_queue *sq)
> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
> netdev_locked)
>  {
>      struct virtnet_info *vi = sq->vq->vdev->priv;
>      struct napi_struct *napi = &sq->napi;
> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
> send_queue *sq)
> 
>      if (napi->weight) {
>          netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
> -        napi_disable(napi);
> +        if (netdev_locked)
> +            napi_disable_locked(napi);
> +        else
> +            napi_disable(napi);
>      }
>  }
> 
> -static void virtnet_napi_disable(struct receive_queue *rq)
> +static void virtnet_napi_disable(struct receive_queue *rq, bool
> netdev_locked)
>  {
>      struct virtnet_info *vi = rq->vq->vdev->priv;
>      struct napi_struct *napi = &rq->napi;
>      int qidx = vq2rxq(rq->vq);
> 
>      netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
> -    napi_disable(napi);
> +    if (netdev_locked)
> +        napi_disable_locked(napi);
> +    else
> +        napi_disable(napi);
>  }
> 
>  static void refill_work(struct work_struct *work)
> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>           *     instance lock)
>           *   - check netif_running() and return early to avoid a race
>           */
> -        napi_disable(&rq->napi);
> +        netdev_lock(vi->dev);
> +        napi_disable_locked(&rq->napi);
>          still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);


This does mean netdev_lock is held potentially for a long while,
while try_fill_recv and processing inside virtnet_napi_do_enable
finish. Better ideas?

> -        virtnet_napi_do_enable(rq->vq, &rq->napi);
> +        virtnet_napi_do_enable(rq->vq, &rq->napi, true);
> +        netdev_unlock(vi->dev);
> 
>          /* In theory, this can happen: if we don't get any buffers in
>           * we will *never* try to fill again.
> @@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int
> budget)
> 
>  static void virtnet_disable_queue_pair(struct virtnet_info *vi, int
> qp_index)
>  {
> -    virtnet_napi_tx_disable(&vi->sq[qp_index]);
> -    virtnet_napi_disable(&vi->rq[qp_index]);
> +    virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> +    virtnet_napi_disable(&vi->rq[qp_index], false);
>      xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
>  }
> 
> @@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct
> virtnet_info *vi, int qp_index)
>      if (err < 0)
>          goto err_xdp_reg_mem_model;
> 
> -    virtnet_napi_enable(&vi->rq[qp_index]);
> -    virtnet_napi_tx_enable(&vi->sq[qp_index]);
> +    virtnet_napi_enable(&vi->rq[qp_index], false);
> +    virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> 
>      return 0;
> 
> @@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi,
> struct receive_queue *rq)
>      bool running = netif_running(vi->dev);
> 
>      if (running) {
> -        virtnet_napi_disable(rq);
> +        netdev_lock(vi->dev);
> +        virtnet_napi_disable(rq, true);
>          virtnet_cancel_dim(vi, &rq->dim);
>      }
>  }
> @@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info
> *vi, struct receive_queue *rq)
>      if (!try_fill_recv(vi, rq, GFP_KERNEL))
>          schedule_delayed_work(&vi->refill, 0);
> 
> -    if (running)
> -        virtnet_napi_enable(rq);
> +    if (running) {
> +        virtnet_napi_enable(rq, true);
> +        netdev_unlock(vi->dev);
> +    }
>  }
> 
>  static int virtnet_rx_resize(struct virtnet_info *vi,
> @@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi,
> struct send_queue *sq)
>      qindex = sq - vi->sq;
> 
>      if (running)
> -        virtnet_napi_tx_disable(sq);
> +        virtnet_napi_tx_disable(sq, false);
> 
>      txq = netdev_get_tx_queue(vi->dev, qindex);
> 
> @@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi,
> struct send_queue *sq)
>      __netif_tx_unlock_bh(txq);
> 
>      if (running)
> -        virtnet_napi_tx_enable(sq);
> +        virtnet_napi_tx_enable(sq, false);
>  }
> 
>  static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue
> *sq,
> @@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
> 
>      /* Make sure NAPI is not using any XDP TX queues for RX. */
>      if (netif_running(dev)) {
> +        netdev_lock(dev);
>          for (i = 0; i < vi->max_queue_pairs; i++) {
> -            virtnet_napi_disable(&vi->rq[i]);
> -            virtnet_napi_tx_disable(&vi->sq[i]);
> +            virtnet_napi_disable(&vi->rq[i], true);
> +            virtnet_napi_tx_disable(&vi->sq[i], true);
>          }
>      }
> 
> @@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>          if (old_prog)
>              bpf_prog_put(old_prog);
>          if (netif_running(dev)) {
> -            virtnet_napi_enable(&vi->rq[i]);
> -            virtnet_napi_tx_enable(&vi->sq[i]);
> +            virtnet_napi_enable(&vi->rq[i], true);
> +            virtnet_napi_tx_enable(&vi->sq[i], true);
>          }
>      }
> 
> +    if (netif_running(dev))
> +        netdev_unlock(dev);
> +
>      return 0;
> 
>  err:
> @@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
> 
>      if (netif_running(dev)) {
>          for (i = 0; i < vi->max_queue_pairs; i++) {
> -            virtnet_napi_enable(&vi->rq[i]);
> -            virtnet_napi_tx_enable(&vi->sq[i]);
> +            virtnet_napi_enable(&vi->rq[i], true);
> +            virtnet_napi_tx_enable(&vi->sq[i], true);
>          }
> +        netdev_unlock(dev);
>      }
>      if (prog)
>          bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> -- 
> 2.43.0
> 


  reply	other threads:[~2025-04-10  7:59 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-04  9:39 [PATCH] virtio-net: disable delayed refill when pausing rx Bui Quang Minh
2025-04-07  1:03 ` Xuan Zhuo
2025-04-07  2:27   ` Bui Quang Minh
2025-04-08  7:34     ` Jason Wang
2025-04-08  9:28       ` Paolo Abeni
2025-04-08 14:37         ` Jakub Kicinski
2025-04-09  6:55           ` Bui Quang Minh
2025-04-09  6:44       ` Bui Quang Minh
2025-04-10  7:02         ` Bui Quang Minh
2025-04-10  7:05           ` [PATCH] virtio-net: hold netdev_lock " Bui Quang Minh
2025-04-10  7:58             ` Michael S. Tsirkin [this message]
2025-04-10  8:09               ` Bui Quang Minh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250410035158-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=eperezma@redhat.com \
    --cc=hawk@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=minhquangbui99@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=virtualization@lists.linux.dev \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.