* [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-07 15:21 UTC (permalink / raw)
  To: Amit Shah; +Cc: netdev, virtualization, linux-kernel, Michael S. Tsirkin

Fix theoretical races related to refill work:
1. After napi is disabled by ndo_stop, refill work
   can run and re-enable it.
2. Refill can reschedule itself, if this happens
   it can run after cancel_delayed_work_sync,
   and will access device after it is destroyed.

As a solution, add flags to track napi state and
to disable refill, and toggle them on start, stop
and remove; check these flags on refill.

The refill work structure and the new fields aren't used
on the data path, so put them together near the end of
struct virtnet_info.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

RFC only since it's untested at this point.
A bugfix, so 3.2 material?
Comments?


 drivers/net/virtio_net.c |   57 +++++++++++++++++++++++++++++++++++++---------
 1 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f5b3f19..39eb24c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -21,6 +21,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
@@ -68,15 +69,24 @@ struct virtnet_info {
 	/* Active statistics */
 	struct virtnet_stats __percpu *stats;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
-
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
 	/* fragments + linear part + virtio header */
 	struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
 	struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Set flag to allow delayed refill work, protected by a refill_lock. */
+	bool refill_enable;
+
+	/* Whether napi is enabled, protected by a refill_lock. */
+	bool napi_enable;
+
+	/* Lock to protect refill and napi enable/disable operations. */
+	struct mutex refill_lock;
 };
 
 struct skb_vnet_hdr {
@@ -477,20 +487,35 @@ static void virtnet_napi_enable(struct virtnet_info *vi)
 	}
 }
 
+static void virtnet_refill_enable(struct virtnet_info *vi, bool enable)
+{
+	mutex_lock(&vi->refill_lock);
+	vi->refill_enable = enable;
+	mutex_unlock(&vi->refill_lock);
+}
+
 static void refill_work(struct work_struct *work)
 {
 	struct virtnet_info *vi;
 	bool still_empty;
 
 	vi = container_of(work, struct virtnet_info, refill.work);
-	napi_disable(&vi->napi);
-	still_empty = !try_fill_recv(vi, GFP_KERNEL);
-	virtnet_napi_enable(vi);
 
-	/* In theory, this can happen: if we don't get any buffers in
-	 * we will *never* try to fill again. */
-	if (still_empty)
-		schedule_delayed_work(&vi->refill, HZ/2);
+	mutex_lock(&vi->refill_lock);
+	if (vi->refill_enable) {
+		if (vi->napi_enable)
+			napi_disable(&vi->napi);
+		still_empty = !try_fill_recv(vi, GFP_KERNEL);
+		if (vi->napi_enable)
+			virtnet_napi_enable(vi);
+
+		/* In theory, this can happen: if we don't get any buffers in
+		 * we will *never* try to fill again. */
+		if (still_empty)
+			schedule_delayed_work(&vi->refill, HZ/2);
+
+	}
+	mutex_unlock(&vi->refill_lock);
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -708,7 +733,10 @@ static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	mutex_lock(&vi->refill_lock);
+	vi->napi_enable = true;
 	virtnet_napi_enable(vi);
+	mutex_unlock(&vi->refill_lock);
 	return 0;
 }
 
@@ -761,7 +789,10 @@ static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	mutex_lock(&vi->refill_lock);
+	vi->napi_enable = false;
 	napi_disable(&vi->napi);
+	mutex_unlock(&vi->refill_lock);
 
 	return 0;
 }
@@ -1010,6 +1041,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->stats == NULL)
 		goto free;
 
+	mutex_init(&vi->refill_lock);
 	INIT_DELAYED_WORK(&vi->refill, refill_work);
 	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
 	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
@@ -1047,6 +1079,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free_vqs;
 	}
 
+	virtnet_refill_enable(vi, true);
+
 	/* Last of all, set up some receive buffers. */
 	try_fill_recv(vi, GFP_KERNEL);
 
@@ -1107,10 +1141,11 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 
+	virtnet_refill_enable(vi, false);
+
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-
 	unregister_netdev(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
-- 
1.7.5.53.gc233e


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-08  4:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Amit Shah
  Cc: netdev, virtualization, linux-kernel, Michael S. Tsirkin

On Wed, 7 Dec 2011 17:21:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> Fix theoretical races related to refill work:
> 1. After napi is disabled by ndo_stop, refill work
>    can run and re-enable it.
> 2. Refill can reschedule itself, if this happens
>    it can run after cancel_delayed_work_sync,
>    and will access device after it is destroyed.
> 
> As a solution, add flags to track napi state and
> to disable refill, and toggle them on start, stop
> and remove; check these flags on refill.

Why isn't a "dont-readd" flag sufficient?

Cheers,
Rusty.


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-11 14:44 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Thu, Dec 08, 2011 at 03:07:29PM +1030, Rusty Russell wrote:
> On Wed, 7 Dec 2011 17:21:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > Fix theoretical races related to refill work:
> > 1. After napi is disabled by ndo_stop, refill work
> >    can run and re-enable it.
> > 2. Refill can reschedule itself, if this happens
> >    it can run after cancel_delayed_work_sync,
> >    and will access device after it is destroyed.
> > 
> > As a solution, add flags to track napi state and
> > to disable refill, and toggle them on start, stop
> > and remove; check these flags on refill.
> 
> Why isn't a "dont-readd" flag sufficient?
> 
> Cheers,
> Rusty.

I started with that, but here's the problem I wanted to
address:

- we run out of descriptors and schedule refill work
- ndo_close runs
- refill work runs
- ndo_open runs


If we just disable refill, the refill work will not add buffers and
will not reschedule itself, so we'll never get more buffers.  We could
start the refill work from ndo_open, but overall that seems riskier to
me than just splitting the flags.
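
To spell the bad interleaving out with only a single flag (a rough
sketch against a hypothetical vi->refill_stop bool, not the actual
driver code):

	/* 1. rx path runs out of buffers and schedules the refill: */
	schedule_delayed_work(&vi->refill, 0);

	/* 2. ndo_stop (virtnet_close): */
	vi->refill_stop = true;
	napi_disable(&vi->napi);

	/* 3. refill_work runs, sees refill_stop set, and returns
	 * without adding buffers and without rescheduling itself. */

	/* 4. ndo_open (virtnet_open): */
	vi->refill_stop = false;
	virtnet_napi_enable(vi);

	/* The rx ring is still empty, so napi never fires and nothing
	 * ever schedules vi->refill again - rx is stuck until the
	 * device is removed. */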


-- 
MST


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-11 22:55 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Sun, 11 Dec 2011 16:44:29 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Dec 08, 2011 at 03:07:29PM +1030, Rusty Russell wrote:
> > On Wed, 7 Dec 2011 17:21:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > Fix theoretical races related to refill work:
> > > 1. After napi is disabled by ndo_stop, refill work
> > >    can run and re-enable it.
> > > 2. Refill can reschedule itself, if this happens
> > >    it can run after cancel_delayed_work_sync,
> > >    and will access device after it is destroyed.
> > > 
> > > As a solution, add flags to track napi state and
> > > to disable refill, and toggle them on start, stop
> > > and remove; check these flags on refill.
> > 
> > Why isn't a "dont-readd" flag sufficient?
> > 
> > Cheers,
> > Rusty.
> 
> I started with that, but here's the problem I wanted to
> address:
> 
> - we run out of descriptors and schedule refill work
> - ndo_close runs
> - refill work runs
> - ndo_open runs

(s/ndo_close/ndo_stop/)

You don't think we should do any refills on a closed device?  If so, we
simply move the refill-stop code into ndo_stop (aka virtnet_close), and
the refill-start code into ndo_open (aka. virtnet_open).  Right?
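
i.e. roughly the following, just to make the suggestion concrete
(untested sketch only):

	static int virtnet_open(struct net_device *dev)
	{
		struct virtnet_info *vi = netdev_priv(dev);

		/* Prime the rx ring; fall back to the wq if oom. */
		if (!try_fill_recv(vi, GFP_KERNEL))
			schedule_delayed_work(&vi->refill, 0);
		virtnet_napi_enable(vi);
		return 0;
	}

	static int virtnet_close(struct net_device *dev)
	{
		struct virtnet_info *vi = netdev_priv(dev);

		/* Cancel first so refill work can't re-enable napi. */
		cancel_delayed_work_sync(&vi->refill);
		napi_disable(&vi->napi);
		return 0;
	}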

Orthogonally, the refill-stop code is still buggy, as you noted.  And
for self-rearming timers the pattern I've always used is a flag.

Or am I being obtuse again? :)
Rusty.


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-12 11:54 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Mon, Dec 12, 2011 at 09:25:07AM +1030, Rusty Russell wrote:
> On Sun, 11 Dec 2011 16:44:29 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Thu, Dec 08, 2011 at 03:07:29PM +1030, Rusty Russell wrote:
> > > On Wed, 7 Dec 2011 17:21:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > Fix theoretical races related to refill work:
> > > > 1. After napi is disabled by ndo_stop, refill work
> > > >    can run and re-enable it.
> > > > 2. Refill can reschedule itself, if this happens
> > > >    it can run after cancel_delayed_work_sync,
> > > >    and will access device after it is destroyed.
> > > > 
> > > > As a solution, add flags to track napi state and
> > > > to disable refill, and toggle them on start, stop
> > > > and remove; check these flags on refill.
> > > 
> > > Why isn't a "dont-readd" flag sufficient?
> > > 
> > > Cheers,
> > > Rusty.
> > 
> > I started with that, but here's the problem I wanted to
> > address:
> > 
> > - we run out of descriptors and schedule refill work
> > - ndo_close runs
> > - refill work runs
> > - ndo_open runs
> 
> (s/ndo_close/ndo_stop/)
> 
> You don't think we should do any refills on a closed device?  If so, we
> simply move the refill-stop code into ndo_stop (aka virtnet_close), and
> the refill-start code into ndo_open (aka. virtnet_open).  Right?

We can do that, yes.  We'll also have to cancel the work if it's
outstanding.  It seems like a bigger change, though.  I'm especially
concerned about putting refill-start in virtnet_open: suddenly it
always runs, where it used to run only after we had consumed some
buffers.  I'm also wary of making big changes this late in the release
cycle, but maybe that's OK.

> Orthogonally, the refill-stop code is still buggy, as you noted.

Sorry I don't understand how it's still buggy.

>  And
> for self-rearming timers the pattern I've always used is a flag.
> 
> Or am I being obtuse again? :)
> Rusty.


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-13  2:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Amit Shah, netdev, Tejun Heo, linux-kernel, virtualization

On Mon, 12 Dec 2011 13:54:06 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Mon, Dec 12, 2011 at 09:25:07AM +1030, Rusty Russell wrote:
> > Orthogonally, the refill-stop code is still buggy, as you noted.
> 
> Sorry I don't understand how it's still buggy.

Both places where we call:

	cancel_delayed_work_sync(&vi->refill);

Do not actually guarantee that vi->refill isn't running, because it
can requeue itself.  A 'bool no_more_refill' field seems like the
simplest fix for this, but I don't think it's sufficient.

Tejun, is this correct?  What's the correct way to synchronously stop a
delayed_work which can "schedule_delayed_work(&vi->refill, HZ/2);" on
itself?

Thanks,
Rusty.


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Tejun Heo @ 2011-12-14 23:54 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Amit Shah, netdev, virtualization, linux-kernel,
	Michael S. Tsirkin

Hello, Rusty.

On Tue, Dec 13, 2011 at 01:05:11PM +1030, Rusty Russell wrote:
> Both places where we call:
> 
> 	cancel_delayed_work_sync(&vi->refill);
> 
> Do not actually guarantee that vi->refill isn't running, because it
> can requeue itself.  A 'bool no_more_refill' field seems like the
> simplest fix for this, but I don't think it's sufficient.
> 
> Tejun, is this correct?  What's the correct way to synchronously stop a
> delayed_work which can "schedule_delayed_work(&vi->refill, HZ/2);" on
> itself?

cancel_delayed_work_sync() itself should be good enough.  It first
steals the pending state and then waits for it to finish if in-flight.
Queueing itself afterwards becomes noop.
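
In other words, the usual self-rearming pattern tears down with a
single cancel_delayed_work_sync().  A minimal sketch (my_dev and
do_refill() are made up for illustration):

	#include <linux/workqueue.h>

	struct my_dev {
		struct delayed_work dwork;
	};

	static void my_work_fn(struct work_struct *work)
	{
		struct my_dev *d = container_of(work, struct my_dev,
						dwork.work);

		if (!do_refill(d))
			/* Re-arm ourselves.  While a cancel is in
			 * progress this queueing is a noop. */
			schedule_delayed_work(&d->dwork, HZ / 2);
	}

	static void my_dev_init(struct my_dev *d)
	{
		INIT_DELAYED_WORK(&d->dwork, my_work_fn);
	}

	static void my_dev_teardown(struct my_dev *d)
	{
		/* Steals the pending state, then waits for a running
		 * instance; a self-requeue during the cancel is a noop,
		 * so nothing is pending or running once this returns. */
		cancel_delayed_work_sync(&d->dwork);
	}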

Thanks.

-- 
tejun


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-20 19:09 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Amit Shah, netdev, Tejun Heo, linux-kernel, virtualization

On Tue, Dec 13, 2011 at 01:05:11PM +1030, Rusty Russell wrote:
> On Mon, 12 Dec 2011 13:54:06 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Mon, Dec 12, 2011 at 09:25:07AM +1030, Rusty Russell wrote:
> > > Orthogonally, the refill-stop code is still buggy, as you noted.
> > 
> > Sorry I don't understand how it's still buggy.
> 
> Both places where we call:
> 
> 	cancel_delayed_work_sync(&vi->refill);
> 
> Do not actually guarantee that vi->refill isn't running, because it
> can requeue itself.  A 'bool no_more_refill' field seems like the
> simplest fix for this, but I don't think it's sufficient.
> 
> Tejun, is this correct?  What's the correct way to synchronously stop a
> delayed_work which can "schedule_delayed_work(&vi->refill, HZ/2);" on
> itself?
> 
> Thanks,
> Rusty.

Another question, wanted to make sure:
virtnet_poll does schedule_delayed_work(&vi->refill, 0);
separately refill work itself also does
schedule_delayed_work(&vi->refill, HZ/2);
If these two schedules happen on different CPUs, are we still guaranteed
that only one instance of the work runs at a time?


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Tejun Heo @ 2011-12-20 19:09 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Amit Shah, netdev, linux-kernel, virtualization

Hello, Michael.

On Tue, Dec 20, 2011 at 09:09:08PM +0200, Michael S. Tsirkin wrote:
> Another question, wanted to make sure:
> virtnet_poll does schedule_delayed_work(&vi->refill, 0);
> separately refill work itself also does
> schedule_delayed_work(&vi->refill, HZ/2);
> If these two schedules happen on different CPUs, are we still guaranteed
> that only one instance of the work runs at a time?

No, it's not.  Normal workqueues only guarantee non-reentrance on
local CPU.  If you want to guarantee that only one instance of a given
item is executing across all CPUs, you need to use the nrt workqueue.
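
Concretely, that means queueing to a non-reentrant workqueue instead
of calling schedule_delayed_work().  A sketch (the dedicated queue and
the refill_wq field are shown only as an alternative; they are not in
the driver today):

	/* the shared non-reentrant workqueue: */
	queue_delayed_work(system_nrt_wq, &vi->refill, 0);

	/* or a dedicated queue, allocated once at probe time
	 * (check for NULL on allocation failure): */
	vi->refill_wq = alloc_workqueue("virtnet-refill",
					WQ_NON_REENTRANT, 0);

	queue_delayed_work(vi->refill_wq, &vi->refill, HZ / 2);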

Thanks.

-- 
tejun


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-20 19:30 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Tue, Dec 20, 2011 at 11:09:46AM -0800, Tejun Heo wrote:
> Hello, Michael.
> 
> On Tue, Dec 20, 2011 at 09:09:08PM +0200, Michael S. Tsirkin wrote:
> > Another question, wanted to make sure:
> > virtnet_poll does schedule_delayed_work(&vi->refill, 0);
> > separately refill work itself also does
> > schedule_delayed_work(&vi->refill, HZ/2);
> > If these two schedules happen on different CPUs, are we still guaranteed
> > that only one instance of the work runs at a time?
> 
> No, it's not.  Normal workqueues only guarantee non-reentrance on
> local CPU.  If you want to guarantee that only one instance of a given
> item is executing across all CPUs, you need to use the nrt workqueue.
> 
> Thanks.

Hmm, in that case it looks like a nasty race could get
triggered, with try_fill_recv run on multiple CPUs in parallel,
corrupting the linked list within the vq.

Using the mutex as my patch did will fix that naturally, as well.

Rusty, am I missing something?

> -- 
> tejun


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Tejun Heo @ 2011-12-20 19:31 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> Hmm, in that case it looks like a nasty race could get
> triggered, with try_fill_recv run on multiple CPUs in parallel,
> corrupting the linked list within the vq.
> 
> Using the mutex as my patch did will fix that naturally, as well.

Don't know the code but just use nrt wq.  There's even a system one
called system_nrt_wq.

Thanks.

-- 
tejun


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-20 19:45 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Amit Shah, netdev, linux-kernel, virtualization

On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > Hmm, in that case it looks like a nasty race could get
> > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > corrupting the linked list within the vq.
> > 
> > Using the mutex as my patch did will fix that naturally, as well.
> 
> Don't know the code but just use nrt wq.  There's even a system one
> called system_nrt_wq.
> 
> Thanks.

We can, but we need the mutex for other reasons, anyway.

> -- 
> tejun


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-20 23:43 UTC (permalink / raw)
  To: Michael S. Tsirkin, Tejun Heo
  Cc: Amit Shah, netdev, linux-kernel, virtualization

On Tue, 20 Dec 2011 21:45:19 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> > On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > > Hmm, in that case it looks like a nasty race could get
> > > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > > corrupting the linked list within the vq.
> > > 
> > > Using the mutex as my patch did will fix that naturally, as well.
> > 
> > Don't know the code but just use nrt wq.  There's even a system one
> > called system_nrt_wq.
> > 
> > Thanks.
> 
> We can, but we need the mutex for other reasons, anyway.

Well, here's the alternate approach.  What do you think?

Finding two wq issues makes you justifiably cautious, but it almost
feels like giving up to simply wrap it in a lock.  The APIs are designed
to let us do it without a lock; I was just using them wrong.

Two patches in one mail.  It's gauche, but it's an RFC only (untested).

Cheers,
Rusty.

virtio_net: set/cancel work on ndo_open/ndo_stop

Michael S. Tsirkin noticed that we could run the refill work after
ndo_close, which can re-enable napi - we don't disable it until
virtnet_remove.  This is clearly wrong, so move the workqueue control
to ndo_open and ndo_stop (aka. virtnet_open and virtnet_close).

One subtle point: virtnet_probe() could simply fail if it couldn't
allocate a receive buffer, but that's less polite in virtnet_open() so
we schedule a refill as we do in the normal receive path if we run out
of memory.

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -719,6 +719,10 @@ static int virtnet_open(struct net_devic
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure we have some buffersl if oom use wq. */
+	if (!try_fill_recv(vi, GFP_KERNEL))
+		schedule_delayed_work(&vi->refill, 0);
+
 	virtnet_napi_enable(vi);
 	return 0;
 }
@@ -772,6 +776,8 @@ static int virtnet_close(struct net_devi
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure refill_work doesn't re-enable napi! */
+	cancel_delayed_work_sync(&vi->refill);
 	napi_disable(&vi->napi);
 
 	return 0;
@@ -1082,7 +1088,6 @@ static int virtnet_probe(struct virtio_d
 
 unregister:
 	unregister_netdev(dev);
-	cancel_delayed_work_sync(&vi->refill);
 free_vqs:
 	vdev->config->del_vqs(vdev);
 free_stats:
@@ -1121,9 +1126,7 @@ static void __devexit virtnet_remove(str
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-
 	unregister_netdev(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
 
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);


virtio_net: use non-reentrant workqueue.

Michael S. Tsirkin also noticed that we could run the refill work
on multiple CPUs: if we kick off a refill on one CPU and then on another,
they would both manipulate the queue at the same time (they use
napi_disable to avoid racing against the receive handler itself).

Tejun points out that this is what the WQ_NON_REENTRANT flag is for,
and that there is a convenient system workqueue we can use.

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -501,7 +501,7 @@ static void refill_work(struct work_stru
 	/* In theory, this can happen: if we don't get any buffers in
 	 * we will *never* try to fill again. */
 	if (still_empty)
-		schedule_delayed_work(&vi->refill, HZ/2);
+		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -520,7 +520,7 @@ again:
 
 	if (vi->num < vi->max / 2) {
 		if (!try_fill_recv(vi, GFP_ATOMIC))
-			schedule_delayed_work(&vi->refill, 0);
+			queue_delayed_work(system_nrt_wq, &vi->refill, 0);
 	}
 
 	/* Out of packets? */
@@ -721,7 +721,7 @@ static int virtnet_open(struct net_devic
 
 	/* Make sure we have some buffersl if oom use wq. */
 	if (!try_fill_recv(vi, GFP_KERNEL))
-		schedule_delayed_work(&vi->refill, 0);
+		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
 
 	virtnet_napi_enable(vi);
 	return 0;


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-21  9:06 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Tejun Heo, Amit Shah, netdev, linux-kernel, virtualization

On Wed, Dec 21, 2011 at 10:13:18AM +1030, Rusty Russell wrote:
> On Tue, 20 Dec 2011 21:45:19 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> > > On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > > > Hmm, in that case it looks like a nasty race could get
> > > > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > > > corrupting the linked list within the vq.
> > > > 
> > > > Using the mutex as my patch did will fix that naturally, as well.
> > > 
> > > Don't know the code but just use nrt wq.  There's even a system one
> > > called system_nrt_wq.
> > > 
> > > Thanks.
> > 
> > We can, but we need the mutex for other reasons, anyway.
> 
> Well, here's the alternate approach.  What do you think?

It looks very clean, thanks. Some documentation suggestions below.
Also - Cc stable on this and the block patch?

> Finding two wq issues makes you justifiably cautious, but it almost
> feels like giving up to simply wrap it in a lock.  The APIs are designed
> to let us do it without a lock; I was just using them wrong.

One thing I note is that this scheme works because there's a single
entity disabling/enabling napi and the refill thread.
So it's possible that Amit will need to add a lock and track NAPI
state anyway to make suspend work. But we'll see.

> Two patches in one mail.  It's gauche, but it's an RFC only (untested).
> 
> Cheers,
> Rusty.
> 
> virtio_net: set/cancel work on ndo_open/ndo_stop
> 
> Michael S. Tsirkin noticed that we could run the refill work after
> ndo_close, which can re-enable napi - we don't disable it until
> virtnet_remove.  This is clearly wrong, so move the workqueue control
> to ndo_open and ndo_stop (aka. virtnet_open and virtnet_close).
> 
> One subtle point: virtnet_probe() could simply fail if it couldn't
> allocate a receive buffer, but that's less polite in virtnet_open() so
> we schedule a refill as we do in the normal receive path if we run out
> of memory.
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -719,6 +719,10 @@ static int virtnet_open(struct net_devic
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
> +	/* Make sure we have some buffersl if oom use wq. */

Typo :)

> +	if (!try_fill_recv(vi, GFP_KERNEL))
> +		schedule_delayed_work(&vi->refill, 0);
> +
>  	virtnet_napi_enable(vi);
>  	return 0;
>  }
> @@ -772,6 +776,8 @@ static int virtnet_close(struct net_devi
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
> +	/* Make sure refill_work doesn't re-enable napi! */
> +	cancel_delayed_work_sync(&vi->refill);
>  	napi_disable(&vi->napi);
>  
>  	return 0;
> @@ -1082,7 +1088,6 @@ static int virtnet_probe(struct virtio_d
>  
>  unregister:
>  	unregister_netdev(dev);
> -	cancel_delayed_work_sync(&vi->refill);
>  free_vqs:
>  	vdev->config->del_vqs(vdev);
>  free_stats:
> @@ -1121,9 +1126,7 @@ static void __devexit virtnet_remove(str
>  	/* Stop all the virtqueues. */
>  	vdev->config->reset(vdev);
>  
> -
>  	unregister_netdev(vi->dev);
> -	cancel_delayed_work_sync(&vi->refill);
>  
>  	/* Free unused buffers in both send and recv, if any. */
>  	free_unused_bufs(vi);
> 
> 
> virtio_net: use non-reentrant workqueue.
> 
> Michael S. Tsirkin also noticed that we could run the refill work
> on multiple CPUs: if we kick off a refill on one CPU and then on another,
> they would both manipulate the queue at the same time (they use
> napi_disable to avoid racing against the receive handler itself).
> 
> Tejun points out that this is what the WQ_NON_REENTRANT flag is for,
> and that there is a convenient system workqueue we can use.
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -501,7 +501,7 @@ static void refill_work(struct work_stru
>  	/* In theory, this can happen: if we don't get any buffers in
>  	 * we will *never* try to fill again. */
>  	if (still_empty)
> -		schedule_delayed_work(&vi->refill, HZ/2);
> +		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
>  }
>  
>  static int virtnet_poll(struct napi_struct *napi, int budget)
> @@ -520,7 +520,7 @@ again:
>  
>  	if (vi->num < vi->max / 2) {
>  		if (!try_fill_recv(vi, GFP_ATOMIC))
> -			schedule_delayed_work(&vi->refill, 0);
> +			queue_delayed_work(system_nrt_wq, &vi->refill, 0);
>  	}
>  
>  	/* Out of packets? */
> @@ -721,7 +721,7 @@ static int virtnet_open(struct net_devic
>  
>  	/* Make sure we have some buffersl if oom use wq. */
>  	if (!try_fill_recv(vi, GFP_KERNEL))
> -		schedule_delayed_work(&vi->refill, 0);
> +		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
>  
>  	virtnet_napi_enable(vi);
>  	return 0;

Maybe document some rules:
- try_fill_recv must always run from napi or with napi disabled ...
- refill only runs when device is open

-- 
MST


* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-22  3:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Tejun Heo, Amit Shah, netdev, linux-kernel, virtualization

On Wed, 21 Dec 2011 11:06:37 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Wed, Dec 21, 2011 at 10:13:18AM +1030, Rusty Russell wrote:
> > On Tue, 20 Dec 2011 21:45:19 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> > > > On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > > > > Hmm, in that case it looks like a nasty race could get
> > > > > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > > > > corrupting the linked list within the vq.
> > > > > 
> > > > > Using the mutex as my patch did will fix that naturally, as well.
> > > > 
> > > > Don't know the code but just use nrt wq.  There's even a system one
> > > > called system_nrt_wq.
> > > > 
> > > > Thanks.
> > > 
> > > We can, but we need the mutex for other reasons, anyway.
> > 
> > Well, here's the alternate approach.  What do you think?
> 
> It looks very clean, thanks. Some documentation suggestions below.
> Also - Cc stable on this and the block patch?

AFAICT we haven't seen this bug, and theoretical bugs don't get into
-stable.

> > Finding two wq issues makes you justifiably cautious, but it almost
> > feels like giving up to simply wrap it in a lock.  The APIs are designed
> > to let us do it without a lock; I was just using them wrong.
> 
> One thing I note is that this scheme works because there's a single
> entity disabling/enabling napi and the refill thread.
> So it's possible that Amit will need to add a lock and track NAPI
> state anyway to make suspend work. But we'll see.

Fixed typo, documented the locking, queued for -next.

Thanks!
Rusty.

From: Rusty Russell <rusty@rustcorp.com.au>
Subject: virtio_net: set/cancel work on ndo_open/ndo_stop

Michael S. Tsirkin noticed that we could run the refill work after
ndo_close, which can re-enable napi - we don't disable it until
virtnet_remove.  This is clearly wrong, so move the workqueue control
to ndo_open and ndo_stop (aka. virtnet_open and virtnet_close).

One subtle point: virtnet_probe() could simply fail if it couldn't
allocate a receive buffer, but that's less polite in virtnet_open() so
we schedule a refill as we do in the normal receive path if we run out
of memory.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -439,7 +439,13 @@ static int add_recvbuf_mergeable(struct 
 	return err;
 }
 
-/* Returns false if we couldn't fill entirely (OOM). */
+/*
+ * Returns false if we couldn't fill entirely (OOM).
+ *
+ * Normally run in the receive path, but can also be run from ndo_open
+ * before we're receiving packets, or from refill_work which is
+ * careful to disable receiving (using napi_disable).
+ */
 static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
 {
 	int err;
@@ -719,6 +725,10 @@ static int virtnet_open(struct net_devic
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure we have some buffers: if oom use wq. */
+	if (!try_fill_recv(vi, GFP_KERNEL))
+		schedule_delayed_work(&vi->refill, 0);
+
 	virtnet_napi_enable(vi);
 	return 0;
 }
@@ -772,6 +782,8 @@ static int virtnet_close(struct net_devi
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure refill_work doesn't re-enable napi! */
+	cancel_delayed_work_sync(&vi->refill);
 	napi_disable(&vi->napi);
 
 	return 0;
@@ -1082,7 +1094,6 @@ static int virtnet_probe(struct virtio_d
 
 unregister:
 	unregister_netdev(dev);
-	cancel_delayed_work_sync(&vi->refill);
 free_vqs:
 	vdev->config->del_vqs(vdev);
 free_stats:
@@ -1121,9 +1132,7 @@ static void __devexit virtnet_remove(str
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-
 	unregister_netdev(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
 
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);
