Linux virtualization list
 help / color / mirror / Atom feed
* Re: [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Rusty Russell @ 2011-12-29  4:31 UTC (permalink / raw)
  To: Ohad Ben-Cohen; +Cc: linux-kernel, linux-arm, virtualization
In-Reply-To: <CAK=WgbacH1tXHJiHNKLNNsFA7-vnFT_J6vYOANh0wzGyUj8Zpg@mail.gmail.com>

On Fri, 23 Dec 2011 13:35:26 +0200, Ohad Ben-Cohen <ohad@wizery.com> wrote:
> On Fri, Dec 23, 2011 at 11:35 AM, Linus Torvalds
> And rpmsg is using virtio to avoid implementing another shared memory
> "wire" protocol. And of course to be able to reuse all the existing
> virtio drivers (e.g. net, block, console) with a remote core backend.

OK, I've applied this patch; you might want to carry it in your tree
too.  You'll need to fix up your call to vring_new_virtqueue()....

From: Rusty Russell <rusty@rustcorp.com.au>
Subject: virtio: harsher barriers for rpmsg.

We were cheating with our barriers; using the smp ones rather than the
real device ones.  That was fine, until rpmsg came along, which is
used to talk to a real device (a non-SMP CPU).

Unfortunately, just putting back the real barriers (reverting
d57ed95d) causes a performance regression on virtio-pci.  In
particular, Amos reports netbench's TCP_RR over virtio_net CPU
utilization increased up to 35% while throughput went down by up to
14%.

By comparison, this branch is in the noise.

Reference: https://lkml.org/lkml/2011/12/11/22

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |    8 +++++---
 drivers/s390/kvm/kvm_virtio.c  |    2 +-
 drivers/virtio/virtio_mmio.c   |    4 ++--
 drivers/virtio/virtio_pci.c    |    4 ++--
 drivers/virtio/virtio_ring.c   |   34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |    1 +
 tools/virtio/linux/virtio.h    |    1 +
 tools/virtio/virtio_test.c     |    3 ++-
 8 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -292,10 +292,12 @@ static struct virtqueue *lg_find_vq(stru
 
 	/*
 	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
-	 * and we've got a pointer to its pages.
+	 * and we've got a pointer to its pages.  Note that we set weak_barriers
+	 * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
+	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -198,7 +198,7 @@ static struct virtqueue *kvm_find_vq(str
 		goto out;
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, (void *) config->address,
+				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -310,8 +310,8 @@ static struct virtqueue *vm_setup_vq(str
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
-				 vdev, info->queue, vm_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -414,8 +414,8 @@ static struct virtqueue *setup_vq(struct
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -28,17 +28,20 @@
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
  * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
-#define virtio_mb() smp_mb()
-#define virtio_rmb() smp_rmb()
-#define virtio_wmb() smp_wmb()
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
+#define virtio_mb(vq) \
+	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
+#define virtio_rmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
  * running on another CPU, but SMP barriers are defined to barrier() in that
  * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
-#define virtio_rmb() rmb()
-#define virtio_wmb() wmb()
+#define virtio_mb(vq) mb()
+#define virtio_rmb(vq) rmb()
+#define virtio_wmb(vq) wmb()
 #endif
 
 #ifdef DEBUG
@@ -77,6 +80,9 @@ struct vring_virtqueue
 	/* Actual memory layout for this queue */
 	struct vring vring;
 
+	/* Can we use weak barriers? */
+	bool weak_barriers;
+
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
@@ -245,14 +251,14 @@ void virtqueue_kick(struct virtqueue *_v
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb();
+	virtio_wmb(vq);
 
 	old = vq->vring.avail->idx;
 	new = vq->vring.avail->idx = old + vq->num_added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
-	virtio_mb();
+	virtio_mb(vq);
 
 	if (vq->event ?
 	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
@@ -314,7 +320,7 @@ void *virtqueue_get_buf(struct virtqueue
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb();
+	virtio_rmb(vq);
 
 	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
 	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
@@ -337,7 +343,7 @@ void *virtqueue_get_buf(struct virtqueue
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb();
+		virtio_mb(vq);
 	}
 
 	END_USE(vq);
@@ -366,7 +372,7 @@ bool virtqueue_enable_cb(struct virtqueu
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -393,7 +399,7 @@ bool virtqueue_enable_cb_delayed(struct 
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
@@ -453,6 +459,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
@@ -476,6 +483,7 @@ struct virtqueue *vring_new_virtqueue(un
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -168,6 +168,7 @@ struct virtqueue;
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -214,6 +214,7 @@ void *virtqueue_detach_unused_buf(struct
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info
 	assert(r >= 0);
 	memset(info->ring, 0, vring_size(num, 4096));
 	vring_init(&info->vring, num, info->ring, 4096);
-	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+				       true, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;

^ permalink raw reply

* [PATCH 1/1] Drivers:hv: Fix a bug in vmbus_driver_unregister()
From: K. Y. Srinivasan @ 2011-12-27 21:49 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, virtualization, ohering
  Cc: K. Y. Srinivasan, Fuzhou Chen, Sasha Levin, stable

The function vmbus_exists() was introduced recently to deal with cases where
the vmbus driver failed to initialize and yet other Hyper-V drivers attempted
to register with the vmbus bus driver. This patch introduced a bug where
vmbus_driver_unregister() would fail to unregister the driver. This patch 
fixes the problem.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Fuzhou Chen <fuzhouch@microsoft.com>
Cc: Sasha Levin <levinsasha928@gmail.com>
Cc: stable <stable@vger.kernel.org>
---
 drivers/hv/vmbus_drv.c |    5 +----
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index c3875f7..a220e57 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -627,10 +627,7 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver)
 	pr_info("unregistering driver %s\n", hv_driver->name);
 
 	if (!vmbus_exists())
-		return;
-
-	driver_unregister(&hv_driver->driver);
-
+		driver_unregister(&hv_driver->driver);
 }
 EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
 
-- 
1.7.4.1

^ permalink raw reply related

* Re: [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Rusty Russell @ 2011-12-24  3:01 UTC (permalink / raw)
  To: Ohad Ben-Cohen, Linus Torvalds; +Cc: linux-kernel, linux-arm, virtualization
In-Reply-To: <CAK=WgbacH1tXHJiHNKLNNsFA7-vnFT_J6vYOANh0wzGyUj8Zpg@mail.gmail.com>

On Fri, 23 Dec 2011 13:35:26 +0200, Ohad Ben-Cohen <ohad@wizery.com> wrote:
> On Fri, Dec 23, 2011 at 11:35 AM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> > On x86, there really is never any reason to use the heavy memory
> > barriers unless you are talking to a real device. And last I saw,
> > "virtio" was still about virtual IO.

Not any more, as the commit message describes.

But I missed that they're using rpmsg, which isn't merged yet.

Sorry for the noise,
Rusty.
PS. Switching barriers by bus type is too crude anyway, let's come up
    with something better...

^ permalink raw reply

* [PATCH 4/9] drivers/xen/gntalloc.c: introduce missing kfree
From: Julia Lawall @ 2011-12-23 17:39 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Jeremy Fitzhardinge, xen-devel, kernel-janitors, linux-kernel,
	virtualization

Error handling code following a kmalloc should free the allocated data.
Out_unlock is used on both success and failure, so free vm_priv before
jumping to that label.

A simplified version of the semantic match that finds the problem is as
follows: (http://coccinelle.lip6.fr)

// <smpl>
@r exists@
local idexpression x;
statement S;
identifier f1;
position p1,p2;
expression *ptr != NULL;
@@

x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...x...+> }
x->f1
...>
(
 return \(0\|<+...x...+>\|ptr\);
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>

---
 drivers/xen/gntalloc.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index e2400c8..934985d 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -525,6 +525,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 		rv = -ENOENT;
 		pr_debug("%s: Could not find grant reference",
 				__func__);
+		kfree(vm_priv);
 		goto out_unlock;
 	}

^ permalink raw reply related

* Re: [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Ohad Ben-Cohen @ 2011-12-23 11:35 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, linux-arm, virtualization
In-Reply-To: <CA+55aFzKU0GWqpQo2AzAawoVm=qPaJiwKTcifeYApyZP_Hj3nQ@mail.gmail.com>

On Fri, Dec 23, 2011 at 11:35 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On x86, there really is never any reason to use the heavy memory
> barriers unless you are talking to a real device. And last I saw,
> "virtio" was still about virtual IO.

I reported this originally, so maybe I should describe our use case a
bit here (it's not virtio-mmio). It's a bit long, so apologizes in
advance.

Almost every SoC today have several additional cores (DSP or whatnot)
which usually employ some hardware multimedia accelerators and are
used to offload cpu-intensive tasks from the main application
processor. These other cores are used in an asymmetric multiprocessing
configurations, i.e. they run their own instance of operating system
(which can be some flavor of RTOS, or Linux, or whatnot. anything
goes).

Virtually every SoC vendor have this (lots of ARM vendors, but it's
definitely not limited to ARM), and they all have their own way of
controlling, and communicating with, those remote cores. And it's
usually rather big (tens of thousands loc), out-of-tree and very
vendor-specific code.

So we're trying to fix this by introducing some generic code that'd
control those remote cores and let drivers talk to them, which all
vendors could then use. I'll be sending you a 3.3 pull request for
this, but you can already take a look in linux-next at drivers/rpmsg
(inter-processor communication bus) and drivers/remoteproc (framework
for booting a remote core).

And rpmsg is using virtio to avoid implementing another shared memory
"wire" protocol. And of course to be able to reuse all the existing
virtio drivers (e.g. net, block, console) with a remote core backend.

Which leads me to the specific issue we have. On OMAP4, the virtio
kick is implemented using a memory-mapped mailbox device. After
updating a vring (which is mapped using ARM's Normal memory) and
before kicking the remote core (using the mailbox device which is
mapped using ARM's Device memory) we must use a "heavy" memory barrier
(i.e. ARM's DSB). Otherwise, if only an smp memory barrier is used
(i.e. DMB on ARM), the kick might jump ahead before the remote core
has observed the updates to the vrings. And then bad things happen.

We didn't want to inflict the performance degradation on the
virtualization use cases (which can run concurrently with the remote
core scenarios), hence the dynamic "IO or SMP barrier" thingy.

(Btw we don't have enough information about other
setups/configurations as other vendors just begin using virtio for
these kind of scenarios, but we guess this is probably isn't limited
to OMAP. Fixing it at the transport layer sounds reasonable, although
there are other ways to do this too).

Hope this makes sense,

Thanks,
Ohad.

^ permalink raw reply

* Re: [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Linus Torvalds @ 2011-12-23  9:35 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel, virtualization
In-Reply-To: <CA+55aFyvMkAtXPRxiFR+UpZKGHMhp8Vd9aYg1dkaoKMwD5MLcw@mail.gmail.com>

On Fri, Dec 23, 2011 at 1:31 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> So the whole thing looks confused. There's never any reason to
> actually use the expensive sfence/rfences at all. Afaik you still just
> want smp_*mb() for all cases.

But note that I haven't thought deeply about it, I just looked at the
patch and went "Hmm, that can't be right", and then it kind of got
dropped because I forgot about it.

On x86, there really is never any reason to use the heavy memory
barriers unless you are talking to a real device. And last I saw,
"virtio" was still about virtual IO.

Now, I could in theory imagine that you migth want to unconditionally
have the "smp_*mb()" barriers in the case where you are running UP
guest in an SMP host, but that's not what the patch actually did. It
did that odd crazy dynamic "IO barrier or SMP barrier" thing, which
just is confused and doesn't make sense to me in any situation I can
think of.

                    Linus

^ permalink raw reply

* Re: [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Linus Torvalds @ 2011-12-23  9:31 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel, virtualization
In-Reply-To: <8739cbkgv2.fsf@rustcorp.com.au>

On Thu, Dec 22, 2011 at 9:16 PM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> This one as a signed tag on github, in case the inline patch was
> the reason you dropped this.

No, and please why do you make your tag-names be your email address?
That's just odd. It seems to be related to some broken SCM system that
thinks that tag-names are global and somehow different from
branch-names.

No, the reason I didn't pull was that the games with the barriers just
threw me off. They seem hacky and disgusting beyond words. And afaik
there are no actual *devices* that implement virtio-mmu, and the
commit that adds the mmu thing talks about qemu interfaces to. So it's
still just virtual, which makes the whole thing just masturbatory,
afaik. There's nothing out-of-order in virtual mmio.

So the whole thing looks confused. There's never any reason to
actually use the expensive sfence/rfences at all. Afaik you still just
want smp_*mb() for all cases.

                  Linus

^ permalink raw reply

* [RESENDx2] [PULL] virtio: fix barriers for virtio-mmio
From: Rusty Russell @ 2011-12-23  5:16 UTC (permalink / raw)
  To: torvalds; +Cc: linux-kernel, virtualization

This one as a signed tag on github, in case the inline patch was
the reason you dropped this.

virtio-mmio in new 3.2, and they found a corruption bug.  Please apply.

 * [new tag]         rusty@rustcorp.com.au -> rusty@rustcorp.com.au
The following changes since commit b3b1b70e62a603f473619dbebc3b3d23f535e6f8:

  Merge branch 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb (2011-12-22 12:59:47 -0800)

are available in the git repository at:

  git://github.com/rustyrussell/linux.git master

Rusty Russell (1):
      virtio: harsher barriers for virtio-mmio.

 drivers/lguest/lguest_device.c |    8 +++++---
 drivers/s390/kvm/kvm_virtio.c  |    2 +-
 drivers/virtio/virtio_mmio.c   |    7 ++++---
 drivers/virtio/virtio_pci.c    |    4 ++--
 drivers/virtio/virtio_ring.c   |   34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |    1 +
 tools/virtio/linux/virtio.h    |    1 +
 tools/virtio/virtio_test.c     |    3 ++-
 8 files changed, 37 insertions(+), 23 deletions(-)

commit ef3a731beb9a030e552945a734dc898b5525e2f7
Author: Rusty Russell <rusty@rustcorp.com.au>
Date:   Fri Dec 23 15:07:56 2011 +1030

    virtio: harsher barriers for virtio-mmio.
    
    We were cheating with our barriers; using the smp ones rather than the
    real device ones.  That was fine, until virtio-mmio came along, which
    could be talking to a real device (a non-SMP CPU).
    
    Unfortunately, just putting back the real barriers (reverting
    d57ed95d) causes a performance regression on virtio-pci.  In
    particular, Amos reports netbench's TCP_RR over virtio_net CPU
    utilization increased up to 35% while throughput went down by up to
    14%.
    
    By comparison, this branch is in the noise.
    
    Reference: https://lkml.org/lkml/2011/12/11/22
    
    Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

 drivers/lguest/lguest_device.c |    8 +++++---
 drivers/s390/kvm/kvm_virtio.c  |    2 +-
 drivers/virtio/virtio_mmio.c   |    7 ++++---
 drivers/virtio/virtio_pci.c    |    4 ++--
 drivers/virtio/virtio_ring.c   |   34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |    1 +
 tools/virtio/linux/virtio.h    |    1 +
 tools/virtio/virtio_test.c     |    3 ++-
 8 files changed, 37 insertions(+), 23 deletions(-)

^ permalink raw reply

* CFP: The 9th Int. Conf. on Autonomic Computing (ICAC) 2012
From: Ioan Raicu @ 2011-12-22 23:02 UTC (permalink / raw)
  To: virtualization

CALL FOR PAPERS and WORKSHOP PROPOSALS

The 9th International Conference on Autonomic Computing (ICAC 2012)

September 17-21, 2012. San Jose, CA, USA
http://icac2012.cs.fiu.edu/
-----------------------------------------------------------------

IMPORTANT DATES

Paper and Poster Submission: March 9, 2012, 11:59pm PST
Notification: May 18, 2012
Camera-ready Due: June 8, 2012
Workshop Proposal Submission: February 10, 2012
-----------------------------------------------------------------

OVERVIEW
ICAC is the leading conference on autonomic computing techniques,
foundations, and applications. Autonomic computing refers to
methods and means for automated management of performance, fault,
security, and configuration with little involvement of users or
administrators. Systems introducing new autonomic features are
becoming increasingly prevalent, motivating research that spans
a variety of areas, from computer systems, networking, software
engineering, and data management to machine learning, control
theory, and bio-inspired computing. ICAC brings together
researchers and practitioners across these disciplines to
address multiple facets of adaptation and self-management in
computing systems and applications from different perspectives.
Autonomic computing solutions are sought for clouds, grids,
data centers, enterprise software, internet services, data
services, smart phones, embedded systems, and sensor networks.
In these environments, resources and applications must be managed
to maximize performance and minimize cost, while maintaining
predictable and reliable behavior in the face of varying
workloads, failures, and malicious threats. Papers are solicited
from all areas of autonomic computing, including (but not limited
to):

* End-to-end techniques for management of resources, workloads,
   performance, faults, power/cooling, security, and others.

* Self-managing components, such as server, storage, network
   protocols, or specific application elements, and embedded and
   mobile end systems such as smart phones.

* Decision and analysis techniques and their use, such as machine
   learning, control theory, predictive methods, probability and
   stochastic processes, queuing theory methodologies, emergent
   behavior, rule-based systems, and bio-inspired techniques.

* Monitoring systems for autonomic computing.

* Hypervisor, operating systems, hardware, or application support
   for autonomic computing.

* Novel human interfaces for monitoring and controlling autonomic
   systems.

* Management topics, such as specification and modeling of
   service-level agreements, behavior enforcement and tie-in with
   IT governance.

* Toolkits, frameworks, principles and architectures, from
   software engineering practices and experimental methodologies
   to agent-based techniques and virtualization.

* Fundamental science and theory of self-managing systems:
   understanding, controlling or exploiting system behaviors to
   enforce autonomic properties.

* Applications of autonomic computing and experiences with
   prototyped or deployed systems solving real-world problems in
   science, engineering, business and society.

Papers will be judged on originality, significance, interest,
correctness, clarity and relevance to the broader community.
Papers should report on experiences, measurements, user studies,
or other evaluations, as appropriate. Evaluations of a prototype
or large-scale deployment of systems and applications is expected.

PAPER AND POSTER SUBMISSIONS
Full papers (a maximum of 10 pages in the two-column ACM proceedings
format) and posters (2 pages) are invited on a wide variety of
topics relating to autonomic computing. Submitted papers must be
original work, and may not be under consideration for another
conference or journal. Complete formatting and submission
instructions can be found on the conference web site. Accepted
papers and posters will appear in proceedings distributed at the
conference and available electronically. Relevant top ICAC'12
papers will be invited for "fast-track" submissions to the
ACM Transactions on Autonomous and Adaptive Systems (TAAS).

WORKSHOPS, DEMONSTRATIONS AND EXHIBITION
ICAC'12 welcomes proposals for co-located workshops on topics of
interest to the autonomic computing community. Workshop proposals
should be submitted to the Workshop Chair, Fred Douglis
(f.douglis@computer.org) by February 10, 2012. Workshops are
expected to publish proceedings, and should cover areas that
complement the main program. ICAC'12 will also feature a
demonstration and exhibition session consisting of prototypes and
technology artifacts such as demonstrating autonomic software or
autonomic computing principles. Entries will be judged by a
separate committee led by the demo/exhibit chair.

INDUSTRY SESSION
One of ICAC's important roles is to bring together researchers
and practitioners from academia and industry. In its industry
session, ICAC helps fulfill this role by presenting an industry
viewpoint on technologies, products, and market needs. The
industry session also addresses current challenges, and
opportunities for academic and corporate research collaborations.
We encourage industry leaders, including entrepreneurs, product
developers, architects, managers, marketers and end users,
to submit their papers and posters reflecting such industry
perspectives as part of the regular submission process.
------------------------------------------------------------------

ORGANIZERS

GENERAL CHAIR
Dejan Milojicic, HP Labs

PROGRAM CHAIRS
Dongyan Xu, Purdue University
Vanish Talwar, HP Labs

INDUSTRY CHAIR
Xiaoyun Zhu, VMware

WORKSHOPS CHAIR
Fred Douglis, EMC

POSTERS/DEMO/EXHIBITS CHAIR
Eno Thereska, Microsoft Research

FINANCE CHAIR
Michael Kozuch, Intel

LOCAL ARRANGEMENT CHAIR
Jessica Blaine

PUBLICITY CHAIRS
Daniel Batista, University of São Paulo
Vartan Padaryan, ISP/Russian Academy of Sci.
Ioan Raicu, Illinois Inst. of Technology
Jianfeng Zhan, ICT/Chinese Academy of Sci.
Ming Zhao, Florida Intl. University

PROGRAM COMMITTEE
Tarek Abdelzaher, UIUC
Umesh Bellur, IIT, Bombay
Ken Birman, Cornell University
Rajkumar Buyya, Univ. of Melbourne
Rocky Chang, Hong Kong Polytechnic University
Yuan Chen, HP Labs
Alva Couch, Tufts University
Peter Dinda, Northwestern University
Fred Douglis, EMC
Renato Figueiredo, University of Florida
Mohamed Hefeeda, Qatar Computing Research Institute
Joe Hellerstein, Google
Geoff Jiang, NEC Labs
Jeff Kephart, IBM Research
Emre Kiciman, Microsoft Research
Fabio Kon, University of São Paulo
Michael Kozuch, Intel
Dejan Milojicic, HP Labs
Klara Nahrstedt, UIUC
Priya Narasimhan, CMU
Manish Parashar, Rutgers University
Ioan Raicu, Illinois Inst. of Technology
Omer Rana, Cardiff University
Masoud Sadjadi, Florida Intl. University
Rick Schlichting, AT&T Labs
Hartmut Schmeck, KIT
Karsten Schwan, Georgia Tech
Onn Shehory, IBM Research
Eno Thereska, Microsoft Research
Xiaoyun Zhu, VMware

-- 
=================================================================
Ioan Raicu, Ph.D.
Assistant Professor, Illinois Institute of Technology (IIT)
Guest Research Faculty, Argonne National Laboratory (ANL)
=================================================================
Data-Intensive Distributed Systems Laboratory, CS/IIT
Distributed Systems Laboratory, MCS/ANL
=================================================================
Cel:    1-847-722-0876
Office: 1-312-567-5704
Email:  iraicu@cs.iit.edu
Web:    http://www.cs.iit.edu/~iraicu/
Web:    http://datasys.cs.iit.edu/
=================================================================
=================================================================

^ permalink raw reply

* [PATCH v6 11/11] virtio: balloon: Add freeze, restore handlers to support S4
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

Handling balloon hibernate / restore is tricky.  If the balloon was
inflated before going into the hibernation state, upon resume, the host
will not have any memory of that.  Any pages that were passed on to the
host earlier would most likely be invalid, and the host will have to
re-balloon to the previous value to get in the pre-hibernate state.

So the only sane thing for the guest to do here is to discard all the
pages that were put in the balloon.  When to discard the pages is the
next question.

One solution is to deflate the balloon just before writing the image to
the disk (in the freeze() PM callback).  However, asking for pages from
the host just to discard them immediately after seems wasteful of
resources.  Hence, it makes sense to do this by just fudging our
counters soon after wakeup.  This means we don't deflate the balloon
before sleep, and also don't put unnecessary pressure on the host.

This also helps in the thaw case: if the freeze fails for whatever
reason, the balloon should continue to remain in the inflated state.
This was tested by issuing 'swapoff -a' and trying to go into the S4
state.  That fails, and the balloon stays inflated, as expected.  Both
the host and the guest are happy.

Finally, in the restore() callback, we empty the list of pages that were
previously given off to the host, add the appropriate number of pages to
the totalram_pages counter, reset the num_pages counter to 0, and
all is fine.

As a last step, delete the vqs on the freeze callback to prepare for
hibernation, and re-create them in the restore and thaw callbacks to
resume normal operation.

The kthread doesn't race with any operations here, since it's frozen
before the freeze() call and is thawed after the thaw() and restore()
callbacks, so we're safe with that.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_balloon.c |   47 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1ff3cf4..4c327c7 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -363,6 +363,48 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
+#ifdef CONFIG_PM
+static int virtballoon_freeze(struct virtio_device *vdev)
+{
+	/*
+	 * The kthread is already frozen by the PM core before this
+	 * function is called.
+	 */
+
+	/* Ensure we don't get any more requests from the host */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtballoon_thaw(struct virtio_device *vdev)
+{
+	return init_vqs(vdev->priv);
+}
+
+static int virtballoon_restore(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	struct page *page, *page2;
+
+	/* We're starting from a clean slate */
+	vb->num_pages = 0;
+
+	/*
+	 * If a request wasn't complete at the time of freezing, this
+	 * could have been set.
+	 */
+	vb->need_stats_update = 0;
+
+	/* We don't have these pages in the balloon anymore! */
+	list_for_each_entry_safe(page, page2, &vb->pages, lru) {
+		list_del(&page->lru);
+		totalram_pages++;
+	}
+	return init_vqs(vdev->priv);
+}
+#endif
+
 static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
@@ -377,6 +419,11 @@ static struct virtio_driver virtio_balloon_driver = {
 	.probe =	virtballoon_probe,
 	.remove =	__devexit_p(virtballoon_remove),
 	.config_changed = virtballoon_changed,
+#ifdef CONFIG_PM
+	.freeze	=	virtballoon_freeze,
+	.restore =	virtballoon_restore,
+	.thaw =		virtballoon_thaw,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 10/11] virtio: balloon: Move vq initialization into separate function
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

The probe and PM restore functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_balloon.c |   48 ++++++++++++++++++++++++--------------
 1 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 94fd738..1ff3cf4 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -275,32 +275,21 @@ static int balloon(void *_vballoon)
 	return 0;
 }
 
-static int virtballoon_probe(struct virtio_device *vdev)
+static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtio_balloon *vb;
 	struct virtqueue *vqs[3];
 	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
 	const char *names[] = { "inflate", "deflate", "stats" };
 	int err, nvqs;
 
-	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
-	if (!vb) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	INIT_LIST_HEAD(&vb->pages);
-	vb->num_pages = 0;
-	init_waitqueue_head(&vb->config_change);
-	vb->vdev = vdev;
-	vb->need_stats_update = 0;
-
-	/* We expect two virtqueues: inflate and deflate,
-	 * and optionally stat. */
+	/*
+	 * We expect two virtqueues: inflate and deflate, and
+	 * optionally stat.
+	 */
 	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
 	if (err)
-		goto out_free_vb;
+		return err;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
@@ -317,6 +306,29 @@ static int virtballoon_probe(struct virtio_device *vdev)
 			BUG();
 		virtqueue_kick(vb->stats_vq);
 	}
+	return 0;
+}
+
+static int virtballoon_probe(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb;
+	int err;
+
+	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
+	if (!vb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&vb->pages);
+	vb->num_pages = 0;
+	init_waitqueue_head(&vb->config_change);
+	vb->vdev = vdev;
+	vb->need_stats_update = 0;
+
+	err = init_vqs(vb);
+	if (err)
+		goto out_free_vb;
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 09/11] virtio: net: Add freeze, restore handlers to support S4
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

Remove all the vqs, disable napi and detach from the netdev on
hibernation.

Re-create vqs after restoring from a hibernated image, re-enable napi
and re-attach the netdev.  This keeps networking working across
hibernation.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/net/virtio_net.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7a2a5bf..b31670f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1159,6 +1159,48 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 	free_netdev(vi->dev);
 }
 
+#ifdef CONFIG_PM
+static int virtnet_freeze(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+
+	virtqueue_disable_cb(vi->rvq);
+	virtqueue_disable_cb(vi->svq);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		virtqueue_disable_cb(vi->cvq);
+
+	netif_device_detach(vi->dev);
+	cancel_delayed_work_sync(&vi->refill);
+
+	if (netif_running(vi->dev))
+		napi_disable(&vi->napi);
+
+	remove_vq_common(vi);
+
+	return 0;
+}
+
+static int virtnet_restore(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+	int err;
+
+	err = init_vqs(vi);
+	if (err)
+		return err;
+
+	if (netif_running(vi->dev))
+		virtnet_napi_enable(vi);
+
+	netif_device_attach(vi->dev);
+
+	if (!try_fill_recv(vi, GFP_KERNEL))
+		schedule_delayed_work(&vi->refill, 0);
+
+	return 0;
+}
+#endif
+
 static struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
 	{ 0 },
@@ -1183,6 +1225,10 @@ static struct virtio_driver virtio_net_driver = {
 	.probe =	virtnet_probe,
 	.remove =	__devexit_p(virtnet_remove),
 	.config_changed = virtnet_config_changed,
+#ifdef CONFIG_PM
+	.freeze =	virtnet_freeze,
+	.restore =	virtnet_restore,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 08/11] virtio: net: Move vq and vq buf removal into separate function
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

The remove and PM freeze functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/net/virtio_net.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ec8c400..7a2a5bf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1134,22 +1134,26 @@ static void free_unused_bufs(struct virtnet_info *vi)
 	BUG_ON(vi->num != 0);
 }
 
-static void __devexit virtnet_remove(struct virtio_device *vdev)
+static void remove_vq_common(struct virtnet_info *vi)
 {
-	struct virtnet_info *vi = vdev->priv;
-
-	/* Stop all the virtqueues. */
-	vdev->config->reset(vdev);
-
-	unregister_netdev(vi->dev);
+	vi->vdev->config->reset(vi->vdev);
 
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);
 
-	vdev->config->del_vqs(vi->vdev);
+	vi->vdev->config->del_vqs(vi->vdev);
 
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+}
+
+static void __devexit virtnet_remove(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+
+	unregister_netdev(vi->dev);
+
+	remove_vq_common(vi);
 
 	free_percpu(vi->stats);
 	free_netdev(vi->dev);
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 07/11] virtio: net: Move vq initialization into separate function
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

The probe and PM restore functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/net/virtio_net.c |   47 +++++++++++++++++++++++++++------------------
 1 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1b67e8a..ec8c400 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -966,15 +966,38 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 	virtnet_update_status(vi);
 }
 
+static int init_vqs(struct virtnet_info *vi)
+{
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs, err;
+
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+
+	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+		vi->cvq = vqs[2];
+
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
+	}
+	return 0;
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -1046,24 +1069,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	err = init_vqs(vi);
 	if (err)
 		goto free_stats;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
-
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vqs[2];
-
-		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
-			dev->features |= NETIF_F_HW_VLAN_FILTER;
-	}
-
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 06/11] virtio: blk: Add freeze, restore handlers to support S4
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

Delete the vq and flush any pending requests from the block queue on the
freeze callback to prepare for hibernation.

Re-create the vq in the restore callback to resume normal function.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/block/virtio_blk.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 86c9c5b..e3bd9dc 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -588,6 +588,46 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 	ida_simple_remove(&vd_index_ida, index);
 }
 
+#ifdef CONFIG_PM
+static int virtblk_freeze(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+
+	/* Ensure we don't receive any more interrupts */
+	vdev->config->reset(vdev);
+
+	/* Prevent config work handler from accessing the device. */
+	mutex_lock(&vblk->config_lock);
+	vblk->config_enable = false;
+	mutex_unlock(&vblk->config_lock);
+
+	flush_work(&vblk->config_work);
+
+	spin_lock_irq(vblk->disk->queue->queue_lock);
+	blk_stop_queue(vblk->disk->queue);
+	spin_unlock_irq(vblk->disk->queue->queue_lock);
+	blk_sync_queue(vblk->disk->queue);
+
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtblk_restore(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+	int ret;
+
+	vblk->config_enable = true;
+	ret = init_vq(vdev->priv);
+	if (!ret) {
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		blk_start_queue(vblk->disk->queue);
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+	}
+	return ret;
+}
+#endif
+
 static const struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
 	{ 0 },
@@ -613,6 +653,10 @@ static struct virtio_driver __refdata virtio_blk = {
 	.probe			= virtblk_probe,
 	.remove			= __devexit_p(virtblk_remove),
 	.config_changed		= virtblk_config_changed,
+#ifdef CONFIG_PM
+	.freeze			= virtblk_freeze,
+	.restore		= virtblk_restore,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 05/11] virtio: blk: Move vq initialization to separate function
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

The probe and PM restore functions will share this code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/block/virtio_blk.c |   19 ++++++++++++++-----
 1 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 34633f3..86c9c5b 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -362,6 +362,18 @@ static void virtblk_config_changed(struct virtio_device *vdev)
 	queue_work(virtblk_wq, &vblk->config_work);
 }
 
+static int init_vq(struct virtio_blk *vblk)
+{
+	int err = 0;
+
+	/* We expect one virtqueue, for output. */
+	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+	if (IS_ERR(vblk->vq))
+		err = PTR_ERR(vblk->vq);
+
+	return err;
+}
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -405,12 +417,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 	vblk->config_enable = true;
 
-	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
-	if (IS_ERR(vblk->vq)) {
-		err = PTR_ERR(vblk->vq);
+	err = init_vq(vblk);
+	if (err)
 		goto out_free_vblk;
-	}
 
 	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
 	if (!vblk->pool) {
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 04/11] virtio: console: Add freeze and restore handlers to support S4
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

Remove all vqs and associated buffers in the freeze callback which
prepares us to go into hibernation state.  On restore, re-create all the
vqs and populate the input vqs with buffers to get to the pre-hibernate
state.

Note: Any outstanding unconsumed buffers are discarded; which means
there's a possibility of data loss in case the host or the guest didn't
consume any data already present in the vqs.  This can be addressed in a
later patch series, perhaps in virtio common code.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/char/virtio_console.c |   58 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e14f5aa..fd2fd6f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1844,6 +1844,60 @@ static unsigned int features[] = {
 	VIRTIO_CONSOLE_F_MULTIPORT,
 };
 
+#ifdef CONFIG_PM
+static int virtcons_freeze(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+
+	portdev = vdev->priv;
+
+	vdev->config->reset(vdev);
+
+	cancel_work_sync(&portdev->control_work);
+	remove_controlq_data(portdev);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		/*
+		 * We'll ask the host later if the new invocation has
+		 * the port opened or closed.
+		 */
+		port->host_connected = false;
+		remove_port_data(port);
+	}
+	remove_vqs(portdev);
+
+	return 0;
+}
+
+static int virtcons_restore(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+	int ret;
+
+	portdev = vdev->priv;
+
+	ret = init_vqs(portdev);
+	if (ret)
+		return ret;
+
+	if (use_multiport(portdev))
+		fill_queue(portdev->c_ivq, &portdev->cvq_lock);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		port->in_vq = portdev->in_vqs[port->id];
+		port->out_vq = portdev->out_vqs[port->id];
+
+		fill_queue(port->in_vq, &port->inbuf_lock);
+
+		/* Get port open/close status on the host */
+		send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+	}
+	return 0;
+}
+#endif
+
 static struct virtio_driver virtio_console = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
@@ -1853,6 +1907,10 @@ static struct virtio_driver virtio_console = {
 	.probe =	virtcons_probe,
 	.remove =	virtcons_remove,
 	.config_changed = config_intr,
+#ifdef CONFIG_PM
+	.freeze =	virtcons_freeze,
+	.restore =	virtcons_restore,
+#endif
 };
 
 static int __init init(void)
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 03/11] virtio: console: Move vq and vq buf removal into separate functions
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

This common code will be shared with the PM freeze function.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/char/virtio_console.c |   68 ++++++++++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 8e3c46d..e14f5aa 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1271,6 +1271,20 @@ static void remove_port(struct kref *kref)
 	kfree(port);
 }
 
+static void remove_port_data(struct port *port)
+{
+	struct port_buffer *buf;
+
+	/* Remove unused data this port might have received. */
+	discard_port_data(port);
+
+	reclaim_consumed_buffers(port);
+
+	/* Remove buffers we queued up for the Host to send us data in. */
+	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
+		free_buf(buf);
+}
+
 /*
  * Port got unplugged.  Remove port from portdev's list and drop the
  * kref reference.  If no userspace has this port opened, it will
@@ -1278,8 +1292,6 @@ static void remove_port(struct kref *kref)
  */
 static void unplug_port(struct port *port)
 {
-	struct port_buffer *buf;
-
 	spin_lock_irq(&port->portdev->ports_lock);
 	list_del(&port->list);
 	spin_unlock_irq(&port->portdev->ports_lock);
@@ -1300,14 +1312,7 @@ static void unplug_port(struct port *port)
 		hvc_remove(port->cons.hvc);
 	}
 
-	/* Remove unused data this port might have received. */
-	discard_port_data(port);
-
-	reclaim_consumed_buffers(port);
-
-	/* Remove buffers we queued up for the Host to send us data in. */
-	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf);
+	remove_port_data(port);
 
 	/*
 	 * We should just assume the device itself has gone off --
@@ -1659,6 +1664,28 @@ static const struct file_operations portdev_fops = {
 	.owner = THIS_MODULE,
 };
 
+static void remove_vqs(struct ports_device *portdev)
+{
+	portdev->vdev->config->del_vqs(portdev->vdev);
+	kfree(portdev->in_vqs);
+	kfree(portdev->out_vqs);
+}
+
+static void remove_controlq_data(struct ports_device *portdev)
+{
+	struct port_buffer *buf;
+	unsigned int len;
+
+	if (!use_multiport(portdev))
+		return;
+
+	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
+		free_buf(buf);
+
+	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
+		free_buf(buf);
+}
+
 /*
  * Once we're further in boot, we get probed like any other virtio
  * device.
@@ -1764,9 +1791,7 @@ free_vqs:
 	/* The host might want to notify mgmt sw about device add failure */
 	__send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
 			   VIRTIO_CONSOLE_DEVICE_READY, 0);
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
+	remove_vqs(portdev);
 free_chrdev:
 	unregister_chrdev(portdev->chr_major, "virtio-portsdev");
 free:
@@ -1804,21 +1829,8 @@ static void virtcons_remove(struct virtio_device *vdev)
 	 * have to just stop using the port, as the vqs are going
 	 * away.
 	 */
-	if (use_multiport(portdev)) {
-		struct port_buffer *buf;
-		unsigned int len;
-
-		while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-			free_buf(buf);
-
-		while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-			free_buf(buf);
-	}
-
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
-
+	remove_controlq_data(portdev);
+	remove_vqs(portdev);
 	kfree(portdev);
 }
 
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 02/11] virtio: pci: add PM notification handlers for restore, freeze, thaw, poweroff
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

Handle thaw, restore and freeze notifications from the PM core.  Expose
these to individual virtio drivers that can quiesce and resume vq
operations.  For drivers not implementing the thaw() method, use the
restore method instead.

These functions also save device-specific data so that the device can be
put in pre-suspend state after resume, and disable and enable the PCI
device in the freeze and resume functions, respectively.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_pci.c |   94 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio.h      |    5 ++
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 23e1532..63bf242 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -55,6 +55,10 @@ struct virtio_pci_device
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -726,9 +730,95 @@ static int virtio_pci_resume(struct device *dev)
 	return 0;
 }
 
+static int virtio_pci_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = 0;
+	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
+	if (drv && drv->freeze)
+		ret = drv->freeze(&vp_dev->vdev);
+
+	if (!ret)
+		pci_disable_device(pci_dev);
+	return ret;
+}
+
+static int restore_common(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	int ret;
+
+	ret = pci_enable_device(pci_dev);
+	if (ret)
+		return ret;
+	pci_set_master(pci_dev);
+	vp_finalize_features(&vp_dev->vdev);
+
+	return ret;
+}
+
+static int virtio_pci_thaw(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	ret = restore_common(dev);
+	if (ret)
+		return ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->thaw)
+		ret = drv->thaw(&vp_dev->vdev);
+	else if (drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
+static int virtio_pci_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = restore_common(dev);
+	if (!ret && drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-	.suspend = virtio_pci_suspend,
-	.resume  = virtio_pci_resume,
+	.suspend	= virtio_pci_suspend,
+	.resume		= virtio_pci_resume,
+	.freeze		= virtio_pci_freeze,
+	.thaw		= virtio_pci_thaw,
+	.restore	= virtio_pci_restore,
+	.poweroff	= virtio_pci_suspend,
 };
 #endif
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4c069d8..92902ab 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -146,6 +146,11 @@ struct virtio_driver {
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
+#ifdef CONFIG_PM
+	int (*freeze)(struct virtio_device *dev);
+	int (*thaw)(struct virtio_device *dev);
+	int (*restore)(struct virtio_device *dev);
+#endif
 };
 
 int register_virtio_driver(struct virtio_driver *drv);
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 01/11] virtio: pci: switch to new PM API
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin
In-Reply-To: <cover.1324553223.git.amit.shah@redhat.com>

The older PM API doesn't have a way to get notifications on hibernate
events.  Switch to the newer one that gives us those notifications.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_pci.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 03d1984..23e1532 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -708,19 +708,28 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
 }
 
 #ifdef CONFIG_PM
-static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
+static int virtio_pci_suspend(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_save_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D3hot);
 	return 0;
 }
 
-static int virtio_pci_resume(struct pci_dev *pci_dev)
+static int virtio_pci_resume(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_restore_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D0);
 	return 0;
 }
+
+static const struct dev_pm_ops virtio_pci_pm_ops = {
+	.suspend = virtio_pci_suspend,
+	.resume  = virtio_pci_resume,
+};
 #endif
 
 static struct pci_driver virtio_pci_driver = {
@@ -729,8 +738,7 @@ static struct pci_driver virtio_pci_driver = {
 	.probe		= virtio_pci_probe,
 	.remove		= __devexit_p(virtio_pci_remove),
 #ifdef CONFIG_PM
-	.suspend	= virtio_pci_suspend,
-	.resume		= virtio_pci_resume,
+	.driver.pm	= &virtio_pci_pm_ops,
 #endif
 };
 
-- 
1.7.7.4

^ permalink raw reply related

* [PATCH v6 00/11] virtio: s4 support
From: Amit Shah @ 2011-12-22 11:28 UTC (permalink / raw)
  To: Virtualization List; +Cc: Amit Shah, linux-pm, linux-kernel, Michael S. Tsirkin

Hi,

These patches add support for S4 to virtio (pci) and all drivers.

For each driver, all vqs are removed before hibernation, and then
re-created after restore.  Some driver-specific uninit and init work
is also done in the freeze and restore functions.

All the drivers in testing work fine:

* virtio-blk is used for the only disk in the VM, IO works fine before
  and after.  'dd if=/dev/zero of=/tmp/bigfile bs=1024 count=200000'
  across S4 gives same sha1sum for the file in the guest as well as
  one that's created without invoking S4.

* virtio-console: port IO keeps working fine before and after.
  * If a port is waiting for data from the host (blocking read(2)
    call), this works fine in both the cases: host-side connection is
    available or unavailable after resume.  In case the host-side
    connection isn't available, the blocking call is terminated.  If
    it is available, the call continues to remain in blocked state
    till further data arrives.

* virtio-net: ping remains active across S4.

* virtio-balloon: Works fine before and after.  Forgets the ballooned
  value across S4 (see details in commit log). Maintains ballooned
  value on failed freeze.

All of these tests are run in parallel.

v6:
 - Based on top of block and net module remove race fix patches

v5:
 - Enable virtio device after the driver-specific restore/thaw
   callbacks are completed.
 - Balloon: Don't exit kthread on freeze.  It's already frozen by the
   PM API before invoking the freeze callback, so exiting is pointless.

v4:
 - Disable / enable napi across S4 (Michael S. Tsirkin)
 - Balloon: lots of improvements (I had neglected this driver thinking
   it was a simple one, but this one needed the most thought!  Check
   the commit log for patch 12 for details.)
 - Net, Blk: Reset device as the first operation on freeze

v3:
 - Reset vqs before deleting them (Sasha Levin)
 - Flush block queue before freeze (Rusty)
 - Detach netdev before freeze (Michael S. Tsirkin)

v2:
 - fix checkpatch errors/warnings

Amit Shah (11):
  virtio: pci: switch to new PM API
  virtio: pci: add PM notification handlers for restore, freeze, thaw,
    poweroff
  virtio: console: Move vq and vq buf removal into separate functions
  virtio: console: Add freeze and restore handlers to support S4
  virtio: blk: Move vq initialization to separate function
  virtio: blk: Add freeze, restore handlers to support S4
  virtio: net: Move vq initialization into separate function
  virtio: net: Move vq and vq buf removal into separate function
  virtio: net: Add freeze, restore handlers to support S4
  virtio: balloon: Move vq initialization into separate function
  virtio: balloon: Add freeze, restore handlers to support S4

 drivers/block/virtio_blk.c      |   63 ++++++++++++++++++--
 drivers/char/virtio_console.c   |  126 ++++++++++++++++++++++++++++++---------
 drivers/net/virtio_net.c        |  113 ++++++++++++++++++++++++++--------
 drivers/virtio/virtio_balloon.c |   95 ++++++++++++++++++++++++------
 drivers/virtio/virtio_pci.c     |  106 +++++++++++++++++++++++++++++++-
 include/linux/virtio.h          |    5 ++
 6 files changed, 426 insertions(+), 82 deletions(-)

-- 
1.7.7.4

^ permalink raw reply

* Re: [PATCH RFC] virtio_net: fix refill related races
From: Rusty Russell @ 2011-12-22  3:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Tejun Heo, Amit Shah, netdev, linux-kernel, virtualization
In-Reply-To: <20111221090637.GA31592@redhat.com>

On Wed, 21 Dec 2011 11:06:37 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Wed, Dec 21, 2011 at 10:13:18AM +1030, Rusty Russell wrote:
> > On Tue, 20 Dec 2011 21:45:19 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> > > > On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > > > > Hmm, in that case it looks like a nasty race could get
> > > > > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > > > > corrupting the linked list within the vq.
> > > > > 
> > > > > Using the mutex as my patch did will fix that naturally, as well.
> > > > 
> > > > Don't know the code but just use nrt wq.  There's even a system one
> > > > called system_nrq_wq.
> > > > 
> > > > Thanks.
> > > 
> > > We can, but we need the mutex for other reasons, anyway.
> > 
> > Well, here's the alternate approach.  What do you think?
> 
> It looks very clean, thanks. Some documentation suggestions below.
> Also - Cc stable on this and the block patch?

AFAICT we haven't seen this bug, and theoretical bugs don't get into
-stable.

> > Finding two wq issues makes you justifiably cautious, but it almost
> > feels like giving up to simply wrap it in a lock.  The APIs are designed
> > to let us do it without a lock; I was just using them wrong.
> 
> One thing I note is that this scheme works because there's a single
> entity disabling/enabling napi and the refill thread.
> So it's possible that Amit will need to add a lock and track NAPI
> state anyway to make suspend work. But we'll see.

Fixed typo, documented the locking, queued for -next.

Thanks!
Rusty.

From: Rusty Russell <rusty@rustcorp.com.au>
Subject: virtio_net: set/cancel work on ndo_open/ndo_stop

Michael S. Tsirkin noticed that we could run the refill work after
ndo_close, which can re-enable napi - we don't disable it until
virtnet_remove.  This is clearly wrong, so move the workqueue control
to ndo_open and ndo_stop (aka. virtnet_open and virtnet_close).

One subtle point: virtnet_probe() could simply fail if it couldn't
allocate a receive buffer, but that's less polite in virtnet_open() so
we schedule a refill as we do in the normal receive path if we run out
of memory.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -439,7 +439,13 @@ static int add_recvbuf_mergeable(struct 
 	return err;
 }
 
-/* Returns false if we couldn't fill entirely (OOM). */
+/*
+ * Returns false if we couldn't fill entirely (OOM).
+ *
+ * Normally run in the receive path, but can also be run from ndo_open
+ * before we're receiving packets, or from refill_work which is
+ * careful to disable receiving (using napi_disable).
+ */
 static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
 {
 	int err;
@@ -719,6 +725,10 @@ static int virtnet_open(struct net_devic
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure we have some buffers: if oom use wq. */
+	if (!try_fill_recv(vi, GFP_KERNEL))
+		schedule_delayed_work(&vi->refill, 0);
+
 	virtnet_napi_enable(vi);
 	return 0;
 }
@@ -772,6 +782,8 @@ static int virtnet_close(struct net_devi
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
+	/* Make sure refill_work doesn't re-enable napi! */
+	cancel_delayed_work_sync(&vi->refill);
 	napi_disable(&vi->napi);
 
 	return 0;
@@ -1082,7 +1094,6 @@ static int virtnet_probe(struct virtio_d
 
 unregister:
 	unregister_netdev(dev);
-	cancel_delayed_work_sync(&vi->refill);
 free_vqs:
 	vdev->config->del_vqs(vdev);
 free_stats:
@@ -1121,9 +1132,7 @@ static void __devexit virtnet_remove(str
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-
 	unregister_netdev(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
 
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);

^ permalink raw reply

* Re: [RFC 7/11] virtio_pci: new, capability-aware driver.
From: Michael S. Tsirkin @ 2011-12-21  9:19 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Christian Borntraeger, Sasha Levin, Pawel Moll, virtualization
In-Reply-To: <878vm6daqy.fsf@rustcorp.com.au>

On Wed, Dec 21, 2011 at 11:03:25AM +1030, Rusty Russell wrote:
> > > What does the host do
> > > if the guest screws things up?  How long do you wait for them to
> > > complete the seqlock?  Or does it save the old version for use in the
> > > duration?
> > 
> > Yes, it will have to only apply the change when seqlock is dropped.
> 
> If the seqlock is in normal memory, how does it get notified?  It would
> have to poll.  That's annoying, since you don't know when to give up and
> declare the device terminally broken.

OK, so you think all devices need a config vq then?
It actually has other benefits:
- devices don't need a design choice between configuration and vq
- minimum number of MSI vectors per device will be 1, not 2
- reduced PCI memory usage
- configuration updates become more lightweight

-- 
MST

^ permalink raw reply

* Re: [PATCH RFC] virtio_net: fix refill related races
From: Michael S. Tsirkin @ 2011-12-21  9:06 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Tejun Heo, Amit Shah, netdev, linux-kernel, virtualization
In-Reply-To: <87hb0udd2h.fsf@rustcorp.com.au>

On Wed, Dec 21, 2011 at 10:13:18AM +1030, Rusty Russell wrote:
> On Tue, 20 Dec 2011 21:45:19 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Tue, Dec 20, 2011 at 11:31:54AM -0800, Tejun Heo wrote:
> > > On Tue, Dec 20, 2011 at 09:30:55PM +0200, Michael S. Tsirkin wrote:
> > > > Hmm, in that case it looks like a nasty race could get
> > > > triggered, with try_fill_recv run on multiple CPUs in parallel,
> > > > corrupting the linked list within the vq.
> > > > 
> > > > Using the mutex as my patch did will fix that naturally, as well.
> > > 
> > > Don't know the code but just use nrt wq.  There's even a system one
> > > called system_nrq_wq.
> > > 
> > > Thanks.
> > 
> > We can, but we need the mutex for other reasons, anyway.
> 
> Well, here's the alternate approach.  What do you think?

It looks very clean, thanks. Some documentation suggestions below.
Also - Cc stable on this and the block patch?

> Finding two wq issues makes you justifiably cautious, but it almost
> feels like giving up to simply wrap it in a lock.  The APIs are designed
> to let us do it without a lock; I was just using them wrong.

One thing I note is that this scheme works because there's a single
entity disabling/enabling napi and the refill thread.
So it's possible that Amit will need to add a lock and track NAPI
state anyway to make suspend work. But we'll see.

> Two patches in one mail.  It's gauche, but it's an RFC only (untested).
> 
> Cheers,
> Rusty.
> 
> virtio_net: set/cancel work on ndo_open/ndo_stop
> 
> Michael S. Tsirkin noticed that we could run the refill work after
> ndo_close, which can re-enable napi - we don't disable it until
> virtnet_remove.  This is clearly wrong, so move the workqueue control
> to ndo_open and ndo_stop (aka. virtnet_open and virtnet_close).
> 
> One subtle point: virtnet_probe() could simply fail if it couldn't
> allocate a receive buffer, but that's less polite in virtnet_open() so
> we schedule a refill as we do in the normal receive path if we run out
> of memory.
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -719,6 +719,10 @@ static int virtnet_open(struct net_devic
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
> +	/* Make sure we have some buffersl if oom use wq. */

Typo :)

> +	if (!try_fill_recv(vi, GFP_KERNEL))
> +		schedule_delayed_work(&vi->refill, 0);
> +
>  	virtnet_napi_enable(vi);
>  	return 0;
>  }
> @@ -772,6 +776,8 @@ static int virtnet_close(struct net_devi
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
> +	/* Make sure refill_work doesn't re-enable napi! */
> +	cancel_delayed_work_sync(&vi->refill);
>  	napi_disable(&vi->napi);
>  
>  	return 0;
> @@ -1082,7 +1088,6 @@ static int virtnet_probe(struct virtio_d
>  
>  unregister:
>  	unregister_netdev(dev);
> -	cancel_delayed_work_sync(&vi->refill);
>  free_vqs:
>  	vdev->config->del_vqs(vdev);
>  free_stats:
> @@ -1121,9 +1126,7 @@ static void __devexit virtnet_remove(str
>  	/* Stop all the virtqueues. */
>  	vdev->config->reset(vdev);
>  
> -
>  	unregister_netdev(vi->dev);
> -	cancel_delayed_work_sync(&vi->refill);
>  
>  	/* Free unused buffers in both send and recv, if any. */
>  	free_unused_bufs(vi);
> 
> 
> virtio_net: use non-reentrant workqueue.
> 
> Michael S. Tsirkin also noticed that we could run the refill work
> multiple CPUs: if we kick off a refill on one CPU and then on another,
> they would both manipulate the queue at the same time (they use
> napi_disable to avoid racing against the receive handler itself).
> 
> Tejun points out that this is what the WQ_NON_REENTRANT flag is for,
> and that there is a convenient system kthread we can use.
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -501,7 +501,7 @@ static void refill_work(struct work_stru
>  	/* In theory, this can happen: if we don't get any buffers in
>  	 * we will *never* try to fill again. */
>  	if (still_empty)
> -		schedule_delayed_work(&vi->refill, HZ/2);
> +		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
>  }
>  
>  static int virtnet_poll(struct napi_struct *napi, int budget)
> @@ -520,7 +520,7 @@ again:
>  
>  	if (vi->num < vi->max / 2) {
>  		if (!try_fill_recv(vi, GFP_ATOMIC))
> -			schedule_delayed_work(&vi->refill, 0);
> +			queue_delayed_work(system_nrt_wq, &vi->refill, 0);
>  	}
>  
>  	/* Out of packets? */
> @@ -721,7 +721,7 @@ static int virtnet_open(struct net_devic
>  
>  	/* Make sure we have some buffersl if oom use wq. */
>  	if (!try_fill_recv(vi, GFP_KERNEL))
> -		schedule_delayed_work(&vi->refill, 0);
> +		queue_delayed_work(&system_nrt_wq, &vi->refill, 0);
>  
>  	virtnet_napi_enable(vi);
>  	return 0;

Maybe document some rules:
- try_fill_recv must always run from napi or with napi disabled ...
- refill only runs when device is open

-- 
MST

^ permalink raw reply

* [PATCH] virtio: harsher barriers for virtio-mmio.
From: Rusty Russell @ 2011-12-21  5:21 UTC (permalink / raw)
  To: torvalds
  Cc: kvm, Michael S. Tsirkin, Benjamin Herrenschmidt, linux-kernel,
	virtualization, rhod, linux-arm-kernel

We were cheating with our barriers; using the smp ones rather than the
real device ones.  That was fine, until virtio-mmio came along, which
could be talking to a real device (a non-SMP CPU).

Unfortunately, just putting back the real barriers (reverting
d57ed95d) causes a performance regression on virtio-pci.  In
particular, Amos reports netbench's TCP_RR over virtio_net CPU
utilization increased up to 35% while throughput went down by up to
14%.

By comparison, this branch is in the noise.

Reference: https://lkml.org/lkml/2011/12/11/22

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |   10 ++++++----
 drivers/s390/kvm/kvm_virtio.c  |    2 +-
 drivers/virtio/virtio_mmio.c   |    7 ++++---
 drivers/virtio/virtio_pci.c    |    4 ++--
 drivers/virtio/virtio_ring.c   |   34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |    1 +
 tools/virtio/linux/virtio.h    |    1 +
 tools/virtio/virtio_test.c     |    3 ++-
 8 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -291,11 +291,13 @@ static struct virtqueue *lg_find_vq(stru
 	}
 
 	/*
-	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
-	 * and we've got a pointer to its pages.
+	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
+	 * and we've got a pointer to its pages.  Note that we set weak_barriers
+	 * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
+	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -198,7 +198,7 @@ static struct virtqueue *kvm_find_vq(str
 		goto out;
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, (void *) config->address,
+				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -309,9 +309,10 @@ static struct virtqueue *vm_setup_vq(str
 	writel(virt_to_phys(info->queue) >> PAGE_SHIFT,
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
-	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
-				 vdev, info->queue, vm_notify, callback, name);
+	/* Create the vring: no weak barriers, the other side is could
+	 * be an independent "device". */
+	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+				 false, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -414,8 +414,8 @@ static struct virtqueue *setup_vq(struct
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -28,17 +28,20 @@
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
  * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
-#define virtio_mb() smp_mb()
-#define virtio_rmb() smp_rmb()
-#define virtio_wmb() smp_wmb()
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
+#define virtio_mb(vq) \
+	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
+#define virtio_rmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
  * running on another CPU, but SMP barriers are defined to barrier() in that
  * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
-#define virtio_rmb() rmb()
-#define virtio_wmb() wmb()
+#define virtio_mb(vq) mb()
+#define virtio_rmb(vq) rmb()
+#define virtio_wmb(vq) wmb()
 #endif
 
 #ifdef DEBUG
@@ -77,6 +80,9 @@ struct vring_virtqueue
 	/* Actual memory layout for this queue */
 	struct vring vring;
 
+	/* Can we use weak barriers? */
+	bool weak_barriers;
+
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
@@ -245,14 +251,14 @@ void virtqueue_kick(struct virtqueue *_v
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb();
+	virtio_wmb(vq);
 
 	old = vq->vring.avail->idx;
 	new = vq->vring.avail->idx = old + vq->num_added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
-	virtio_mb();
+	virtio_mb(vq);
 
 	if (vq->event ?
 	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
@@ -314,7 +320,7 @@ void *virtqueue_get_buf(struct virtqueue
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb();
+	virtio_rmb(vq);
 
 	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
 	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
@@ -337,7 +343,7 @@ void *virtqueue_get_buf(struct virtqueue
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb();
+		virtio_mb(vq);
 	}
 
 	END_USE(vq);
@@ -366,7 +372,7 @@ bool virtqueue_enable_cb(struct virtqueu
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -393,7 +399,7 @@ bool virtqueue_enable_cb_delayed(struct 
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
@@ -453,6 +459,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
@@ -476,6 +483,7 @@ struct virtqueue *vring_new_virtqueue(un
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -168,6 +168,7 @@ struct virtqueue;
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -214,6 +214,7 @@ void *virtqueue_detach_unused_buf(struct
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info
 	assert(r >= 0);
 	memset(info->ring, 0, vring_size(num, 4096));
 	vring_init(&info->vring, num, info->ring, 4096);
-	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+				       true, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox