* [PATCH] KVM virtio balloon driver
@ 2008-01-14 20:03 Marcelo Tosatti
2008-01-14 21:29 ` Anthony Liguori
2008-01-14 23:32 ` Rusty Russell
0 siblings, 2 replies; 24+ messages in thread
From: Marcelo Tosatti @ 2008-01-14 20:03 UTC (permalink / raw)
To: Rusty Russell; +Cc: virtualization
Hi Rusty,
It was agreed that the balloon driver should be merged through the
virtio tree, so here it goes. It depends on the config_changed patch
posted earlier.
-----
Following patch adds the KVM balloon driver.
Changes from last version:
- Get rid of global variables/structure
- Use page->lru to link ballooned pages
- Use dev_dbg/dev_printk
- Proper kthread_should_stop handling
- Move shared definitions to separate header
- Use ->config_changed method for notification
This depends on Rusty's config_changed patch.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: linux-2.6-nv/drivers/virtio/Kconfig
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/Kconfig
+++ linux-2.6-nv/drivers/virtio/Kconfig
@@ -23,3 +23,12 @@ config VIRTIO_PCI
If unsure, say M.
+config KVM_BALLOON
+ tristate "KVM balloon driver (EXPERIMENTAL)"
+ depends on VIRTIO_PCI
+ ---help---
+ This driver provides support for ballooning memory in/out of a
+ KVM paravirt guest.
+
+ If unsure, say M.
+
Index: linux-2.6-nv/drivers/virtio/Makefile
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/Makefile
+++ linux-2.6-nv/drivers/virtio/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_VIRTIO) += virtio.o
obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
+obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o
Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c
===================================================================
--- /dev/null
+++ linux-2.6-nv/drivers/virtio/kvm_balloon.c
@@ -0,0 +1,537 @@
+/*
+ * KVM guest balloon driver
+ *
+ * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor.laor@qumranet.com>
+ * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#define DEBUG
+#include <asm/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_balloon.h>
+#include <linux/preempt.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+MODULE_AUTHOR ("Dor Laor");
+MODULE_DESCRIPTION ("Implements guest ballooning support");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+static int kvm_balloon_debug;
+
+#define dprintk(dev, str...) if (kvm_balloon_debug) dev_dbg(dev, str)
+
+#define BALLOON_DATA_SIZE 200
+
+struct balloon_buf {
+ struct virtio_balloon_hdr hdr;
+ u8 data[BALLOON_DATA_SIZE];
+};
+
+struct balloon_work {
+ struct balloon_buf *buf;
+ struct list_head list;
+};
+
+#define VIRTIO_MAX_SG 2
+
+struct virtballoon {
+ struct virtio_device *vdev;
+ struct virtqueue *vq;
+ struct task_struct *balloon_thread;
+ wait_queue_head_t balloon_wait;
+ wait_queue_head_t rmmod_wait;
+ uint32_t target_nrpages;
+ atomic_t inflight_bufs;
+ int balloon_size;
+ struct list_head balloon_plist;
+ struct list_head balloon_work;
+ spinlock_t plist_lock;
+ spinlock_t queue_lock;
+ struct list_head list;
+};
+
+struct balloon_buf *alloc_balloon_buf(struct virtio_device *vdev, gfp_t flags)
+{
+ struct balloon_buf *buf;
+
+ buf = kzalloc(sizeof(struct balloon_buf), flags);
+ if (!buf)
+ dev_printk(KERN_ERR, &vdev->dev, "%s: alloc fail\n", __func__);
+
+ return buf;
+}
+
+static int send_balloon_buf(struct virtballoon *v, uint8_t cmd,
+ struct balloon_buf *buf)
+{
+ struct scatterlist sg[VIRTIO_MAX_SG];
+ int err = 0;
+
+ buf->hdr.cmd = cmd;
+
+ sg_init_table(sg, VIRTIO_MAX_SG);
+ sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr));
+ sg_set_buf(&sg[1], &buf->data, sizeof(buf->data));
+
+ spin_lock_irq(&v->queue_lock);
+ err = v->vq->vq_ops->add_buf(v->vq, sg, 0, 2, buf);
+ if (err) {
+ dev_printk(KERN_ERR, &v->vq->vdev->dev, "%s: add_buf err\n",
+ __func__);
+ goto out;
+ }
+
+ /* TODO: kick several balloon buffers at once */
+ v->vq->vq_ops->kick(v->vq);
+out:
+ spin_unlock_irq(&v->queue_lock);
+ atomic_inc(&v->inflight_bufs);
+ return err;
+}
+
+static int kvm_balloon_inflate(struct virtballoon *v, int32_t npages)
+{
+ LIST_HEAD(tmp_list);
+ struct page *page, *tmp;
+ struct balloon_buf *buf;
+ u32 *pfn;
+ int allocated = 0;
+ int i, r = -ENOMEM;
+
+ buf = alloc_balloon_buf(v->vdev, GFP_KERNEL);
+ if (!buf)
+ return r;
+
+ pfn = (u32 *)&buf->data;
+ *pfn++ = (u32)npages;
+
+ for (i = 0; i < npages; i++) {
+ page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY);
+ if (!page)
+ goto out_free;
+ list_add(&page->lru, &tmp_list);
+ allocated++;
+ *pfn = page_to_pfn(page);
+ pfn++;
+ }
+
+ r = send_balloon_buf(v, CMD_BALLOON_INFLATE, buf);
+ if (r)
+ goto out_free;
+
+ spin_lock(&v->plist_lock);
+ list_splice(&tmp_list, &v->balloon_plist);
+ v->balloon_size += allocated;
+ totalram_pages -= allocated;
+ dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__,
+ v->balloon_size);
+ spin_unlock(&v->plist_lock);
+ return allocated;
+
+out_free:
+ list_for_each_entry_safe(page, tmp, &tmp_list, lru) {
+ list_del(&page->lru);
+ __free_page(page);
+ }
+ return r;
+}
+
+static int kvm_balloon_deflate(struct virtballoon *v, int32_t npages)
+{
+ LIST_HEAD(tmp_list);
+ struct page *page, *tmp;
+ struct balloon_buf *buf;
+ u32 *pfn;
+ int deallocated = 0;
+ int r = 0;
+
+ buf = alloc_balloon_buf(v->vdev, GFP_KERNEL);
+ if (!buf)
+ return r;
+
+ spin_lock(&v->plist_lock);
+
+ if (v->balloon_size < npages) {
+ dev_printk(KERN_INFO, &v->vdev->dev,
+ "%s: balloon=%d with deflate rq=%d\n",
+ __func__, v->balloon_size, npages);
+ npages = v->balloon_size;
+ if (!npages)
+ goto out;
+ }
+
+ pfn = (u32 *)&buf->data;
+ *pfn++ = (u32)-npages;
+
+ /*
+ * Move the balloon pages to tmp list before issuing
+ * the virtio buffer
+ */
+ list_for_each_entry_safe(page, tmp, &v->balloon_plist, lru) {
+ *pfn++ = page_to_pfn(page);
+ list_move(&page->lru, &tmp_list);
+ if (++deallocated == npages)
+ break;
+ }
+
+ r = send_balloon_buf(v, CMD_BALLOON_DEFLATE, buf);
+ if (r)
+ goto out;
+
+ list_for_each_entry_safe(page, tmp, &tmp_list, lru)
+ list_del_init(&page->lru);
+
+ v->balloon_size -= npages;
+ totalram_pages += npages;
+ dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__,
+ v->balloon_size);
+
+ spin_unlock(&v->plist_lock);
+ return deallocated;
+
+out:
+ list_splice(&tmp_list, &v->balloon_plist);
+ spin_unlock(&v->plist_lock);
+ return r;
+}
+
+#define MAX_BALLOON_PAGES_PER_OP (BALLOON_DATA_SIZE/sizeof(u32)) \
+ - sizeof(int32_t)
+#define MAX_BALLOON_XFLATE_OP 1000000
+
+static int kvm_balloon_xflate(struct virtballoon *v, int32_t npages)
+{
+ int r = -EINVAL, i;
+ int iterations;
+ int abspages;
+ int curr_pages = 0;
+ int gfns_per_buf;
+
+ abspages = abs(npages);
+
+ if (abspages > MAX_BALLOON_XFLATE_OP) {
+ dev_printk(KERN_ERR, &v->vdev->dev,
+ "%s: bad npages=%d\n", __func__, npages);
+ return -EINVAL;
+ }
+
+ dprintk(&v->vdev->dev, "%s: got %s, npages=%d\n", __func__,
+ (npages > 0)? "inflate":"deflate", npages);
+
+ gfns_per_buf = MAX_BALLOON_PAGES_PER_OP;
+
+ /*
+ * Call the balloon in PAGE_SIZE*pfns-per-buf
+ * iterations
+ */
+ iterations = DIV_ROUND_UP(abspages, gfns_per_buf);
+ dprintk(&v->vdev->dev, "%s: iterations=%d\n", __func__, iterations);
+
+ for (i = 0; i < iterations; i++) {
+ int32_t pages_in_iteration =
+ min(abspages - curr_pages, gfns_per_buf);
+
+ if (npages > 0)
+ r = kvm_balloon_inflate(v, pages_in_iteration);
+ else
+ r = kvm_balloon_deflate(v, pages_in_iteration);
+
+ if (r < 0)
+ return r;
+ curr_pages += r;
+ if (r != pages_in_iteration)
+ break;
+ cond_resched();
+ }
+
+ return curr_pages;
+}
+
+static void inflate_done(struct virtballoon *v, struct balloon_buf *buf)
+{
+ uint8_t status = buf->hdr.status;
+
+ /* error inflating, return pages to the system */
+ if (status) {
+ struct page *page;
+ u32 *pfn = (u32 *)&buf->data;
+ int npages = (int)*pfn++;
+ int i;
+
+ spin_lock(&v->plist_lock);
+ for (i=0;i<npages;i++) {
+ page = pfn_to_page(*pfn);
+ list_del_init(&page->lru);
+ __free_page(page);
+ v->balloon_size--;
+ totalram_pages++;
+ v->target_nrpages++;
+ pfn++;
+ }
+ spin_unlock(&v->plist_lock);
+ }
+}
+
+static void deflate_done(struct virtballoon *v, struct balloon_buf *buf)
+{
+ uint8_t status = buf->hdr.status;
+
+ /* deflate OK, return pages to the system */
+ if (!status) {
+ u32 *pfn = (u32 *)&buf->data;
+ int npages, i;
+
+ npages = (int)*pfn++;
+ npages = abs(npages);
+
+ for (i = 0; i<npages; i++) {
+ __free_page(pfn_to_page(*pfn));
+ pfn++;
+ }
+ /* deflate error, add pages back to ballooned list */
+ } else {
+ u32 *pfn = (u32 *)&buf->data;
+ int npages, i;
+ struct page *page;
+
+ npages = (int)*pfn++;
+ npages = abs(npages);
+
+ spin_lock(&v->plist_lock);
+ for (i = 0; i < npages; i++) {
+ page = pfn_to_page(*pfn++);
+ list_add(&page->lru, &v->balloon_plist);
+ v->balloon_size++;
+ totalram_pages--;
+ v->target_nrpages--;
+ }
+ spin_unlock(&v->plist_lock);
+ }
+ return;
+}
+
+static int balloon_thread(void *p)
+{
+ struct virtballoon *v = p;
+ DEFINE_WAIT(wait);
+ int rmmod = 0;
+
+ set_freezable();
+ while (!kthread_should_stop()) {
+ int delta;
+
+ prepare_to_wait(&v->balloon_wait, &wait, TASK_INTERRUPTIBLE);
+ schedule();
+ finish_wait(&v->balloon_wait, &wait);
+
+ try_to_freeze();
+
+ /* wait for kthread_stop() if rmmod has been called */
+ if (rmmod)
+ continue;
+
+ spin_lock_irq(&v->plist_lock);
+ delta = totalram_pages - v->target_nrpages;
+ spin_unlock_irq(&v->plist_lock);
+
+ if (delta)
+ kvm_balloon_xflate(v, delta);
+
+ spin_lock_irq(&v->queue_lock);
+ while (!list_empty(&v->balloon_work)) {
+ struct balloon_work *work;
+ struct balloon_buf *buf;
+
+ work = list_entry(v->balloon_work.next,
+ struct balloon_work, list);
+ list_del(&work->list);
+ spin_unlock_irq(&v->queue_lock);
+ buf = work->buf;
+ kfree(work);
+
+ switch(buf->hdr.cmd) {
+ case CMD_BALLOON_DEFLATE:
+ deflate_done(v, buf);
+ break;
+ case CMD_BALLOON_INFLATE:
+ inflate_done(v, buf);
+ break;
+ default:
+ printk("%s: unknown cmd 0x%x\n", __func__,
+ buf->hdr.cmd);
+ }
+ kfree(buf);
+ if (atomic_dec_and_test(&v->inflight_bufs)) {
+ if (waitqueue_active(&v->rmmod_wait)) {
+ wake_up(&v->rmmod_wait);
+ rmmod = 1;
+ }
+ }
+ cond_resched();
+ spin_lock_irq(&v->queue_lock);
+ }
+ spin_unlock_irq(&v->queue_lock);
+ }
+ return 0;
+}
+
+static bool balloon_tx_done(struct virtqueue *vq)
+{
+ struct balloon_buf *buf;
+ struct virtballoon *v = vq->vdev->priv;
+ unsigned int len;
+
+ spin_lock(&v->queue_lock);
+ while ((buf = vq->vq_ops->get_buf(vq, &len)) != NULL) {
+ struct balloon_work *work;
+
+ work = kzalloc(sizeof(struct balloon_work), GFP_ATOMIC);
+ if (!work)
+ continue;
+ INIT_LIST_HEAD(&work->list);
+ work->buf = buf;
+
+ list_add(&work->list, &v->balloon_work);
+ }
+ spin_unlock(&v->queue_lock);
+ wake_up(&v->balloon_wait);
+
+ return true;
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID},
+ { 0 },
+};
+
+static LIST_HEAD(balloon_devices);
+
+static int balloon_probe(struct virtio_device *vdev)
+{
+ int err = -EINVAL;
+ struct virtballoon *v;
+
+ v = kzalloc(GFP_KERNEL, sizeof(struct virtballoon));
+ if (!v)
+ return -ENOMEM;
+
+ v->vq = vdev->config->find_vq(vdev, 0, balloon_tx_done);
+ if (IS_ERR(v->vq))
+ goto out_free;
+
+ v->vdev = vdev;
+
+ init_waitqueue_head(&v->balloon_wait);
+ init_waitqueue_head(&v->rmmod_wait);
+ spin_lock_init(&v->plist_lock);
+ spin_lock_init(&v->queue_lock);
+ INIT_LIST_HEAD(&v->balloon_plist);
+ INIT_LIST_HEAD(&v->balloon_work);
+ INIT_LIST_HEAD(&v->list);
+ atomic_set(&v->inflight_bufs, 0);
+
+ vdev->priv = v;
+
+ v->balloon_thread = kthread_run(balloon_thread, v, "kvm_balloond");
+ if (IS_ERR(v->balloon_thread))
+ goto out_free_vq;
+
+ list_add(&v->list, &balloon_devices);
+
+ dev_printk(KERN_INFO, &v->vdev->dev, "registered\n");
+
+ return 0;
+
+out_free_vq:
+ vdev->config->del_vq(v->vq);
+out_free:
+ kfree(v);
+ return err;
+}
+
+static void balloon_remove(struct virtio_device *vdev)
+{
+ struct virtballoon *v = vdev->priv;
+
+ kthread_stop(v->balloon_thread);
+ vdev->config->del_vq(v->vq);
+ list_del(&v->list);
+ kfree(v);
+}
+
+static void balloon_config_changed(struct virtio_device *vdev)
+{
+ struct virtballoon *v = vdev->priv;
+
+ spin_lock(&v->plist_lock);
+ __virtio_config_val(v->vdev, 0, &v->target_nrpages);
+ spin_unlock(&v->plist_lock);
+ wake_up(&v->balloon_wait);
+ dprintk(&vdev->dev, "%s\n", __func__);
+}
+
+static struct virtio_driver virtio_balloon = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = balloon_probe,
+ .remove = __devexit_p(balloon_remove),
+ .config_changed = balloon_config_changed,
+};
+
+module_param(kvm_balloon_debug, int, 0);
+
+static int __init kvm_balloon_init(void)
+{
+ return register_virtio_driver(&virtio_balloon);
+}
+
+static void __exit kvm_balloon_exit(void)
+{
+ struct virtballoon *v;
+
+ list_for_each_entry(v, &balloon_devices, list) {
+ spin_lock(&v->plist_lock);
+ if (v->balloon_size) {
+ DEFINE_WAIT(wait);
+
+ v->target_nrpages += v->balloon_size;
+ spin_unlock(&v->plist_lock);
+ wake_up(&v->balloon_wait);
+ prepare_to_wait(&v->rmmod_wait, &wait,
+ TASK_INTERRUPTIBLE);
+ schedule();
+ finish_wait(&v->rmmod_wait, &wait);
+ spin_lock(&v->plist_lock);
+ }
+
+ if (v->balloon_size)
+ dev_printk(KERN_ERR, &v->vdev->dev,
+ "%s: exit while balloon not empty!\n",
+ __func__);
+
+ spin_unlock(&v->plist_lock);
+ }
+
+ unregister_virtio_driver(&virtio_balloon);
+}
+
+module_init(kvm_balloon_init);
+module_exit(kvm_balloon_exit);
Index: linux-2.6-nv/drivers/virtio/virtio_pci.c
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c
+++ linux-2.6-nv/drivers/virtio/virtio_pci.c
@@ -67,6 +67,7 @@ static struct pci_device_id virtio_pci_i
{ 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
{ 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
{ 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
+ { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */
{ 0 },
};
Index: linux-2.6-nv/include/linux/virtio_balloon.h
===================================================================
--- /dev/null
+++ linux-2.6-nv/include/linux/virtio_balloon.h
@@ -0,0 +1,20 @@
+#ifndef _LINUX_VIRTIO_BALLOON_H
+#define _LINUX_VIRTIO_BALLOON_H
+#include <linux/virtio_config.h>
+
+#define VIRTIO_ID_BALLOON 3
+
+#define CMD_BALLOON_INFLATE 0x1
+#define CMD_BALLOON_DEFLATE 0x2
+
+struct virtio_balloon_hdr {
+ uint8_t cmd;
+ uint8_t status;
+};
+
+struct virtio_balloon_config
+{
+ uint32_t target_nrpages;
+};
+
+#endif /* _LINUX_VIRTIO_BALLOON_H */
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH] KVM virtio balloon driver 2008-01-14 20:03 [PATCH] KVM virtio balloon driver Marcelo Tosatti @ 2008-01-14 21:29 ` Anthony Liguori 2008-01-15 14:22 ` Marcelo Tosatti 2008-01-14 23:32 ` Rusty Russell 1 sibling, 1 reply; 24+ messages in thread From: Anthony Liguori @ 2008-01-14 21:29 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: virtualization Marcelo Tosatti wrote: > Hi Rusty, > > It was agreed that the balloon driver should be merged through the > virtio tree, so here it goes. It depends on the config_changed patch > posted earlier. > > > ----- > > Following patch adds the KVM balloon driver. > > Changes from last version: > - Get rid of global variables/structure > - Use page->lru to link ballooned pages > - Use dev_dbg/dev_printk > - Proper kthread_should_stop handling > - Move shared definitions to separate header > - Use ->config_changed method for notification > > This depends on Rusty's config_changed patch. > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > > Index: linux-2.6-nv/drivers/virtio/Kconfig > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/Kconfig > +++ linux-2.6-nv/drivers/virtio/Kconfig > @@ -23,3 +23,12 @@ config VIRTIO_PCI > > If unsure, say M. > > +config KVM_BALLOON > + tristate "KVM balloon driver (EXPERIMENTAL)" > + depends on VIRTIO_PCI > + ---help--- > + This driver provides support for ballooning memory in/out of a > + KVM paravirt guest. > + > + If unsure, say M. > + Please rename from KVM_BALLOON to VIRTIO_BALLOON. Also, it doesn't depend on VIRTIO_PCI. It should select VIRTIO and VIRTIO_RING. > Index: linux-2.6-nv/drivers/virtio/Makefile > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/Makefile > +++ linux-2.6-nv/drivers/virtio/Makefile > @@ -1,3 +1,4 @@ > obj-$(CONFIG_VIRTIO) += virtio.o > obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > +obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o > Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c > =================================================================== > --- /dev/null > +++ linux-2.6-nv/drivers/virtio/kvm_balloon.c > @@ -0,0 +1,537 @@ > +/* > + * KVM guest balloon driver > + * > + * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor.laor@qumranet.com> > + * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <mtosatti@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#define DEBUG > +#include <asm/uaccess.h> > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/percpu.h> > +#include <linux/init.h> > +#include <linux/interrupt.h> > +#include <linux/mm.h> > +#include <linux/swap.h> > +#include <linux/wait.h> > +#include <linux/kthread.h> > +#include <linux/freezer.h> > +#include <linux/version.h> > +#include <linux/virtio.h> > +#include <linux/virtio_config.h> > +#include <linux/virtio_balloon.h> > +#include <linux/preempt.h> > +#include <linux/kvm_types.h> > +#include <linux/kvm_host.h> Please don't include kvm_types or kvm_host. > + > +MODULE_AUTHOR ("Dor Laor"); > +MODULE_DESCRIPTION ("Implements guest ballooning support"); > +MODULE_LICENSE("GPL"); > +MODULE_VERSION("1"); > + > +static int kvm_balloon_debug; > + > +#define dprintk(dev, str...) if (kvm_balloon_debug) dev_dbg(dev, str) This can go away. Regards, Anthony Liguori > +#define BALLOON_DATA_SIZE 200 > + > +struct balloon_buf { > + struct virtio_balloon_hdr hdr; > + u8 data[BALLOON_DATA_SIZE]; > +}; > + > +struct balloon_work { > + struct balloon_buf *buf; > + struct list_head list; > +}; > + > +#define VIRTIO_MAX_SG 2 > + > +struct virtballoon { > + struct virtio_device *vdev; > + struct virtqueue *vq; > + struct task_struct *balloon_thread; > + wait_queue_head_t balloon_wait; > + wait_queue_head_t rmmod_wait; > + uint32_t target_nrpages; > + atomic_t inflight_bufs; > + int balloon_size; > + struct list_head balloon_plist; > + struct list_head balloon_work; > + spinlock_t plist_lock; > + spinlock_t queue_lock; > + struct list_head list; > +}; > + > +struct balloon_buf *alloc_balloon_buf(struct virtio_device *vdev, gfp_t flags) > +{ > + struct balloon_buf *buf; > + > + buf = kzalloc(sizeof(struct balloon_buf), flags); > + if (!buf) > + dev_printk(KERN_ERR, &vdev->dev, "%s: alloc fail\n", __func__); > + > + return buf; > +} > + > +static int send_balloon_buf(struct virtballoon *v, uint8_t cmd, > + struct balloon_buf *buf) > +{ > + struct scatterlist sg[VIRTIO_MAX_SG]; > + int err = 0; > + > + buf->hdr.cmd = cmd; > + > + sg_init_table(sg, VIRTIO_MAX_SG); > + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); > + sg_set_buf(&sg[1], &buf->data, sizeof(buf->data)); > + > + spin_lock_irq(&v->queue_lock); > + err = v->vq->vq_ops->add_buf(v->vq, sg, 0, 2, buf); > + if (err) { > + dev_printk(KERN_ERR, &v->vq->vdev->dev, "%s: add_buf err\n", > + __func__); > + goto out; > + } > + > + /* TODO: kick several balloon buffers at once */ > + v->vq->vq_ops->kick(v->vq); > +out: > + spin_unlock_irq(&v->queue_lock); > + atomic_inc(&v->inflight_bufs); > + return err; > +} > + > +static int kvm_balloon_inflate(struct virtballoon *v, int32_t npages) > +{ > + LIST_HEAD(tmp_list); > + struct page *page, *tmp; > + struct balloon_buf *buf; > + u32 *pfn; > + int allocated = 0; > + int i, r = -ENOMEM; > + > + buf = alloc_balloon_buf(v->vdev, GFP_KERNEL); > + if (!buf) > + return r; > + > + pfn = (u32 *)&buf->data; > + *pfn++ = (u32)npages; > + > + for (i = 0; i < npages; i++) { > + page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); > + if (!page) > + goto out_free; > + list_add(&page->lru, &tmp_list); > + allocated++; > + *pfn = page_to_pfn(page); > + pfn++; > + } > + > + r = send_balloon_buf(v, CMD_BALLOON_INFLATE, buf); > + if (r) > + goto out_free; > + > + spin_lock(&v->plist_lock); > + list_splice(&tmp_list, &v->balloon_plist); > + v->balloon_size += allocated; > + totalram_pages -= allocated; > + dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__, > + v->balloon_size); > + spin_unlock(&v->plist_lock); > + return allocated; > + > +out_free: > + list_for_each_entry_safe(page, tmp, &tmp_list, lru) { > + list_del(&page->lru); > + __free_page(page); > + } > + return r; > +} > + > +static int kvm_balloon_deflate(struct virtballoon *v, int32_t npages) > +{ > + LIST_HEAD(tmp_list); > + struct page *page, *tmp; > + struct balloon_buf *buf; > + u32 *pfn; > + int deallocated = 0; > + int r = 0; > + > + buf = alloc_balloon_buf(v->vdev, GFP_KERNEL); > + if (!buf) > + return r; > + > + spin_lock(&v->plist_lock); > + > + if (v->balloon_size < npages) { > + dev_printk(KERN_INFO, &v->vdev->dev, > + "%s: balloon=%d with deflate rq=%d\n", > + __func__, v->balloon_size, npages); > + npages = v->balloon_size; > + if (!npages) > + goto out; > + } > + > + pfn = (u32 *)&buf->data; > + *pfn++ = (u32)-npages; > + > + /* > + * Move the balloon pages to tmp list before issuing > + * the virtio buffer > + */ > + list_for_each_entry_safe(page, tmp, &v->balloon_plist, lru) { > + *pfn++ = page_to_pfn(page); > + list_move(&page->lru, &tmp_list); > + if (++deallocated == npages) > + break; > + } > + > + r = send_balloon_buf(v, CMD_BALLOON_DEFLATE, buf); > + if (r) > + goto out; > + > + list_for_each_entry_safe(page, tmp, &tmp_list, lru) > + list_del_init(&page->lru); > + > + v->balloon_size -= npages; > + totalram_pages += npages; > + dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__, > + v->balloon_size); > + > + spin_unlock(&v->plist_lock); > + return deallocated; > + > +out: > + list_splice(&tmp_list, &v->balloon_plist); > + spin_unlock(&v->plist_lock); > + return r; > +} > + > +#define MAX_BALLOON_PAGES_PER_OP (BALLOON_DATA_SIZE/sizeof(u32)) \ > + - sizeof(int32_t) > +#define MAX_BALLOON_XFLATE_OP 1000000 > + > +static int kvm_balloon_xflate(struct virtballoon *v, int32_t npages) > +{ > + int r = -EINVAL, i; > + int iterations; > + int abspages; > + int curr_pages = 0; > + int gfns_per_buf; > + > + abspages = abs(npages); > + > + if (abspages > MAX_BALLOON_XFLATE_OP) { > + dev_printk(KERN_ERR, &v->vdev->dev, > + "%s: bad npages=%d\n", __func__, npages); > + return -EINVAL; > + } > + > + dprintk(&v->vdev->dev, "%s: got %s, npages=%d\n", __func__, > + (npages > 0)? "inflate":"deflate", npages); > + > + gfns_per_buf = MAX_BALLOON_PAGES_PER_OP; > + > + /* > + * Call the balloon in PAGE_SIZE*pfns-per-buf > + * iterations > + */ > + iterations = DIV_ROUND_UP(abspages, gfns_per_buf); > + dprintk(&v->vdev->dev, "%s: iterations=%d\n", __func__, iterations); > + > + for (i = 0; i < iterations; i++) { > + int32_t pages_in_iteration = > + min(abspages - curr_pages, gfns_per_buf); > + > + if (npages > 0) > + r = kvm_balloon_inflate(v, pages_in_iteration); > + else > + r = kvm_balloon_deflate(v, pages_in_iteration); > + > + if (r < 0) > + return r; > + curr_pages += r; > + if (r != pages_in_iteration) > + break; > + cond_resched(); > + } > + > + return curr_pages; > +} > + > +static void inflate_done(struct virtballoon *v, struct balloon_buf *buf) > +{ > + uint8_t status = buf->hdr.status; > + > + /* error inflating, return pages to the system */ > + if (status) { > + struct page *page; > + u32 *pfn = (u32 *)&buf->data; > + int npages = (int)*pfn++; > + int i; > + > + spin_lock(&v->plist_lock); > + for (i=0;i<npages;i++) { > + page = pfn_to_page(*pfn); > + list_del_init(&page->lru); > + __free_page(page); > + v->balloon_size--; > + totalram_pages++; > + v->target_nrpages++; > + pfn++; > + } > + spin_unlock(&v->plist_lock); > + } > +} > + > +static void deflate_done(struct virtballoon *v, struct balloon_buf *buf) > +{ > + uint8_t status = buf->hdr.status; > + > + /* deflate OK, return pages to the system */ > + if (!status) { > + u32 *pfn = (u32 *)&buf->data; > + int npages, i; > + > + npages = (int)*pfn++; > + npages = abs(npages); > + > + for (i = 0; i<npages; i++) { > + __free_page(pfn_to_page(*pfn)); > + pfn++; > + } > + /* deflate error, add pages back to ballooned list */ > + } else { > + u32 *pfn = (u32 *)&buf->data; > + int npages, i; > + struct page *page; > + > + npages = (int)*pfn++; > + npages = abs(npages); > + > + spin_lock(&v->plist_lock); > + for (i = 0; i < npages; i++) { > + page = pfn_to_page(*pfn++); > + list_add(&page->lru, &v->balloon_plist); > + v->balloon_size++; > + totalram_pages--; > + v->target_nrpages--; > + } > + spin_unlock(&v->plist_lock); > + } > + return; > +} > + > +static int balloon_thread(void *p) > +{ > + struct virtballoon *v = p; > + DEFINE_WAIT(wait); > + int rmmod = 0; > + > + set_freezable(); > + while (!kthread_should_stop()) { > + int delta; > + > + prepare_to_wait(&v->balloon_wait, &wait, TASK_INTERRUPTIBLE); > + schedule(); > + finish_wait(&v->balloon_wait, &wait); > + > + try_to_freeze(); > + > + /* wait for kthread_stop() if rmmod has been called */ > + if (rmmod) > + continue; > + > + spin_lock_irq(&v->plist_lock); > + delta = totalram_pages - v->target_nrpages; > + spin_unlock_irq(&v->plist_lock); > + > + if (delta) > + kvm_balloon_xflate(v, delta); > + > + spin_lock_irq(&v->queue_lock); > + while (!list_empty(&v->balloon_work)) { > + struct balloon_work *work; > + struct balloon_buf *buf; > + > + work = list_entry(v->balloon_work.next, > + struct balloon_work, list); > + list_del(&work->list); > + spin_unlock_irq(&v->queue_lock); > + buf = work->buf; > + kfree(work); > + > + switch(buf->hdr.cmd) { > + case CMD_BALLOON_DEFLATE: > + deflate_done(v, buf); > + break; > + case CMD_BALLOON_INFLATE: > + inflate_done(v, buf); > + break; > + default: > + printk("%s: unknown cmd 0x%x\n", __func__, > + buf->hdr.cmd); > + } > + kfree(buf); > + if (atomic_dec_and_test(&v->inflight_bufs)) { > + if (waitqueue_active(&v->rmmod_wait)) { > + wake_up(&v->rmmod_wait); > + rmmod = 1; > + } > + } > + cond_resched(); > + spin_lock_irq(&v->queue_lock); > + } > + spin_unlock_irq(&v->queue_lock); > + } > + return 0; > +} > + > +static bool balloon_tx_done(struct virtqueue *vq) > +{ > + struct balloon_buf *buf; > + struct virtballoon *v = vq->vdev->priv; > + unsigned int len; > + > + spin_lock(&v->queue_lock); > + while ((buf = vq->vq_ops->get_buf(vq, &len)) != NULL) { > + struct balloon_work *work; > + > + work = kzalloc(sizeof(struct balloon_work), GFP_ATOMIC); > + if (!work) > + continue; > + INIT_LIST_HEAD(&work->list); > + work->buf = buf; > + > + list_add(&work->list, &v->balloon_work); > + } > + spin_unlock(&v->queue_lock); > + wake_up(&v->balloon_wait); > + > + return true; > +} > + > +static struct virtio_device_id id_table[] = { > + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, > + { 0 }, > +}; > + > +static LIST_HEAD(balloon_devices); > + > +static int balloon_probe(struct virtio_device *vdev) > +{ > + int err = -EINVAL; > + struct virtballoon *v; > + > + v = kzalloc(GFP_KERNEL, sizeof(struct virtballoon)); > + if (!v) > + return -ENOMEM; > + > + v->vq = vdev->config->find_vq(vdev, 0, balloon_tx_done); > + if (IS_ERR(v->vq)) > + goto out_free; > + > + v->vdev = vdev; > + > + init_waitqueue_head(&v->balloon_wait); > + init_waitqueue_head(&v->rmmod_wait); > + spin_lock_init(&v->plist_lock); > + spin_lock_init(&v->queue_lock); > + INIT_LIST_HEAD(&v->balloon_plist); > + INIT_LIST_HEAD(&v->balloon_work); > + INIT_LIST_HEAD(&v->list); > + atomic_set(&v->inflight_bufs, 0); > + > + vdev->priv = v; > + > + v->balloon_thread = kthread_run(balloon_thread, v, "kvm_balloond"); > + if (IS_ERR(v->balloon_thread)) > + goto out_free_vq; > + > + list_add(&v->list, &balloon_devices); > + > + dev_printk(KERN_INFO, &v->vdev->dev, "registered\n"); > + > + return 0; > + > +out_free_vq: > + vdev->config->del_vq(v->vq); > +out_free: > + kfree(v); > + return err; > +} > + > +static void balloon_remove(struct virtio_device *vdev) > +{ > + struct virtballoon *v = vdev->priv; > + > + kthread_stop(v->balloon_thread); > + vdev->config->del_vq(v->vq); > + list_del(&v->list); > + kfree(v); > +} > + > +static void balloon_config_changed(struct virtio_device *vdev) > +{ > + struct virtballoon *v = vdev->priv; > + > + spin_lock(&v->plist_lock); > + __virtio_config_val(v->vdev, 0, &v->target_nrpages); > + spin_unlock(&v->plist_lock); > + wake_up(&v->balloon_wait); > + dprintk(&vdev->dev, "%s\n", __func__); > +} > + > +static struct virtio_driver virtio_balloon = { > + .driver.name = KBUILD_MODNAME, > + .driver.owner = THIS_MODULE, > + .id_table = id_table, > + .probe = balloon_probe, > + .remove = __devexit_p(balloon_remove), > + .config_changed = balloon_config_changed, > +}; > + > +module_param(kvm_balloon_debug, int, 0); > + > +static int __init kvm_balloon_init(void) > +{ > + return register_virtio_driver(&virtio_balloon); > +} > + > +static void __exit kvm_balloon_exit(void) > +{ > + struct virtballoon *v; > + > + list_for_each_entry(v, &balloon_devices, list) { > + spin_lock(&v->plist_lock); > + if (v->balloon_size) { > + DEFINE_WAIT(wait); > + > + v->target_nrpages += v->balloon_size; > + spin_unlock(&v->plist_lock); > + wake_up(&v->balloon_wait); > + prepare_to_wait(&v->rmmod_wait, &wait, > + TASK_INTERRUPTIBLE); > + schedule(); > + finish_wait(&v->rmmod_wait, &wait); > + spin_lock(&v->plist_lock); > + } > + > + if (v->balloon_size) > + dev_printk(KERN_ERR, &v->vdev->dev, > + "%s: exit while balloon not empty!\n", > + __func__); > + > + spin_unlock(&v->plist_lock); > + } > + > + unregister_virtio_driver(&virtio_balloon); > +} > + > +module_init(kvm_balloon_init); > +module_exit(kvm_balloon_exit); > Index: linux-2.6-nv/drivers/virtio/virtio_pci.c > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c > +++ linux-2.6-nv/drivers/virtio/virtio_pci.c > @@ -67,6 +67,7 @@ static struct pci_device_id virtio_pci_i > { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > + { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */ > { 0 }, > }; > > Index: linux-2.6-nv/include/linux/virtio_balloon.h > =================================================================== > --- /dev/null > +++ linux-2.6-nv/include/linux/virtio_balloon.h > @@ -0,0 +1,20 @@ > +#ifndef _LINUX_VIRTIO_BALLOON_H > +#define _LINUX_VIRTIO_BALLOON_H > +#include <linux/virtio_config.h> > + > +#define VIRTIO_ID_BALLOON 3 > + > +#define CMD_BALLOON_INFLATE 0x1 > +#define CMD_BALLOON_DEFLATE 0x2 > + > +struct virtio_balloon_hdr { > + uint8_t cmd; > + uint8_t status; > +}; > + > +struct virtio_balloon_config > +{ > + uint32_t target_nrpages; > +}; > + > +#endif /* _LINUX_VIRTIO_BALLOON_H */ ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM virtio balloon driver 2008-01-14 21:29 ` Anthony Liguori @ 2008-01-15 14:22 ` Marcelo Tosatti 0 siblings, 0 replies; 24+ messages in thread From: Marcelo Tosatti @ 2008-01-15 14:22 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, virtualization On Mon, Jan 14, 2008 at 03:29:45PM -0600, Anthony Liguori wrote: > Marcelo Tosatti wrote: > >Hi Rusty, > > > >It was agreed that the balloon driver should be merged through the > >virtio tree, so here it goes. It depends on the config_changed patch > >posted earlier. > > > > > >----- > > > >Following patch adds the KVM balloon driver. > > > >Changes from last version: > >- Get rid of global variables/structure > >- Use page->lru to link ballooned pages > >- Use dev_dbg/dev_printk > >- Proper kthread_should_stop handling > >- Move shared definitions to separate header > >- Use ->config_changed method for notification > > > >This depends on Rusty's config_changed patch. > > > >Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > > > > >Index: linux-2.6-nv/drivers/virtio/Kconfig > >=================================================================== > >--- linux-2.6-nv.orig/drivers/virtio/Kconfig > >+++ linux-2.6-nv/drivers/virtio/Kconfig > >@@ -23,3 +23,12 @@ config VIRTIO_PCI > > > > If unsure, say M. > > > >+config KVM_BALLOON > >+ tristate "KVM balloon driver (EXPERIMENTAL)" > >+ depends on VIRTIO_PCI > >+ ---help--- > >+ This driver provides support for ballooning memory in/out of a > >+ KVM paravirt guest. > >+ > >+ If unsure, say M. > >+ > > > Please rename from KVM_BALLOON to VIRTIO_BALLOON. Also, it doesn't > depend on VIRTIO_PCI. It should select VIRTIO and VIRTIO_RING. > > > > >Index: linux-2.6-nv/drivers/virtio/Makefile > >=================================================================== > >--- linux-2.6-nv.orig/drivers/virtio/Makefile > >+++ linux-2.6-nv/drivers/virtio/Makefile > >@@ -1,3 +1,4 @@ > > obj-$(CONFIG_VIRTIO) += virtio.o > > obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > >+obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o > >Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c > >=================================================================== > >--- /dev/null > >+++ linux-2.6-nv/drivers/virtio/kvm_balloon.c > >@@ -0,0 +1,537 @@ > >+/* > >+ * KVM guest balloon driver > >+ * > >+ * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor.laor@qumranet.com> > >+ * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti > ><mtosatti@redhat.com> > >+ * > >+ * This work is licensed under the terms of the GNU GPL, version 2. See > >+ * the COPYING file in the top-level directory. > >+ */ > >+ > >+#define DEBUG > >+#include <asm/uaccess.h> > >+#include <linux/kernel.h> > >+#include <linux/module.h> > >+#include <linux/percpu.h> > >+#include <linux/init.h> > >+#include <linux/interrupt.h> > >+#include <linux/mm.h> > >+#include <linux/swap.h> > >+#include <linux/wait.h> > >+#include <linux/kthread.h> > >+#include <linux/freezer.h> > >+#include <linux/version.h> > >+#include <linux/virtio.h> > >+#include <linux/virtio_config.h> > >+#include <linux/virtio_balloon.h> > >+#include <linux/preempt.h> > >+#include <linux/kvm_types.h> > >+#include <linux/kvm_host.h> > > Please don't include kvm_types or kvm_host. > > >+ > >+MODULE_AUTHOR ("Dor Laor"); > >+MODULE_DESCRIPTION ("Implements guest ballooning support"); > >+MODULE_LICENSE("GPL"); > >+MODULE_VERSION("1"); > >+ > >+static int kvm_balloon_debug; > >+ > >+#define dprintk(dev, str...) if (kvm_balloon_debug) dev_dbg(dev, str) > > This can go away. I think this is useful for debugging problems on field. Fixed the previous comments, thanks. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM virtio balloon driver 2008-01-14 20:03 [PATCH] KVM virtio balloon driver Marcelo Tosatti 2008-01-14 21:29 ` Anthony Liguori @ 2008-01-14 23:32 ` Rusty Russell 2008-01-15 19:01 ` Marcelo Tosatti 1 sibling, 1 reply; 24+ messages in thread From: Rusty Russell @ 2008-01-14 23:32 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: virtualization On Tuesday 15 January 2008 07:03:57 Marcelo Tosatti wrote: > Hi Rusty, > > It was agreed that the balloon driver should be merged through the > virtio tree, so here it goes. It depends on the config_changed patch > posted earlier. Hi Marcelo, Excellent! Although the main user will be kvm, it'd be nice to have a demonstration in-tree using lguest; any chance of you conjuring up an appropriate patch? If not, I can whip something up. > +config KVM_BALLOON > + tristate "KVM balloon driver (EXPERIMENTAL)" > + depends on VIRTIO_PCI > + ---help--- > + This driver provides support for ballooning memory in/out of a > + KVM paravirt guest. > + > + If unsure, say M. Please don't define "balloon" in terms of "ballooning". How about "This driver supports increasing and decreasing the amount of memory within a KVM guest." ? > + uint32_t target_nrpages; I prefer u32 within the kernel, but no big deal. > +static int send_balloon_buf(struct virtballoon *v, uint8_t cmd, > + struct balloon_buf *buf) > +{ > + struct scatterlist sg[VIRTIO_MAX_SG]; > + int err = 0; > + > + buf->hdr.cmd = cmd; > + > + sg_init_table(sg, VIRTIO_MAX_SG); > + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); > + sg_set_buf(&sg[1], &buf->data, sizeof(buf->data)); Since these are adjacent, can't you just combine them into one sg element? Or does the kvm code rely on the sg as a boundary between header and data? > +static int kvm_balloon_inflate(struct virtballoon *v, int32_t npages) > +{ > + LIST_HEAD(tmp_list); > + struct page *page, *tmp; > + struct balloon_buf *buf; > + u32 *pfn; > + int allocated = 0; > + int i, r = -ENOMEM; > + > + buf = alloc_balloon_buf(v->vdev, GFP_KERNEL); > + if (!buf) > + return r; > + > + pfn = (u32 *)&buf->data; > + *pfn++ = (u32)npages; OK, this seems strange. You always use the data portion as an array of u32s, yet you declare it as char[200]. You have a perfectly good header, but you put the number of pages in the first element of the data array: and you hand the entire data array even though you normally only use part of it. You can intuit the number from the sg len, I suggest you do that instead. Looking at this driver, I just don't think this needs to be so complicated: it's not a high speed device, after all. Perhaps allocate one buffer up front, and just reuse that. One simple thread loop, less locking needed. > + for (i = 0; i < npages; i++) { > + page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); > + if (!page) > + goto out_free; > + list_add(&page->lru, &tmp_list); > + allocated++; > + *pfn = page_to_pfn(page); > + pfn++; > + } I think it might be simpler to defer adding to any list until after the callback is done? Helpers to add an array to the balloon array (on successful inflate, or unsuccessful deflate) and to free them (unsuccessful inflate, or successful deflate) would also make the code more readable. > + gfns_per_buf = MAX_BALLOON_PAGES_PER_OP; > + > + /* > + * Call the balloon in PAGE_SIZE*pfns-per-buf > + * iterations > + */ > + iterations = DIV_ROUND_UP(abspages, gfns_per_buf); > + dprintk(&v->vdev->dev, "%s: iterations=%d\n", __func__, iterations); > + > + for (i = 0; i < iterations; i++) { This logic seems overly complex. How about in the main thread: /* We're woken when target changes, in config_changed() */ if (wait_event_interruptible(&v->wq, (diff = atomic_read(&v->target_pages) - total_pages)) == 0) { /* If we submit inflate/deflate request, wait for it to finish. */ if (xflate(v, diff) == 0) wait_for_completion(&v->complete); } xflate just does as much as it can (up to "diff"), and then the interrupt handler adds/removes from the linked list, frees pages, etc and fires the completion. No need for locking, since there's only one pending request at any time. Cheers, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM virtio balloon driver 2008-01-14 23:32 ` Rusty Russell @ 2008-01-15 19:01 ` Marcelo Tosatti 2008-01-16 23:12 ` Dor Laor [not found] ` <1200525166.26281.103.camel@localhost.localdomain> 0 siblings, 2 replies; 24+ messages in thread From: Marcelo Tosatti @ 2008-01-15 19:01 UTC (permalink / raw) To: Rusty Russell; +Cc: Anthony Liguori, virtualization On Tue, Jan 15, 2008 at 10:32:08AM +1100, Rusty Russell wrote: > On Tuesday 15 January 2008 07:03:57 Marcelo Tosatti wrote: > > Hi Rusty, > > > > It was agreed that the balloon driver should be merged through the > > virtio tree, so here it goes. It depends on the config_changed patch > > posted earlier. > > Hi Marcelo, > > Excellent! Although the main user will be kvm, it'd be nice to have a > demonstration in-tree using lguest; any chance of you conjuring up an > appropriate patch? > > If not, I can whip something up. > > > +config KVM_BALLOON > > + tristate "KVM balloon driver (EXPERIMENTAL)" > > + depends on VIRTIO_PCI > > + ---help--- > > + This driver provides support for ballooning memory in/out of a > > + KVM paravirt guest. > > + > > + If unsure, say M. > > Please don't define "balloon" in terms of "ballooning". How about "This > driver supports increasing and decreasing the amount of memory within a KVM > guest." ? > > > + uint32_t target_nrpages; > > I prefer u32 within the kernel, but no big deal. > > > +static int send_balloon_buf(struct virtballoon *v, uint8_t cmd, > > + struct balloon_buf *buf) > > +{ > > + struct scatterlist sg[VIRTIO_MAX_SG]; > > + int err = 0; > > + > > + buf->hdr.cmd = cmd; > > + > > + sg_init_table(sg, VIRTIO_MAX_SG); > > + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); > > + sg_set_buf(&sg[1], &buf->data, sizeof(buf->data)); > > Since these are adjacent, can't you just combine them into one sg element? Or > does the kvm code rely on the sg as a boundary between header and data? > > > +static int kvm_balloon_inflate(struct virtballoon *v, int32_t npages) > > +{ > > + LIST_HEAD(tmp_list); > > + struct page *page, *tmp; > > + struct balloon_buf *buf; > > + u32 *pfn; > > + int allocated = 0; > > + int i, r = -ENOMEM; > > + > > + buf = alloc_balloon_buf(v->vdev, GFP_KERNEL); > > + if (!buf) > > + return r; > > + > > + pfn = (u32 *)&buf->data; > > + *pfn++ = (u32)npages; > > OK, this seems strange. You always use the data portion as an array of u32s, > yet you declare it as char[200]. You have a perfectly good header, but you > put the number of pages in the first element of the data array: and you hand > the entire data array even though you normally only use part of it. > > You can intuit the number from the sg len, I suggest you do that instead. > > Looking at this driver, I just don't think this needs to be so complicated: > it's not a high speed device, after all. Perhaps allocate one buffer up > front, and just reuse that. One simple thread loop, less locking needed. > > > + for (i = 0; i < npages; i++) { > > + page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); > > + if (!page) > > + goto out_free; > > + list_add(&page->lru, &tmp_list); > > + allocated++; > > + *pfn = page_to_pfn(page); > > + pfn++; > > + } > > I think it might be simpler to defer adding to any list until after the > callback is done? Helpers to add an array to the balloon array (on > successful inflate, or unsuccessful deflate) and to free them (unsuccessful > inflate, or successful deflate) would also make the code more readable. > > > + gfns_per_buf = MAX_BALLOON_PAGES_PER_OP; > > + > > + /* > > + * Call the balloon in PAGE_SIZE*pfns-per-buf > > + * iterations > > + */ > > + iterations = DIV_ROUND_UP(abspages, gfns_per_buf); > > + dprintk(&v->vdev->dev, "%s: iterations=%d\n", __func__, iterations); > > + > > + for (i = 0; i < iterations; i++) { > > This logic seems overly complex. How about in the main thread: > > /* We're woken when target changes, in config_changed() */ > if (wait_event_interruptible(&v->wq, > (diff = atomic_read(&v->target_pages) - total_pages)) == 0) { > > /* If we submit inflate/deflate request, wait for it to finish. */ > if (xflate(v, diff) == 0) > wait_for_completion(&v->complete); > } > > xflate just does as much as it can (up to "diff"), and then the interrupt > handler adds/removes from the linked list, frees pages, etc and fires the > completion. > > No need for locking, since there's only one pending request at any time. OK, thats simpler. How about this: [PATCH] Virtio balloon driver Add a balloon driver for KVM, host<->guest communication is performed via virtio. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Index: linux-2.6-nv/drivers/virtio/Kconfig =================================================================== --- linux-2.6-nv.orig/drivers/virtio/Kconfig +++ linux-2.6-nv/drivers/virtio/Kconfig @@ -23,3 +23,13 @@ config VIRTIO_PCI If unsure, say M. +config VIRTIO_BALLOON + tristate "Virtio balloon driver (EXPERIMENTAL)" + select VIRTIO + select VIRTIO_RING + ---help--- + This driver supports increasing and decreasing the amount + of memory within a KVM guest. + + If unsure, say M. + Index: linux-2.6-nv/drivers/virtio/Makefile =================================================================== --- linux-2.6-nv.orig/drivers/virtio/Makefile +++ linux-2.6-nv/drivers/virtio/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_BALLOON) += kvm_balloon.o Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c =================================================================== --- /dev/null +++ linux-2.6-nv/drivers/virtio/kvm_balloon.c @@ -0,0 +1,404 @@ +/* + * KVM guest balloon driver + * + * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor.laor@qumranet.com> + * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <mtosatti@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#define DEBUG +#include <asm/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/wait.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/version.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_balloon.h> +#include <linux/preempt.h> + +MODULE_AUTHOR ("Dor Laor"); +MODULE_DESCRIPTION ("Implements guest ballooning support"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +static int kvm_balloon_debug; + +#define dprintk(dev, str...) if (kvm_balloon_debug) dev_dbg(dev, str) + +#define BALLOON_DATA_SIZE 200 + +struct balloon_buf { + struct virtio_balloon_hdr hdr; + u8 data[BALLOON_DATA_SIZE]; +}; + +#define VIRTIO_MAX_SG 2 + +struct virtballoon { + struct virtio_device *vdev; + struct virtqueue *vq; + struct task_struct *balloon_thread; + struct balloon_buf buf; + wait_queue_head_t balloon_wait; + wait_queue_head_t rmmod_wait; + struct completion complete; + atomic_t target_nrpages; + int balloon_size; + struct list_head balloon_plist; + struct list_head list; +}; + + +static int send_balloon_buf(struct virtballoon *v, u8 cmd, + struct balloon_buf *buf, unsigned int data_len) +{ + struct scatterlist sg[VIRTIO_MAX_SG]; + int err = 0; + + buf->hdr.cmd = cmd; + + sg_init_table(sg, VIRTIO_MAX_SG); + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); + sg_set_buf(&sg[1], &buf->data, data_len); + + err = v->vq->vq_ops->add_buf(v->vq, sg, 0, 2, buf); + if (err) { + dev_printk(KERN_ERR, &v->vq->vdev->dev, "%s: add_buf err\n", + __func__); + goto out; + } + + v->vq->vq_ops->kick(v->vq); +out: + return err; +} + +static int kvm_balloon_inflate(struct virtballoon *v, int32_t npages) +{ + struct page *page; + struct balloon_buf *buf = &v->buf; + u32 *pfn; + int allocated = 0; + int i, r = -ENOMEM; + + pfn = (u32 *)&buf->data; + for (i = 0; i < npages; i++) { + page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + if (!page) + goto out_free; + allocated++; + *pfn = page_to_pfn(page); + pfn++; + } + + r = send_balloon_buf(v, CMD_BALLOON_INFLATE, buf, + allocated * sizeof(u32)); + if (r) + goto out_free; + + dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__, + v->balloon_size); + return r; + +out_free: + pfn = (u32 *)&buf->data; + for (i = 0; i < allocated; i++) + __free_page(pfn_to_page(*pfn++)); + return r; +} + +static int kvm_balloon_deflate(struct virtballoon *v, int32_t npages) +{ + struct page *page; + struct balloon_buf *buf = &v->buf; + u32 *pfn; + int deallocated = 0; + int r = 0; + + if (v->balloon_size < npages) { + dev_printk(KERN_INFO, &v->vdev->dev, + "%s: balloon=%d with deflate rq=%d\n", + __func__, v->balloon_size, npages); + npages = v->balloon_size; + if (!npages) { + r = -EINVAL; + goto out; + } + } + + dprintk(&v->vdev->dev, "%s: current balloon size=%d\n", __func__, + v->balloon_size); + + pfn = (u32 *)&buf->data; + + list_for_each_entry(page, &v->balloon_plist, lru) { + *pfn++ = page_to_pfn(page); + if (++deallocated == npages) + break; + } + + r = send_balloon_buf(v, CMD_BALLOON_DEFLATE, buf, + deallocated * sizeof(u32)); +out: + return r; +} + +#define MAX_BALLOON_PAGES_PER_OP (BALLOON_DATA_SIZE/sizeof(u32)) +#define MAX_BALLOON_XFLATE_OP 1000000 + +static int kvm_balloon_xflate(struct virtballoon *v, int32_t npages) +{ + int r = -EINVAL; + int pages_in_iteration; + int abspages; + int maxpages = MAX_BALLOON_PAGES_PER_OP; + + abspages = abs(npages); + + if (abspages > MAX_BALLOON_XFLATE_OP) { + dev_printk(KERN_ERR, &v->vdev->dev, + "%s: bad npages=%d\n", __func__, npages); + goto out; + } + + dprintk(&v->vdev->dev, "%s: got %s, npages=%d\n", __func__, + (npages > 0)? "inflate":"deflate", npages); + + pages_in_iteration = min(maxpages, abspages); + + if (npages > 0) + r = kvm_balloon_inflate(v, pages_in_iteration); + else + r = kvm_balloon_deflate(v, pages_in_iteration); + +out: + /* stop the thread in case of an error */ + if (r) + atomic_set(&v->target_nrpages, totalram_pages); + return r; +} + +static void free_page_array(struct balloon_buf *buf, unsigned int npages) +{ + struct page *page; + u32 *pfn = (u32 *)&buf->data; + int i; + + for (i=0; i<npages; i++) { + page = pfn_to_page(*pfn); + list_del_init(&page->lru); + __free_page(page); + pfn++; + } +} + +static void add_page_array(struct virtballoon *v, struct balloon_buf *buf, + unsigned int npages) +{ + struct page *page; + u32 *pfn = (u32 *)&buf->data; + int i; + + for (i=0; i<npages; i++) { + page = pfn_to_page(*pfn); + v->balloon_size++; + totalram_pages--; + list_add(&page->lru, &v->balloon_plist); + pfn++; + } +} + +static void inflate_done(struct virtballoon *v, struct balloon_buf *buf, + unsigned int npages) +{ + u8 status = buf->hdr.status; + + /* inflate OK */ + if (!status) + add_page_array(v, buf, npages); + else + free_page_array(buf, npages); +} + +static void deflate_done(struct virtballoon *v, struct balloon_buf *buf, + unsigned int npages) +{ + u8 status = buf->hdr.status; + + /* deflate OK, return pages to the system */ + if (!status) { + free_page_array(buf, npages); + totalram_pages += npages; + v->balloon_size -= npages; + } + return; +} + +static int balloon_thread(void *p) +{ + struct virtballoon *v = p; + DEFINE_WAIT(wait); + + set_freezable(); + while (!kthread_should_stop()) { + int diff, r; + + try_to_freeze(); + + r = wait_event_interruptible(v->balloon_wait, + (diff = totalram_pages - + atomic_read(&v->target_nrpages)) + || kthread_should_stop()); + if (r == 0) + if (kvm_balloon_xflate(v, diff) == 0) + wait_for_completion(&v->complete); + + if (waitqueue_active(&v->rmmod_wait)) { + diff = totalram_pages - atomic_read(&v->target_nrpages); + if (!diff) + wake_up(&v->rmmod_wait); + } + } + return 0; +} + +static bool balloon_tx_done(struct virtqueue *vq) +{ + struct balloon_buf *buf; + struct virtballoon *v = vq->vdev->priv; + unsigned int len; + + buf = vq->vq_ops->get_buf(vq, &len); + if (buf) { + switch(buf->hdr.cmd) { + case CMD_BALLOON_DEFLATE: + deflate_done(v, buf, len/sizeof(u32)); + break; + case CMD_BALLOON_INFLATE: + inflate_done(v, buf, len/sizeof(u32)); + break; + default: + printk("%s: unknown cmd 0x%x\n", __func__, + buf->hdr.cmd); + } + complete(&v->complete); + } + return true; +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, + { 0 }, +}; + +static LIST_HEAD(balloon_devices); + +static int balloon_probe(struct virtio_device *vdev) +{ + int err = -EINVAL; + struct virtballoon *v; + + v = kzalloc(GFP_KERNEL, sizeof(struct virtballoon)); + if (!v) + return -ENOMEM; + + v->vq = vdev->config->find_vq(vdev, 0, balloon_tx_done); + if (IS_ERR(v->vq)) + goto out_free; + + v->vdev = vdev; + + init_waitqueue_head(&v->balloon_wait); + init_waitqueue_head(&v->rmmod_wait); + INIT_LIST_HEAD(&v->balloon_plist); + INIT_LIST_HEAD(&v->list); + init_completion(&v->complete); + atomic_set(&v->target_nrpages, totalram_pages); + + vdev->priv = v; + + v->balloon_thread = kthread_run(balloon_thread, v, "kvm_balloond"); + if (IS_ERR(v->balloon_thread)) + goto out_free_vq; + + list_add(&v->list, &balloon_devices); + dev_printk(KERN_INFO, &v->vdev->dev, "registered\n"); + + return 0; + +out_free_vq: + vdev->config->del_vq(v->vq); +out_free: + kfree(v); + return err; +} + +static void balloon_remove(struct virtio_device *vdev) +{ + struct virtballoon *v = vdev->priv; + + kthread_stop(v->balloon_thread); + vdev->config->del_vq(v->vq); + list_del(&v->list); + kfree(v); +} + +static void balloon_config_changed(struct virtio_device *vdev) +{ + struct virtballoon *v = vdev->priv; + u32 target_nrpages; + + __virtio_config_val(v->vdev, 0, &target_nrpages); + atomic_set(&v->target_nrpages, target_nrpages); + wake_up(&v->balloon_wait); + dprintk(&vdev->dev, "%s\n", __func__); +} + +static struct virtio_driver virtio_balloon = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = balloon_probe, + .remove = __devexit_p(balloon_remove), + .config_changed = balloon_config_changed, +}; + +module_param(kvm_balloon_debug, int, 0); + +static int __init kvm_balloon_init(void) +{ + return register_virtio_driver(&virtio_balloon); +} + +static void __exit kvm_balloon_exit(void) +{ + struct virtballoon *v; + + list_for_each_entry(v, &balloon_devices, list) { + while (v->balloon_size) { + DEFINE_WAIT(wait); + + atomic_add(v->balloon_size, &v->target_nrpages); + wake_up(&v->balloon_wait); + prepare_to_wait(&v->rmmod_wait, &wait, + TASK_INTERRUPTIBLE); + schedule_timeout(HZ*10); + finish_wait(&v->rmmod_wait, &wait); + } + } + + unregister_virtio_driver(&virtio_balloon); +} + +module_init(kvm_balloon_init); +module_exit(kvm_balloon_exit); Index: linux-2.6-nv/drivers/virtio/virtio_pci.c =================================================================== --- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c +++ linux-2.6-nv/drivers/virtio/virtio_pci.c @@ -67,6 +67,7 @@ static struct pci_device_id virtio_pci_i { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ + { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */ { 0 }, }; Index: linux-2.6-nv/include/linux/virtio_balloon.h =================================================================== --- /dev/null +++ linux-2.6-nv/include/linux/virtio_balloon.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_VIRTIO_BALLOON_H +#define _LINUX_VIRTIO_BALLOON_H +#include <linux/virtio_config.h> + +#define VIRTIO_ID_BALLOON 3 + +#define CMD_BALLOON_INFLATE 0x1 +#define CMD_BALLOON_DEFLATE 0x2 + +struct virtio_balloon_hdr { + __u8 cmd; + __u8 status; +}; + +struct virtio_balloon_config +{ + __u32 target_nrpages; +}; + +#endif /* _LINUX_VIRTIO_BALLOON_H */ ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM virtio balloon driver 2008-01-15 19:01 ` Marcelo Tosatti @ 2008-01-16 23:12 ` Dor Laor [not found] ` <1200525166.26281.103.camel@localhost.localdomain> 1 sibling, 0 replies; 24+ messages in thread From: Dor Laor @ 2008-01-16 23:12 UTC (permalink / raw) To: Marcelo Tosatti, kvm-devel; +Cc: Anthony Liguori, virtualization On Tue, 2008-01-15 at 17:01 -0200, Marcelo Tosatti wrote: > OK, thats simpler. How about this: > It's sure is simpler :) > [PATCH] Virtio balloon driver > > Add a balloon driver for KVM, host<->guest communication is performed > via virtio. > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> [snip] > +static void free_page_array(struct balloon_buf *buf, unsigned int npages) > +{ > + struct page *page; > + u32 *pfn = (u32 *)&buf->data; > + int i; > + > + for (i=0; i<npages; i++) { > + page = pfn_to_page(*pfn); > + list_del_init(&page->lru); > + __free_page(page); > + pfn++; In add_page_array below you update baloon_size & totalram_pages, it is need here too. > + } > +} > + > +static void add_page_array(struct virtballoon *v, struct balloon_buf *buf, > + unsigned int npages) > +{ > + struct page *page; > + u32 *pfn = (u32 *)&buf->data; > + int i; > + > + for (i=0; i<npages; i++) { > + page = pfn_to_page(*pfn); > + v->balloon_size++; > + totalram_pages--; > + list_add(&page->lru, &v->balloon_plist); > + pfn++; > + } > +} > + > +static void inflate_done(struct virtballoon *v, struct balloon_buf *buf, > + unsigned int npages) > +{ > + u8 status = buf->hdr.status; > + > + /* inflate OK */ > + if (!status) > + add_page_array(v, buf, npages); > + else > + free_page_array(buf, npages); > +} > + > +static void deflate_done(struct virtballoon *v, struct balloon_buf *buf, > + unsigned int npages) > +{ > + u8 status = buf->hdr.status; > + > + /* deflate OK, return pages to the system */ > + if (!status) { > + free_page_array(buf, npages); If there are update above then no need below. > + totalram_pages += npages; > + v->balloon_size -= npages; > + } > + return; > +} > + [snip] > +static void balloon_config_changed(struct virtio_device *vdev) > +{ > + struct virtballoon *v = vdev->priv; > + u32 target_nrpages; > + A check should be added to see if rmmod_wait is active. If it is then don't allow the monitor to inflate the balloon since we like to remove the module. Best regards, Dor > + __virtio_config_val(v->vdev, 0, &target_nrpages); > + atomic_set(&v->target_nrpages, target_nrpages); > + wake_up(&v->balloon_wait); > + dprintk(&vdev->dev, "%s\n", __func__); > +} > + > +static struct virtio_driver virtio_balloon = { > + .driver.name = KBUILD_MODNAME, > + .driver.owner = THIS_MODULE, > + .id_table = id_table, > + .probe = balloon_probe, > + .remove = __devexit_p(balloon_remove), > + .config_changed = balloon_config_changed, > +}; > + > +module_param(kvm_balloon_debug, int, 0); > + > +static int __init kvm_balloon_init(void) > +{ > + return register_virtio_driver(&virtio_balloon); > +} > + > +static void __exit kvm_balloon_exit(void) > +{ > + struct virtballoon *v; > + > + list_for_each_entry(v, &balloon_devices, list) { > + while (v->balloon_size) { > + DEFINE_WAIT(wait); > + > + atomic_add(v->balloon_size, &v->target_nrpages); > + wake_up(&v->balloon_wait); > + prepare_to_wait(&v->rmmod_wait, &wait, > + TASK_INTERRUPTIBLE); > + schedule_timeout(HZ*10); > + finish_wait(&v->rmmod_wait, &wait); > + } > + } > + > + unregister_virtio_driver(&virtio_balloon); > +} > + > +module_init(kvm_balloon_init); > +module_exit(kvm_balloon_exit); > Index: linux-2.6-nv/drivers/virtio/virtio_pci.c > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c > +++ linux-2.6-nv/drivers/virtio/virtio_pci.c > @@ -67,6 +67,7 @@ static struct pci_device_id virtio_pci_i > { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > + { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */ > { 0 }, > }; > > Index: linux-2.6-nv/include/linux/virtio_balloon.h > =================================================================== > --- /dev/null > +++ linux-2.6-nv/include/linux/virtio_balloon.h > @@ -0,0 +1,20 @@ > +#ifndef _LINUX_VIRTIO_BALLOON_H > +#define _LINUX_VIRTIO_BALLOON_H > +#include <linux/virtio_config.h> > + > +#define VIRTIO_ID_BALLOON 3 > + > +#define CMD_BALLOON_INFLATE 0x1 > +#define CMD_BALLOON_DEFLATE 0x2 > + > +struct virtio_balloon_hdr { > + __u8 cmd; > + __u8 status; > +}; > + > +struct virtio_balloon_config > +{ > + __u32 target_nrpages; > +}; > + > +#endif /* _LINUX_VIRTIO_BALLOON_H */ > _______________________________________________ > Virtualization mailing list > Virtualization@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <1200525166.26281.103.camel@localhost.localdomain>]
* [PATCH] KVM simplified virtio balloon driver [not found] ` <1200525166.26281.103.camel@localhost.localdomain> @ 2008-01-17 1:45 ` Rusty Russell [not found] ` <200801171245.59510.rusty@rustcorp.com.au> 1 sibling, 0 replies; 24+ messages in thread From: Rusty Russell @ 2008-01-17 1:45 UTC (permalink / raw) To: dor.laor; +Cc: Marcelo Tosatti, kvm-devel, Anthony Liguori, virtualization After discussions with Anthony Liguori, it seems that the virtio balloon can be made even simpler. Here's my attempt. Since the balloon requires Guest cooperation anyway, there seems little reason to force it to tell the Host when it wants to reuse a page. It can simply fault it in. Moreover, the target is best expressed in balloon size, since there is no portable way of getting the total RAM in the system. The host can do the math. Tested with a (fairly hacky) lguest patch. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- drivers/virtio/Kconfig | 10 + drivers/virtio/Makefile | 1 drivers/virtio/virtio_balloon.c | 230 ++++++++++++++++++++++++++++++++++++++++ include/linux/virtio_balloon.h | 13 ++ 4 files changed, 254 insertions(+) diff -r c4762959de25 drivers/virtio/Kconfig --- a/drivers/virtio/Kconfig Thu Jan 17 10:31:37 2008 +1100 +++ b/drivers/virtio/Kconfig Thu Jan 17 12:28:23 2008 +1100 @@ -23,3 +23,13 @@ config VIRTIO_PCI If unsure, say M. +config VIRTIO_BALLOON + tristate "Virtio balloon driver (EXPERIMENTAL)" + select VIRTIO + select VIRTIO_RING + ---help--- + This driver supports increasing and decreasing the amount + of memory within a KVM guest. + + If unsure, say M. + diff -r c4762959de25 drivers/virtio/Makefile --- a/drivers/virtio/Makefile Thu Jan 17 10:31:37 2008 +1100 +++ b/drivers/virtio/Makefile Thu Jan 17 12:28:23 2008 +1100 @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff -r c4762959de25 drivers/virtio/virtio_balloon.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/virtio/virtio_balloon.c Thu Jan 17 12:28:23 2008 +1100 @@ -0,0 +1,235 @@ +/* Virtio balloon implementation, inspired by Dor Loar and Marcelo + * Tosatti's implementations. + * + * Copyright 2008 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#define DEBUG +#include <linux/virtio.h> +#include <linux/virtio_balloon.h> +#include <linux/swap.h> +#include <linux/kthread.h> +#include <linux/freezer.h> + +struct virtio_balloon +{ + struct virtio_device *vdev; + struct virtqueue *vq; + + /* Where the ballooning thread waits for config to change. */ + wait_queue_head_t config_change; + + /* The thread servicing the balloon. */ + struct task_struct *thread; + + /* Waiting for host to ack the pages we released. */ + struct completion acked; + + /* The pages we've told the Host we're not using. */ + unsigned int num_pages; + struct list_head pages; + + /* The array of pfns we tell the Host about. */ + unsigned int num_pfns; + u32 pfns[256]; +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, + { 0 }, +}; + +static void leak_balloon(struct virtio_balloon *vb, unsigned int num) +{ + struct page *page; + unsigned int i; + + /* Simply free pages, and usage will fault them back in. */ + for (i = 0; i < num; i++) { + page = list_first_entry(&vb->pages, struct page, lru); + list_del(&page->lru); + __free_page(page); + vb->num_pages--; + totalram_pages++; + } +} + +static void balloon_ack(struct virtqueue *vq) +{ + struct virtio_balloon *vb; + unsigned int len; + + vb = vq->vq_ops->get_buf(vq, &len); + if (vb) + complete(&vb->acked); +} + +static void fill_balloon(struct virtio_balloon *vb, unsigned int num) +{ + struct scatterlist sg; + + /* We can only do one array worth at a time. */ + num = min(num, ARRAY_SIZE(vb->pfns)); + + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + if (!page) { + if (printk_ratelimit()) + dev_printk(KERN_INFO, &vb->vdev->dev, + "Out of puff! Can't get %u pages\n", + num); + /* Sleep for at least 1/5 of a second before retry. */ + msleep(200); + break; + } + vb->pfns[vb->num_pfns] = page_to_pfn(page); + totalram_pages--; + vb->num_pages++; + list_add(&page->lru, &vb->pages); + } + + /* Didn't get any? Oh well. */ + if (vb->num_pfns == 0) + return; + + init_completion(&vb->acked); + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); + + /* We should always be able to add one buffer to an empty queue. */ + if (vb->vq->vq_ops->add_buf(vb->vq, &sg, 1, 0, vb) != 0) + BUG(); + vb->vq->vq_ops->kick(vb->vq); + + /* When host has read buffer, this completes via balloon_ack */ + wait_for_completion(&vb->acked); +} + +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + wake_up(&vb->config_change); +} + +static inline int towards_target(struct virtio_balloon *vb) +{ + u32 v; + __virtio_config_val(vb->vdev, + offsetof(struct virtio_balloon_config, num_pages), + &v); + return v - vb->num_pages; +} + +static int balloon(void *_vballoon) +{ + struct virtio_balloon *vb = _vballoon; + + set_freezable(); + while (!kthread_should_stop()) { + int diff; + + try_to_freeze(); + wait_event_interruptible(vb->config_change, + (diff = towards_target(vb)) != 0 + || kthread_should_stop()); + if (diff > 0) + fill_balloon(vb, diff); + else if (diff < 0) + leak_balloon(vb, -diff); + } + return 0; +} + +static int virtballoon_probe(struct virtio_device *vdev) +{ + struct virtio_balloon *vb; + int err; + + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + if (!vb) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&vb->pages); + vb->num_pages = 0; + init_waitqueue_head(&vb->config_change); + vb->vdev = vdev; + + /* We expect one virtqueue, for output. */ + vb->vq = vdev->config->find_vq(vdev, 0, balloon_ack); + if (IS_ERR(vb->vq)) { + err = PTR_ERR(vb->vq); + goto out_free_vb; + } + + vb->thread = kthread_run(balloon, vb, "vballoon"); + if (IS_ERR(vb->thread)) { + err = PTR_ERR(vb->thread); + goto out_del_vq; + } + return 0; + +out_del_vq: + vdev->config->del_vq(vb->vq); +out_free_vb: + kfree(vb); +out: + return err; +} + +static void virtballoon_remove(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + struct page *i, *next; + + kthread_stop(vb->thread); + + /* There might be pages left in the balloon: free them. */ + list_for_each_entry_safe(i, next, &vb->pages, lru) { + vb->num_pages--; + __free_page(i); + } + BUG_ON(vb->num_pages); + + vdev->config->del_vq(vb->vq); + kfree(vb); +} + +static struct virtio_driver virtio_balloon = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtballoon_probe, + .remove = __devexit_p(virtballoon_remove), + .config_changed = virtballoon_changed, +}; + +static int __init init(void) +{ + return register_virtio_driver(&virtio_balloon); +} + +static void __exit fini(void) +{ + unregister_virtio_driver(&virtio_balloon); +} +module_init(init); +module_exit(fini); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio balloon driver"); +MODULE_LICENSE("GPL"); diff -r c4762959de25 include/linux/virtio_balloon.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/linux/virtio_balloon.h Thu Jan 17 12:28:23 2008 +1100 @@ -0,0 +1,13 @@ +#ifndef _LINUX_VIRTIO_BALLOON_H +#define _LINUX_VIRTIO_BALLOON_H +#include <linux/virtio_config.h> + +/* The ID for virtio_balloon */ +#define VIRTIO_ID_BALLOON 5 + +struct virtio_balloon_config +{ + /* Number of pages host wants Guest to give up. */ + __le32 num_pages; +}; +#endif /* _LINUX_VIRTIO_BALLOON_H */ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <200801171245.59510.rusty@rustcorp.com.au>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <200801171245.59510.rusty@rustcorp.com.au> @ 2008-01-17 2:14 ` Anthony Liguori [not found] ` <478EBA22.30301@codemonkey.ws> ` (3 subsequent siblings) 4 siblings, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-17 2:14 UTC (permalink / raw) To: Rusty Russell; +Cc: Marcelo Tosatti, kvm-devel, virtualization Rusty Russell wrote: > After discussions with Anthony Liguori, it seems that the virtio > balloon can be made even simpler. Here's my attempt. > > Since the balloon requires Guest cooperation anyway, there seems > little reason to force it to tell the Host when it wants to reuse a > page. It can simply fault it in. > > Moreover, the target is best expressed in balloon size, since there is > no portable way of getting the total RAM in the system. The host can > do the math. > > Tested with a (fairly hacky) lguest patch. > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> > --- > drivers/virtio/Kconfig | 10 + > drivers/virtio/Makefile | 1 > drivers/virtio/virtio_balloon.c | 230 ++++++++++++++++++++++++++++++++++++++++ > include/linux/virtio_balloon.h | 13 ++ > 4 files changed, 254 insertions(+) > > diff -r c4762959de25 drivers/virtio/Kconfig > --- a/drivers/virtio/Kconfig Thu Jan 17 10:31:37 2008 +1100 > +++ b/drivers/virtio/Kconfig Thu Jan 17 12:28:23 2008 +1100 > @@ -23,3 +23,13 @@ config VIRTIO_PCI > > If unsure, say M. > > +config VIRTIO_BALLOON > + tristate "Virtio balloon driver (EXPERIMENTAL)" > + select VIRTIO > + select VIRTIO_RING > + ---help--- > + This driver supports increasing and decreasing the amount > + of memory within a KVM guest. > + > + If unsure, say M. > + > diff -r c4762959de25 drivers/virtio/Makefile > --- a/drivers/virtio/Makefile Thu Jan 17 10:31:37 2008 +1100 > +++ b/drivers/virtio/Makefile Thu Jan 17 12:28:23 2008 +1100 > @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o > obj-$(CONFIG_VIRTIO) += virtio.o > obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > +obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o > diff -r c4762959de25 drivers/virtio/virtio_balloon.c > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/drivers/virtio/virtio_balloon.c Thu Jan 17 12:28:23 2008 +1100 > @@ -0,0 +1,235 @@ > +/* Virtio balloon implementation, inspired by Dor Loar and Marcelo > + * Tosatti's implementations. > + * > + * Copyright 2008 Rusty Russell IBM Corporation > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > + */ > +#define DEBUG > +#include <linux/virtio.h> > +#include <linux/virtio_balloon.h> > +#include <linux/swap.h> > +#include <linux/kthread.h> > +#include <linux/freezer.h> > + > +struct virtio_balloon > +{ > + struct virtio_device *vdev; > + struct virtqueue *vq; > + > + /* Where the ballooning thread waits for config to change. */ > + wait_queue_head_t config_change; > + > + /* The thread servicing the balloon. */ > + struct task_struct *thread; > + > + /* Waiting for host to ack the pages we released. */ > + struct completion acked; > + > + /* The pages we've told the Host we're not using. */ > + unsigned int num_pages; > + struct list_head pages; > + > + /* The array of pfns we tell the Host about. */ > + unsigned int num_pfns; > + u32 pfns[256]; > +}; > + > +static struct virtio_device_id id_table[] = { > + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, > Could use a space after VIRTIO_DEV_ANY_ID > + { 0 }, > +}; > + > +static void leak_balloon(struct virtio_balloon *vb, unsigned int num) > +{ > + struct page *page; > + unsigned int i; > + > + /* Simply free pages, and usage will fault them back in. */ > + for (i = 0; i < num; i++) { > + page = list_first_entry(&vb->pages, struct page, lru); > + list_del(&page->lru); > + __free_page(page); > + vb->num_pages--; > + totalram_pages++; > Do we really want to modify totalram_pages in this driver? The only other place that I see that modifies it is in mm/memory_hotplug and it also modifies other things (like num_physpages). The cmm driver doesn't touch totalram_pages. It would be very useful too to write vb->num_pages into the config space whenever it was updated. This way, the host can easily keep track of where the guest is at in terms of ballooning. Regards, Anthony Liguori > + } > +} > + > +static void balloon_ack(struct virtqueue *vq) > +{ > + struct virtio_balloon *vb; > + unsigned int len; > + > + vb = vq->vq_ops->get_buf(vq, &len); > + if (vb) > + complete(&vb->acked); > +} > + > +static void fill_balloon(struct virtio_balloon *vb, unsigned int num) > +{ > + struct scatterlist sg; > + > + /* We can only do one array worth at a time. */ > + num = min(num, ARRAY_SIZE(vb->pfns)); > + > + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { > + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); > + if (!page) { > + if (printk_ratelimit()) > + dev_printk(KERN_INFO, &vb->vdev->dev, > + "Out of puff! Can't get %u pages\n", > + num); > + /* Sleep for at least 1/5 of a second before retry. */ > + msleep(200); > + break; > + } > + vb->pfns[vb->num_pfns] = page_to_pfn(page); > + totalram_pages--; > + vb->num_pages++; > + list_add(&page->lru, &vb->pages); > + } > + > + /* Didn't get any? Oh well. */ > + if (vb->num_pfns == 0) > + return; > + > + init_completion(&vb->acked); > + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); > + > + /* We should always be able to add one buffer to an empty queue. */ > + if (vb->vq->vq_ops->add_buf(vb->vq, &sg, 1, 0, vb) != 0) > + BUG(); > + vb->vq->vq_ops->kick(vb->vq); > + > + /* When host has read buffer, this completes via balloon_ack */ > + wait_for_completion(&vb->acked); > +} > + > +static void virtballoon_changed(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb = vdev->priv; > + > + wake_up(&vb->config_change); > +} > + > +static inline int towards_target(struct virtio_balloon *vb) > +{ > + u32 v; > + __virtio_config_val(vb->vdev, > + offsetof(struct virtio_balloon_config, num_pages), > + &v); > + return v - vb->num_pages; > +} > + > +static int balloon(void *_vballoon) > +{ > + struct virtio_balloon *vb = _vballoon; > + > + set_freezable(); > + while (!kthread_should_stop()) { > + int diff; > + > + try_to_freeze(); > + wait_event_interruptible(vb->config_change, > + (diff = towards_target(vb)) != 0 > + || kthread_should_stop()); > + if (diff > 0) > + fill_balloon(vb, diff); > + else if (diff < 0) > + leak_balloon(vb, -diff); > + } > + return 0; > +} > + > +static int virtballoon_probe(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb; > + int err; > + > + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); > + if (!vb) { > + err = -ENOMEM; > + goto out; > + } > + > + INIT_LIST_HEAD(&vb->pages); > + vb->num_pages = 0; > + init_waitqueue_head(&vb->config_change); > + vb->vdev = vdev; > + > + /* We expect one virtqueue, for output. */ > + vb->vq = vdev->config->find_vq(vdev, 0, balloon_ack); > + if (IS_ERR(vb->vq)) { > + err = PTR_ERR(vb->vq); > + goto out_free_vb; > + } > + > + vb->thread = kthread_run(balloon, vb, "vballoon"); > + if (IS_ERR(vb->thread)) { > + err = PTR_ERR(vb->thread); > + goto out_del_vq; > + } > + return 0; > + > +out_del_vq: > + vdev->config->del_vq(vb->vq); > +out_free_vb: > + kfree(vb); > +out: > + return err; > +} > + > +static void virtballoon_remove(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb = vdev->priv; > + struct page *i, *next; > + > + kthread_stop(vb->thread); > + > + /* There might be pages left in the balloon: free them. */ > + list_for_each_entry_safe(i, next, &vb->pages, lru) { > + vb->num_pages--; > + __free_page(i); > + } > + BUG_ON(vb->num_pages); > + > + vdev->config->del_vq(vb->vq); > + kfree(vb); > +} > + > +static struct virtio_driver virtio_balloon = { > + .driver.name = KBUILD_MODNAME, > + .driver.owner = THIS_MODULE, > + .id_table = id_table, > + .probe = virtballoon_probe, > + .remove = __devexit_p(virtballoon_remove), > + .config_changed = virtballoon_changed, > +}; > + > +static int __init init(void) > +{ > + return register_virtio_driver(&virtio_balloon); > +} > + > +static void __exit fini(void) > +{ > + unregister_virtio_driver(&virtio_balloon); > +} > +module_init(init); > +module_exit(fini); > + > +MODULE_DEVICE_TABLE(virtio, id_table); > +MODULE_DESCRIPTION("Virtio balloon driver"); > +MODULE_LICENSE("GPL"); > diff -r c4762959de25 include/linux/virtio_balloon.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/include/linux/virtio_balloon.h Thu Jan 17 12:28:23 2008 +1100 > @@ -0,0 +1,13 @@ > +#ifndef _LINUX_VIRTIO_BALLOON_H > +#define _LINUX_VIRTIO_BALLOON_H > +#include <linux/virtio_config.h> > + > +/* The ID for virtio_balloon */ > +#define VIRTIO_ID_BALLOON 5 > + > +struct virtio_balloon_config > +{ > + /* Number of pages host wants Guest to give up. */ > + __le32 num_pages; > +}; > +#endif /* _LINUX_VIRTIO_BALLOON_H */ > > ------------------------------------------------------------------------- > This SF.net email is sponsored by: Microsoft > Defy all challenges. Microsoft(R) Visual Studio 2008. > http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ > _______________________________________________ > kvm-devel mailing list > kvm-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/kvm-devel > ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <478EBA22.30301@codemonkey.ws>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <478EBA22.30301@codemonkey.ws> @ 2008-01-17 3:29 ` Rusty Russell [not found] ` <200801171429.32888.rusty@rustcorp.com.au> 1 sibling, 0 replies; 24+ messages in thread From: Rusty Russell @ 2008-01-17 3:29 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, kvm-devel, David Miller, virtualization On Thursday 17 January 2008 13:14:58 Anthony Liguori wrote: > Rusty Russell wrote: > > +static struct virtio_device_id id_table[] = { > > + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, > > Could use a space after VIRTIO_DEV_ANY_ID Thanks, fixed. > > + __free_page(page); > > + vb->num_pages--; > > + totalram_pages++; > > Do we really want to modify totalram_pages in this driver? The only > other place that I see that modifies it is in mm/memory_hotplug and it > also modifies other things (like num_physpages). The cmm driver doesn't > touch totalram_pages. I don't think there's a standard here, they're all ad-hoc (eg. no locking) Modifying totalram_pages has the nice effect of showing up in "free" in the guest. We should probably not modify num_physpages, because some places seem to use it as an address space limit. But we should probably fix all those networking size heuristics to use totalram_pages instead of num_physpages. > It would be very useful too to write vb->num_pages into the config space > whenever it was updated. This way, the host can easily keep track of > where the guest is at in terms of ballooning. OTOH it's currently pretty obvious (and usually fatal) if the guest has trouble meeting the balloon requirements. A serious host needs a way of detecting stress in the guest anyway, which this doesn't offer until it's too late... Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <200801171429.32888.rusty@rustcorp.com.au>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <200801171429.32888.rusty@rustcorp.com.au> @ 2008-01-17 4:01 ` Anthony Liguori [not found] ` <478ED32A.1060803@codemonkey.ws> 1 sibling, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-17 4:01 UTC (permalink / raw) To: Rusty Russell; +Cc: Marcelo Tosatti, kvm-devel, David Miller, virtualization Rusty Russell wrote: > On Thursday 17 January 2008 13:14:58 Anthony Liguori wrote: > >> Rusty Russell wrote: >> >>> +static struct virtio_device_id id_table[] = { >>> + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, >>> >> Could use a space after VIRTIO_DEV_ANY_ID >> > > Thanks, fixed. > > >>> + __free_page(page); >>> + vb->num_pages--; >>> + totalram_pages++; >>> >> Do we really want to modify totalram_pages in this driver? The only >> other place that I see that modifies it is in mm/memory_hotplug and it >> also modifies other things (like num_physpages). The cmm driver doesn't >> touch totalram_pages. >> > > I don't think there's a standard here, they're all ad-hoc (eg. no locking) > Modifying totalram_pages has the nice effect of showing up in "free" in the > guest. > > We should probably not modify num_physpages, because some places seem to use > it as an address space limit. But we should probably fix all those > networking size heuristics to use totalram_pages instead of num_physpages. > > >> It would be very useful too to write vb->num_pages into the config space >> whenever it was updated. This way, the host can easily keep track of >> where the guest is at in terms of ballooning. >> > > OTOH it's currently pretty obvious (and usually fatal) if the guest has > trouble meeting the balloon requirements. A serious host needs a way of > detecting stress in the guest anyway, which this doesn't offer until it's too > late... > The question I'm interested in answering though is not if but when. I would like to know when the guest has reached it's target. And while we do get the madvise call outs, it's possible that pages have been faulted in since then. Regards, Anthony Liguori > Rusty. > ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <478ED32A.1060803@codemonkey.ws>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <478ED32A.1060803@codemonkey.ws> @ 2008-01-17 5:59 ` Rusty Russell 2008-01-19 7:05 ` Avi Kivity 1 sibling, 0 replies; 24+ messages in thread From: Rusty Russell @ 2008-01-17 5:59 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, kvm-devel, virtualization On Thursday 17 January 2008 15:01:46 Anthony Liguori wrote: > Rusty Russell wrote: > > OTOH it's currently pretty obvious (and usually fatal) if the guest has > > trouble meeting the balloon requirements. A serious host needs a way of > > detecting stress in the guest anyway, which this doesn't offer until it's > > too late... > > The question I'm interested in answering though is not if but when. I > would like to know when the guest has reached it's target. I'm saying that it will be v. quickly in all but "too much squeeze" case. > And while we do get the madvise call outs, it's possible that pages have > been faulted in since then. But that's exactly what the balloon number *doesn't* tell you. It can tell you that it's released pages back to be used by the OS, but not whether the OS has used them. I think this number is good for debugging the balloon driver, but for anything else it's a false friend. Rusty. PS. Please cut down mails when you reply. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <478ED32A.1060803@codemonkey.ws> 2008-01-17 5:59 ` Rusty Russell @ 2008-01-19 7:05 ` Avi Kivity 1 sibling, 0 replies; 24+ messages in thread From: Avi Kivity @ 2008-01-19 7:05 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, kvm-devel, David Miller, virtualization Anthony Liguori wrote: >> >> >>> It would be very useful too to write vb->num_pages into the config space >>> whenever it was updated. This way, the host can easily keep track of >>> where the guest is at in terms of ballooning. >>> >>> >> OTOH it's currently pretty obvious (and usually fatal) if the guest has >> trouble meeting the balloon requirements. A serious host needs a way of >> detecting stress in the guest anyway, which this doesn't offer until it's too >> late... >> >> > > The question I'm interested in answering though is not if but when. I > would like to know when the guest has reached it's target. > > Yes. We don't want to activate the RSS controller immediately; give the guest some time to adjust voluntarily. -- Any sufficiently difficult bug is indistinguishable from a feature. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM simplified virtio balloon driver [not found] ` <200801171245.59510.rusty@rustcorp.com.au> 2008-01-17 2:14 ` [kvm-devel] " Anthony Liguori [not found] ` <478EBA22.30301@codemonkey.ws> @ 2008-01-17 9:32 ` Christian Borntraeger [not found] ` <200801171032.26198.borntraeger@de.ibm.com> 2008-01-19 7:02 ` [kvm-devel] " Avi Kivity 4 siblings, 0 replies; 24+ messages in thread From: Christian Borntraeger @ 2008-01-17 9:32 UTC (permalink / raw) To: virtualization Cc: Anthony Liguori, Marcelo Tosatti, kvm-devel, Martin Schwidefsky Am Donnerstag, 17. Januar 2008 schrieb Rusty Russell: > Since the balloon requires Guest cooperation anyway, there seems > little reason to force it to tell the Host when it wants to reuse a > page. It can simply fault it in. Yes, thats what we do in the s390 cmm driver. All in all the driver has similarities with cmm. I dont know, if we can consolidate some code. Besides the hypervisor, we have a additional user interface: /proc/sys/vm/. The root user can specify the amount of pages in the balloon via /proc/sys/vm/cmm_pages. Another idea: Martin added an oom notifier to the cmm driver. Before the oom-killer kicks in cmm will try to free 256 pages. I think your virtio balloon driver should do the same - it seems to be the correct tradeoff. Christian ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <200801171032.26198.borntraeger@de.ibm.com>]
* Re: [PATCH] KVM simplified virtio balloon driver [not found] ` <200801171032.26198.borntraeger@de.ibm.com> @ 2008-01-17 10:25 ` Martin Schwidefsky [not found] ` <1200565558.22385.13.camel@localhost> 1 sibling, 0 replies; 24+ messages in thread From: Martin Schwidefsky @ 2008-01-17 10:25 UTC (permalink / raw) To: Christian Borntraeger Cc: Anthony Liguori, Marcelo Tosatti, virtualization, kvm-devel On Thu, 2008-01-17 at 10:32 +0100, Christian Borntraeger wrote: > Am Donnerstag, 17. Januar 2008 schrieb Rusty Russell: > > Since the balloon requires Guest cooperation anyway, there seems > > little reason to force it to tell the Host when it wants to reuse a > > page. It can simply fault it in. > Yes, thats what we do in the s390 cmm driver. > > All in all the driver has similarities with cmm. I dont know, if we can > consolidate some code. > Besides the hypervisor, we have a additional user interface: /proc/sys/vm/. > The root user can specify the amount of pages in the balloon > via /proc/sys/vm/cmm_pages. > > Another idea: Martin added an oom notifier to the cmm driver. Before the > oom-killer kicks in cmm will try to free 256 pages. I think your virtio > balloon driver should do the same - it seems to be the correct tradeoff. Nod, you definitly want to add an oom notifier. If 256 pages is the correct number of pages to free is debatable. We have seen long delays for a process that quickly eats up memory if there are lots of pages in the balloon. The problem is that the memory management tries hard to find memory until it decides to oom kill a process, only to be stopped in the last second by the oom notifier. The 1MB is quickly eaten up again so the whole things starts again. The profile of such a scenario shows that almost all cpu is burned in the page reclaim code. -- blue skies, Martin. "Reality continues to ruin my life." - Calvin. ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <1200565558.22385.13.camel@localhost>]
* Re: [PATCH] KVM simplified virtio balloon driver [not found] ` <1200565558.22385.13.camel@localhost> @ 2008-01-17 11:40 ` Dor Laor [not found] ` <1200570025.26281.141.camel@localhost.localdomain> 1 sibling, 0 replies; 24+ messages in thread From: Dor Laor @ 2008-01-17 11:40 UTC (permalink / raw) To: schwidefsky Cc: Anthony Liguori, Marcelo Tosatti, virtualization, Christian Borntraeger, kvm-devel On Thu, 2008-01-17 at 11:25 +0100, Martin Schwidefsky wrote: > > > > Another idea: Martin added an oom notifier to the cmm driver. Before the > > oom-killer kicks in cmm will try to free 256 pages. I think your virtio > > balloon driver should do the same - it seems to be the correct tradeoff. > > Nod, you definitly want to add an oom notifier. If 256 pages is the > correct number of pages to free is debatable. We have seen long delays > for a process that quickly eats up memory if there are lots of pages in > the balloon. The problem is that the memory management tries hard to > find memory until it decides to oom kill a process, only to be stopped > in the last second by the oom notifier. The 1MB is quickly eaten up > again so the whole things starts again. The profile of such a scenario > shows that almost all cpu is burned in the page reclaim code. > Seconded, in that case we can add a config space notification from the guest to the host that will be triggered by the oom. The host will get this notification and will decide whether to allow the guest to deflate the balloon or to keep the current balloon size because the whole host is over committed. Regards, Dor. ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <1200570025.26281.141.camel@localhost.localdomain>]
* Re: [PATCH] KVM simplified virtio balloon driver [not found] ` <1200570025.26281.141.camel@localhost.localdomain> @ 2008-01-17 13:56 ` Anthony Liguori 2008-01-17 23:01 ` Dor Laor [not found] ` <1200610891.26281.171.camel@localhost.localdomain> 0 siblings, 2 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-17 13:56 UTC (permalink / raw) To: dor.laor Cc: Marcelo Tosatti, virtualization, Christian Borntraeger, schwidefsky, kvm-devel Dor Laor wrote: > On Thu, 2008-01-17 at 11:25 +0100, Martin Schwidefsky wrote: > >>> Another idea: Martin added an oom notifier to the cmm driver. Before the >>> oom-killer kicks in cmm will try to free 256 pages. I think your virtio >>> balloon driver should do the same - it seems to be the correct tradeoff. >>> >> Nod, you definitly want to add an oom notifier. If 256 pages is the >> correct number of pages to free is debatable. We have seen long delays >> for a process that quickly eats up memory if there are lots of pages in >> the balloon. The problem is that the memory management tries hard to >> find memory until it decides to oom kill a process, only to be stopped >> in the last second by the oom notifier. The 1MB is quickly eaten up >> again so the whole things starts again. The profile of such a scenario >> shows that almost all cpu is burned in the page reclaim code. >> >> > > Seconded, in that case we can add a config space notification from the > guest to the host that will be triggered by the oom. > The host will get this notification and will decide whether to allow the > guest to deflate the balloon or to keep the current balloon size because > the whole host is over committed. > The host doesn't decide whether to allow the guest to deflate. Virtual memory size and resident memory side are independent in KVM. The host decides what the RSS is but the guest is free to determine it's VSS. The host provides hints to the guest about it's RSS size (via ballooning) so the guest can optimize it's VSS size. If guest chooses to make it's VSS size to large, it will only hurt itself (by being forced to swap by the host). Regards, Anthony Liguori > Regards, > Dor. > > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] KVM simplified virtio balloon driver 2008-01-17 13:56 ` Anthony Liguori @ 2008-01-17 23:01 ` Dor Laor [not found] ` <1200610891.26281.171.camel@localhost.localdomain> 1 sibling, 0 replies; 24+ messages in thread From: Dor Laor @ 2008-01-17 23:01 UTC (permalink / raw) To: Anthony Liguori Cc: Marcelo Tosatti, virtualization, Christian Borntraeger, schwidefsky, kvm-devel On Thu, 2008-01-17 at 07:56 -0600, Anthony Liguori wrote: > Dor Laor wrote: > > On Thu, 2008-01-17 at 11:25 +0100, Martin Schwidefsky wrote: > > > >>> Another idea: Martin added an oom notifier to the cmm driver. Before the > >>> oom-killer kicks in cmm will try to free 256 pages. I think your virtio > >>> balloon driver should do the same - it seems to be the correct tradeoff. > >>> > >> Nod, you definitly want to add an oom notifier. If 256 pages is the > >> correct number of pages to free is debatable. We have seen long delays > >> for a process that quickly eats up memory if there are lots of pages in > >> the balloon. The problem is that the memory management tries hard to > >> find memory until it decides to oom kill a process, only to be stopped > >> in the last second by the oom notifier. The 1MB is quickly eaten up > >> again so the whole things starts again. The profile of such a scenario > >> shows that almost all cpu is burned in the page reclaim code. > >> > >> > > > > Seconded, in that case we can add a config space notification from the > > guest to the host that will be triggered by the oom. > > The host will get this notification and will decide whether to allow the > > guest to deflate the balloon or to keep the current balloon size because > > the whole host is over committed. > > > > The host doesn't decide whether to allow the guest to deflate. Virtual > memory size and resident memory side are independent in KVM. The host > decides what the RSS is but the guest is free to determine it's VSS. > The host provides hints to the guest about it's RSS size (via > ballooning) so the guest can optimize it's VSS size. If guest chooses > to make it's VSS size to large, it will only hurt itself (by being > forced to swap by the host). ok, this is true when the new memory controller targeted for the next kernel.In that case I more then agree. Nevertheless, the guest should pass the host data about it's current memory usage (working set size, oom triggering, etc) so the host would change the rss limit dynamically. Thanks, Dor ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <1200610891.26281.171.camel@localhost.localdomain>]
* Re: [PATCH] KVM simplified virtio balloon driver [not found] ` <1200610891.26281.171.camel@localhost.localdomain> @ 2008-01-17 23:35 ` Anthony Liguori 0 siblings, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-17 23:35 UTC (permalink / raw) To: dor.laor Cc: Marcelo Tosatti, virtualization, Christian Borntraeger, schwidefsky, kvm-devel Dor Laor wrote: > On Thu, 2008-01-17 at 07:56 -0600, Anthony Liguori wrote: > >> Dor Laor wrote: >> >>> On Thu, 2008-01-17 at 11:25 +0100, Martin Schwidefsky wrote: >>> >>> >>>>> Another idea: Martin added an oom notifier to the cmm driver. Before the >>>>> oom-killer kicks in cmm will try to free 256 pages. I think your virtio >>>>> balloon driver should do the same - it seems to be the correct tradeoff. >>>>> >>>>> >>>> Nod, you definitly want to add an oom notifier. If 256 pages is the >>>> correct number of pages to free is debatable. We have seen long delays >>>> for a process that quickly eats up memory if there are lots of pages in >>>> the balloon. The problem is that the memory management tries hard to >>>> find memory until it decides to oom kill a process, only to be stopped >>>> in the last second by the oom notifier. The 1MB is quickly eaten up >>>> again so the whole things starts again. The profile of such a scenario >>>> shows that almost all cpu is burned in the page reclaim code. >>>> >>>> >>>> >>> Seconded, in that case we can add a config space notification from the >>> guest to the host that will be triggered by the oom. >>> The host will get this notification and will decide whether to allow the >>> guest to deflate the balloon or to keep the current balloon size because >>> the whole host is over committed. >>> >>> >> The host doesn't decide whether to allow the guest to deflate. Virtual >> memory size and resident memory side are independent in KVM. The host >> decides what the RSS is but the guest is free to determine it's VSS. >> The host provides hints to the guest about it's RSS size (via >> ballooning) so the guest can optimize it's VSS size. If guest chooses >> to make it's VSS size to large, it will only hurt itself (by being >> forced to swap by the host). >> > > ok, this is true when the new memory controller targeted for the next > kernel.In that case I more then agree. > Nevertheless, the guest should pass the host data about it's current > memory usage (working set size, oom triggering, etc) so the host would > change the rss limit dynamically. > What is the host going to do if the guest is OOMing? There is no way that the host can *verify* that the guest is OOMing so nothing prevents the guest from lying about the fact that it's OOMing in an attempt to get more memory. More importantly though, the RSS limit has nothing to do with the guest OOMing. Reducing the guest's RSS limit will result in the guest swapping, not OOMing. Only the guest can prevent itself from OOMing by free'ing pages that were previously ballooned and that is what is being recommended here. Regards, Anthony Liguori > Thanks, > Dor > > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <200801171245.59510.rusty@rustcorp.com.au> ` (3 preceding siblings ...) [not found] ` <200801171032.26198.borntraeger@de.ibm.com> @ 2008-01-19 7:02 ` Avi Kivity 2008-01-19 22:37 ` Anthony Liguori ` (2 more replies) 4 siblings, 3 replies; 24+ messages in thread From: Avi Kivity @ 2008-01-19 7:02 UTC (permalink / raw) To: Rusty Russell; +Cc: Marcelo Tosatti, kvm-devel, virtualization Rusty Russell wrote: > After discussions with Anthony Liguori, it seems that the virtio > balloon can be made even simpler. Here's my attempt. > > Since the balloon requires Guest cooperation anyway, there seems > little reason to force it to tell the Host when it wants to reuse a > page. It can simply fault it in. > > Faulting is synchronous, while deflating is (or can be made) asynchronous. If the host needs to do some work to get the memory, the guest will be slowed down considerably. If we have explicit deflate, the host can call madvise(MADV_WILLNEED) or actually touch the pages before the guest accesses them. -- Any sufficiently difficult bug is indistinguishable from a feature. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver 2008-01-19 7:02 ` [kvm-devel] " Avi Kivity @ 2008-01-19 22:37 ` Anthony Liguori 2008-01-19 22:37 ` Anthony Liguori [not found] ` <47927BB7.7060805@codemonkey.ws> 2 siblings, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-19 22:37 UTC (permalink / raw) To: Avi Kivity; +Cc: Marcelo Tosatti, kvm-devel, virtualization Avi Kivity wrote: > Rusty Russell wrote: >> After discussions with Anthony Liguori, it seems that the virtio >> balloon can be made even simpler. Here's my attempt. >> >> Since the balloon requires Guest cooperation anyway, there seems >> little reason to force it to tell the Host when it wants to reuse a >> page. It can simply fault it in. >> >> > > Faulting is synchronous, while deflating is (or can be made) > asynchronous. If the host needs to do some work to get the memory, the > guest will be slowed down considerably. Good point. Basically, we have two page hinting operations which roughly correspond to madvise(MADV_DONTNEED) and madvise(MADV_WILLNEED). Regards, Anthony Liguori > If we have explicit deflate, the host can call madvise(MADV_WILLNEED) or > actually touch the pages before the guest accesses them. > > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver 2008-01-19 7:02 ` [kvm-devel] " Avi Kivity 2008-01-19 22:37 ` Anthony Liguori @ 2008-01-19 22:37 ` Anthony Liguori [not found] ` <47927BB7.7060805@codemonkey.ws> 2 siblings, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-19 22:37 UTC (permalink / raw) To: virtualization; +Cc: Marcelo Tosatti, kvm-devel, virtualization Avi Kivity wrote: > Rusty Russell wrote: >> After discussions with Anthony Liguori, it seems that the virtio >> balloon can be made even simpler. Here's my attempt. >> >> Since the balloon requires Guest cooperation anyway, there seems >> little reason to force it to tell the Host when it wants to reuse a >> page. It can simply fault it in. >> >> > > Faulting is synchronous, while deflating is (or can be made) > asynchronous. If the host needs to do some work to get the memory, the > guest will be slowed down considerably. Good point. Basically, we have two page hinting operations which roughly correspond to madvise(MADV_DONTNEED) and madvise(MADV_WILLNEED). Regards, Anthony Liguori > If we have explicit deflate, the host can call madvise(MADV_WILLNEED) or > actually touch the pages before the guest accesses them. > > ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <47927BB7.7060805@codemonkey.ws>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <47927BB7.7060805@codemonkey.ws> @ 2008-01-20 0:24 ` Marcelo Tosatti [not found] ` <20080120002433.GA6880@dmt> 1 sibling, 0 replies; 24+ messages in thread From: Marcelo Tosatti @ 2008-01-20 0:24 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, kvm-devel, virtualization On Sat, Jan 19, 2008 at 04:37:43PM -0600, Anthony Liguori wrote: > Avi Kivity wrote: > >Rusty Russell wrote: > >>After discussions with Anthony Liguori, it seems that the virtio > >>balloon can be made even simpler. Here's my attempt. > >> > >>Since the balloon requires Guest cooperation anyway, there seems > >>little reason to force it to tell the Host when it wants to reuse a > >>page. It can simply fault it in. > >> > >> > > > >Faulting is synchronous, while deflating is (or can be made) > >asynchronous. If the host needs to do some work to get the memory, the > >guest will be slowed down considerably. > > Good point. Basically, we have two page hinting operations which > roughly correspond to madvise(MADV_DONTNEED) and madvise(MADV_WILLNEED). Also, the simplified driver does not handle errors at all. I don't think that assuming madvise() can't fail is a good thing. ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <20080120002433.GA6880@dmt>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <20080120002433.GA6880@dmt> @ 2008-01-20 0:40 ` Anthony Liguori [not found] ` <47929884.2010908@codemonkey.ws> 1 sibling, 0 replies; 24+ messages in thread From: Anthony Liguori @ 2008-01-20 0:40 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: kvm-devel, virtualization Marcelo Tosatti wrote: > On Sat, Jan 19, 2008 at 04:37:43PM -0600, Anthony Liguori wrote: > >> Avi Kivity wrote: >> >>> Rusty Russell wrote: >>> >>>> After discussions with Anthony Liguori, it seems that the virtio >>>> balloon can be made even simpler. Here's my attempt. >>>> >>>> Since the balloon requires Guest cooperation anyway, there seems >>>> little reason to force it to tell the Host when it wants to reuse a >>>> page. It can simply fault it in. >>>> >>>> >>>> >>> Faulting is synchronous, while deflating is (or can be made) >>> asynchronous. If the host needs to do some work to get the memory, the >>> guest will be slowed down considerably. >>> >> Good point. Basically, we have two page hinting operations which >> roughly correspond to madvise(MADV_DONTNEED) and madvise(MADV_WILLNEED). >> > > Also, the simplified driver does not handle errors at all. > > I don't think that assuming madvise() can't fail is a good thing. > I don't think the host should be communicating to the guest if madvise() fails. The notification is a hint to the host. The guest doesn't know what the host is doing in response to that hint. Regards, Anthony Liguori ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <47929884.2010908@codemonkey.ws>]
* Re: [kvm-devel] [PATCH] KVM simplified virtio balloon driver [not found] ` <47929884.2010908@codemonkey.ws> @ 2008-01-24 1:58 ` Rusty Russell 0 siblings, 0 replies; 24+ messages in thread From: Rusty Russell @ 2008-01-24 1:58 UTC (permalink / raw) To: Anthony Liguori; +Cc: Marcelo Tosatti, kvm-devel, virtualization Here's the latest. Hope this works for everyone (putting in a oom handler or shrinker requires a lock, but can be done quite easily). Untested. === After discussions with Anthony Liguori, it seems that the virtio balloon can be made even simpler. Here's my attempt. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- drivers/virtio/Kconfig | 10 + drivers/virtio/Makefile | 1 drivers/virtio/virtio_balloon.c | 284 ++++++++++++++++++++++++++++++++++++++++ include/linux/virtio_balloon.h | 18 ++ 4 files changed, 313 insertions(+) diff -r e977a88ffefd drivers/virtio/Kconfig --- a/drivers/virtio/Kconfig Thu Jan 24 12:34:05 2008 +1100 +++ b/drivers/virtio/Kconfig Thu Jan 24 12:40:07 2008 +1100 @@ -23,3 +23,13 @@ config VIRTIO_PCI If unsure, say M. +config VIRTIO_BALLOON + tristate "Virtio balloon driver (EXPERIMENTAL)" + select VIRTIO + select VIRTIO_RING + ---help--- + This driver supports increasing and decreasing the amount + of memory within a KVM guest. + + If unsure, say M. + diff -r e977a88ffefd drivers/virtio/Makefile --- a/drivers/virtio/Makefile Thu Jan 24 12:34:05 2008 +1100 +++ b/drivers/virtio/Makefile Thu Jan 24 12:40:07 2008 +1100 @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff -r e977a88ffefd drivers/virtio/virtio_balloon.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/virtio/virtio_balloon.c Thu Jan 24 12:40:07 2008 +1100 @@ -0,0 +1,284 @@ +/* Virtio balloon implementation, inspired by Dor Loar and Marcelo + * Tosatti's implementations. + * + * Copyright 2008 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#define DEBUG +#include <linux/virtio.h> +#include <linux/virtio_balloon.h> +#include <linux/swap.h> +#include <linux/kthread.h> +#include <linux/freezer.h> + +struct virtio_balloon +{ + struct virtio_device *vdev; + struct virtqueue *inflate_vq, *deflate_vq; + + /* Where the ballooning thread waits for config to change. */ + wait_queue_head_t config_change; + + /* The thread servicing the balloon. */ + struct task_struct *thread; + + /* Waiting for host to ack the pages we released. */ + struct completion acked; + + /* Do we have to tell Host *before* we reuse pages? */ + bool tell_host_first; + + /* The pages we've told the Host we're not using. */ + unsigned int num_pages; + struct list_head pages; + + /* The array of pfns we tell the Host about. */ + unsigned int num_pfns; + u32 pfns[256]; +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static void balloon_ack(struct virtqueue *vq) +{ + struct virtio_balloon *vb; + unsigned int len; + + vb = vq->vq_ops->get_buf(vq, &len); + if (vb) + complete(&vb->acked); +} + +static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) +{ + struct scatterlist sg; + + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); + + init_completion(&vb->acked); + + /* We should always be able to add one buffer to an empty queue. */ + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0) + BUG(); + vq->vq_ops->kick(vq); + + /* When host has read buffer, this completes via balloon_ack */ + wait_for_completion(&vb->acked); +} + +static void fill_balloon(struct virtio_balloon *vb, unsigned int num) +{ + /* We can only do one array worth at a time. */ + num = min(num, ARRAY_SIZE(vb->pfns)); + + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + if (!page) { + if (printk_ratelimit()) + dev_printk(KERN_INFO, &vb->vdev->dev, + "Out of puff! Can't get %u pages\n", + num); + /* Sleep for at least 1/5 of a second before retry. */ + msleep(200); + break; + } + vb->pfns[vb->num_pfns] = page_to_pfn(page); + totalram_pages--; + vb->num_pages++; + list_add(&page->lru, &vb->pages); + } + + /* Didn't get any? Oh well. */ + if (vb->num_pfns == 0) + return; + + tell_host(vb, vb->inflate_vq); +} + +static void release_pages_by_pfn(const u32 pfns[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + __free_page(pfn_to_page(pfns[i])); + totalram_pages++; + } +} + +static void leak_balloon(struct virtio_balloon *vb, unsigned int num) +{ + struct page *page; + + /* We can only do one array worth at a time. */ + num = min(num, ARRAY_SIZE(vb->pfns)); + + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { + page = list_first_entry(&vb->pages, struct page, lru); + list_del(&page->lru); + vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->num_pages--; + } + + if (vb->tell_host_first) { + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); + } else { + release_pages_by_pfn(vb->pfns, vb->num_pfns); + tell_host(vb, vb->deflate_vq); + } +} + +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + wake_up(&vb->config_change); +} + +static inline int towards_target(struct virtio_balloon *vb) +{ + u32 v; + __virtio_config_val(vb->vdev, + offsetof(struct virtio_balloon_config, num_pages), + &v); + return v - vb->num_pages; +} + +static void update_balloon_size(struct virtio_balloon *vb) +{ + __le32 actual = cpu_to_le32(vb->num_pages); + + vb->vdev->config->set(vb->vdev, + offsetof(struct virtio_balloon_config, actual), + &actual, sizeof(actual)); +} + +static int balloon(void *_vballoon) +{ + struct virtio_balloon *vb = _vballoon; + + set_freezable(); + while (!kthread_should_stop()) { + int diff; + + try_to_freeze(); + wait_event_interruptible(vb->config_change, + (diff = towards_target(vb)) != 0 + || kthread_should_stop()); + if (diff > 0) + fill_balloon(vb, diff); + else if (diff < 0) + leak_balloon(vb, -diff); + update_balloon_size(vb); + } + return 0; +} + +static int virtballoon_probe(struct virtio_device *vdev) +{ + struct virtio_balloon *vb; + int err; + + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + if (!vb) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&vb->pages); + vb->num_pages = 0; + init_waitqueue_head(&vb->config_change); + vb->vdev = vdev; + + /* We expect two virtqueues. */ + vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); + if (IS_ERR(vb->inflate_vq)) { + err = PTR_ERR(vb->inflate_vq); + goto out_free_vb; + } + + vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); + if (IS_ERR(vb->deflate_vq)) { + err = PTR_ERR(vb->deflate_vq); + goto out_del_inflate_vq; + } + + vb->thread = kthread_run(balloon, vb, "vballoon"); + if (IS_ERR(vb->thread)) { + err = PTR_ERR(vb->thread); + goto out_del_deflate_vq; + } + + vb->tell_host_first + = vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + + return 0; + +out_del_deflate_vq: + vdev->config->del_vq(vb->deflate_vq); +out_del_inflate_vq: + vdev->config->del_vq(vb->inflate_vq); +out_free_vb: + kfree(vb); +out: + return err; +} + +static void virtballoon_remove(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + kthread_stop(vb->thread); + + /* There might be pages left in the balloon: free them. */ + while (vb->num_pages) + leak_balloon(vb, vb->num_pages); + + /* Now we reset the device so we can clean up the queues. */ + vdev->config->reset(vdev); + + vdev->config->del_vq(vb->deflate_vq); + vdev->config->del_vq(vb->inflate_vq); + kfree(vb); +} + +static struct virtio_driver virtio_balloon = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtballoon_probe, + .remove = __devexit_p(virtballoon_remove), + .config_changed = virtballoon_changed, +}; + +static int __init init(void) +{ + return register_virtio_driver(&virtio_balloon); +} + +static void __exit fini(void) +{ + unregister_virtio_driver(&virtio_balloon); +} +module_init(init); +module_exit(fini); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio balloon driver"); +MODULE_LICENSE("GPL"); diff -r e977a88ffefd include/linux/virtio_balloon.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/linux/virtio_balloon.h Thu Jan 24 12:40:07 2008 +1100 @@ -0,0 +1,18 @@ +#ifndef _LINUX_VIRTIO_BALLOON_H +#define _LINUX_VIRTIO_BALLOON_H +#include <linux/virtio_config.h> + +/* The ID for virtio_balloon */ +#define VIRTIO_ID_BALLOON 5 + +/* The feature bitmap for virtio balloon */ +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ + +struct virtio_balloon_config +{ + /* Number of pages host wants Guest to give up. */ + __le32 num_pages; + /* Number of pages we've actually got in balloon. */ + __le32 actual; +}; +#endif /* _LINUX_VIRTIO_BALLOON_H */ ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2008-01-24 1:58 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-14 20:03 [PATCH] KVM virtio balloon driver Marcelo Tosatti
2008-01-14 21:29 ` Anthony Liguori
2008-01-15 14:22 ` Marcelo Tosatti
2008-01-14 23:32 ` Rusty Russell
2008-01-15 19:01 ` Marcelo Tosatti
2008-01-16 23:12 ` Dor Laor
[not found] ` <1200525166.26281.103.camel@localhost.localdomain>
2008-01-17 1:45 ` [PATCH] KVM simplified " Rusty Russell
[not found] ` <200801171245.59510.rusty@rustcorp.com.au>
2008-01-17 2:14 ` [kvm-devel] " Anthony Liguori
[not found] ` <478EBA22.30301@codemonkey.ws>
2008-01-17 3:29 ` Rusty Russell
[not found] ` <200801171429.32888.rusty@rustcorp.com.au>
2008-01-17 4:01 ` Anthony Liguori
[not found] ` <478ED32A.1060803@codemonkey.ws>
2008-01-17 5:59 ` Rusty Russell
2008-01-19 7:05 ` Avi Kivity
2008-01-17 9:32 ` Christian Borntraeger
[not found] ` <200801171032.26198.borntraeger@de.ibm.com>
2008-01-17 10:25 ` Martin Schwidefsky
[not found] ` <1200565558.22385.13.camel@localhost>
2008-01-17 11:40 ` Dor Laor
[not found] ` <1200570025.26281.141.camel@localhost.localdomain>
2008-01-17 13:56 ` Anthony Liguori
2008-01-17 23:01 ` Dor Laor
[not found] ` <1200610891.26281.171.camel@localhost.localdomain>
2008-01-17 23:35 ` Anthony Liguori
2008-01-19 7:02 ` [kvm-devel] " Avi Kivity
2008-01-19 22:37 ` Anthony Liguori
2008-01-19 22:37 ` Anthony Liguori
[not found] ` <47927BB7.7060805@codemonkey.ws>
2008-01-20 0:24 ` Marcelo Tosatti
[not found] ` <20080120002433.GA6880@dmt>
2008-01-20 0:40 ` Anthony Liguori
[not found] ` <47929884.2010908@codemonkey.ws>
2008-01-24 1:58 ` Rusty Russell
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).