* [PATCH] QEMU support for virtio balloon driver
@ 2008-01-24 21:23 Anthony Liguori
[not found] ` <1201209786831-git-send-email-aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 14+ messages in thread
From: Anthony Liguori @ 2008-01-24 21:23 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Cc: Anthony Liguori, Andrea Arcangeli, Avi Kivity
This patch adds support to QEMU for Rusty's recently introduce virtio balloon
driver. The user-facing portions of this are the introduction of a "balloon"
and "info balloon" command in the monitor.
I think using madvise unconditionally is okay but I am not sure. If madvise
is called on memory that is essentially locked (which is what pre-MM notifiers
is like) then it should just be a nop right?
Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index bb7be0f..d6b4f46 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -464,7 +464,7 @@ VL_OBJS += rtl8139.o
VL_OBJS+= hypercall.o
# virtio devices
-VL_OBJS += virtio.o virtio-net.o virtio-blk.o
+VL_OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o
ifeq ($(TARGET_BASE_ARCH), i386)
# Hardware support
diff --git a/qemu/balloon.h b/qemu/balloon.h
new file mode 100644
index 0000000..ffce1fa
--- /dev/null
+++ b/qemu/balloon.h
@@ -0,0 +1,14 @@
+#ifndef _QEMU_BALLOON_H
+#define _QEMU_BALLOON_H
+
+#include "cpu-defs.h"
+
+typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+
+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
+
+void qemu_balloon(ram_addr_t target);
+
+ram_addr_t qemu_balloon_status(void);
+
+#endif
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 652b263..6c8d907 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -1122,6 +1122,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
}
}
+ if (pci_enabled)
+ virtio_balloon_init(pci_bus, 0x1AF4, 0x1002);
+
if (extboot_drive != -1) {
DriveInfo *info = &drives_table[extboot_drive];
int cyls, heads, secs;
diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index f640395..1899c11 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -152,6 +152,9 @@ void virtio_net_poll(void);
void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
BlockDriverState *bs);
+/* virtio-balloon.h */
+void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device);
+
/* extboot.c */
void extboot_init(BlockDriverState *bs, int cmd);
diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c
new file mode 100644
index 0000000..1b5a689
--- /dev/null
+++ b/qemu/hw/virtio-balloon.c
@@ -0,0 +1,142 @@
+/*
+ * Virtio Block Device
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "virtio.h"
+#include "block.h"
+#include "pc.h"
+#include "balloon.h"
+#include "sysemu.h"
+
+#include <sys/mman.h>
+
+/* from Linux's linux/virtio_blk.h */
+
+/* The ID for virtio_balloon */
+#define VIRTIO_ID_BALLOON 5
+
+/* The feature bitmap for virtio balloon */
+#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+
+struct virtio_balloon_config
+{
+ /* Number of pages host wants Guest to give up. */
+ uint32_t num_pages;
+ /* Number of pages we've actually got in balloon. */
+ uint32_t actual;
+};
+
+typedef struct VirtIOBalloon
+{
+ VirtIODevice vdev;
+ VirtQueue *ivq, *dvq;
+ uint32_t num_pages;
+ uint32_t actual;
+} VirtIOBalloon;
+
+static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
+{
+ return (VirtIOBalloon *)vdev;
+}
+
+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *s = to_virtio_balloon(vdev);
+ VirtQueueElement elem;
+ unsigned int count;
+
+ while ((count = virtqueue_pop(vq, &elem)) != 0) {
+ int i;
+ unsigned int wlen = 0;
+
+ for (i = 0; i < elem.out_num; i++) {
+ int flags;
+ uint32_t *pfns = elem.out_sg[i].iov_base;
+ unsigned int n_pfns = elem.out_sg[i].iov_len / 4;
+ int j;
+
+ for (j = 0; j < n_pfns; j++) {
+ if (vq == s->dvq) /* deflate */
+ flags = MADV_WILLNEED;
+ else /* inflate */
+ flags = MADV_DONTNEED;
+
+ madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS),
+ TARGET_PAGE_SIZE, flags);
+ }
+
+ wlen += elem.out_sg[i].iov_len;
+ }
+
+ virtqueue_push(vq, &elem, wlen);
+ virtio_notify(vdev, vq);
+ }
+}
+
+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+ VirtIOBalloon *dev = to_virtio_balloon(vdev);
+ struct virtio_balloon_config config;
+
+ config.num_pages = dev->num_pages;
+ config.actual = dev->actual;
+
+ memcpy(config_data, &config, 8);
+}
+
+static void virtio_balloon_set_config(VirtIODevice *vdev,
+ const uint8_t *config_data)
+{
+ VirtIOBalloon *dev = to_virtio_balloon(vdev);
+ struct virtio_balloon_config config;
+ memcpy(&config, config_data, 8);
+ dev->actual = config.actual;
+}
+
+static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
+{
+ return 0;
+}
+
+static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+ VirtIOBalloon *dev = opaque;
+
+ if (target) {
+ dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS;
+ virtio_notify_config(&dev->vdev);
+ }
+
+ return ram_size - (dev->actual << TARGET_PAGE_BITS);
+}
+
+void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device)
+{
+ VirtIOBalloon *s;
+
+ s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon",
+ vendor, device,
+ 0, VIRTIO_ID_BALLOON,
+ 0x05, 0x00, 0x00,
+ 8, sizeof(VirtIOBalloon));
+
+ s->vdev.get_config = virtio_balloon_get_config;
+ s->vdev.set_config = virtio_balloon_set_config;
+ s->vdev.get_features = virtio_balloon_get_features;
+
+ s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+ s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+
+ qemu_add_balloon_handler(virtio_balloon_to_target, s);
+
+ return &s->vdev;
+}
diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c
index 301b5a1..c8c3233 100644
--- a/qemu/hw/virtio-blk.c
+++ b/qemu/hw/virtio-blk.c
@@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
0x01, 0x80, 0x00,
16, sizeof(VirtIOBlock));
- s->vdev.update_config = virtio_blk_update_config;
+ s->vdev.get_config = virtio_blk_update_config;
s->vdev.get_features = virtio_blk_get_features;
s->bs = bs;
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 86f9e5a..8c1f953 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
0x02, 0x00, 0x00,
6, sizeof(VirtIONet));
- n->vdev.update_config = virtio_net_update_config;
+ n->vdev.get_config = virtio_net_update_config;
n->vdev.get_features = virtio_net_get_features;
n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx);
n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index bbcb44c..1e8fb44 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -304,6 +304,9 @@ static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
return;
memcpy(vdev->config + addr, &val, sizeof(val));
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
}
static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
@@ -316,6 +319,9 @@ static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
return;
memcpy(vdev->config + addr, &val, sizeof(val));
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
}
static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
@@ -328,6 +334,9 @@ static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
return;
memcpy(vdev->config + addr, &val, sizeof(val));
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
}
static void virtio_map(PCIDevice *pci_dev, int region_num,
@@ -356,7 +365,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
register_ioport_read(addr + 20, vdev->config_len, 4,
virtio_config_readl, vdev);
- vdev->update_config(vdev, vdev->config);
+ vdev->get_config(vdev, vdev->config);
}
}
@@ -380,6 +389,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
return &vdev->vq[i];
}
+void virtio_notify_config(VirtIODevice *vdev)
+{
+ /* make sure we have the latest config */
+ vdev->get_config(vdev, vdev->config);
+ vdev->isr = 3;
+ virtio_update_irq(vdev);
+}
+
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
/* Always notify when queue is empty */
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index dee97ba..ed8bebd 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -118,7 +118,8 @@ struct VirtIODevice
void *config;
uint32_t (*get_features)(VirtIODevice *vdev);
void (*set_features)(VirtIODevice *vdev, uint32_t val);
- void (*update_config)(VirtIODevice *vdev, uint8_t *config);
+ void (*get_config)(VirtIODevice *vdev, uint8_t *config);
+ void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
VirtQueue vq[VIRTIO_PCI_QUEUE_MAX];
};
@@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
+void virtio_notify_config(VirtIODevice *vdev);
+
#endif
diff --git a/qemu/monitor.c b/qemu/monitor.c
index e03c473..e05f717 100644
--- a/qemu/monitor.c
+++ b/qemu/monitor.c
@@ -35,6 +35,7 @@
#include "audio/audio.h"
#include "disas.h"
#include "migration.h"
+#include "balloon.h"
#include <dirent.h>
#include "qemu-kvm.h"
@@ -1262,6 +1263,23 @@ static void do_wav_capture (const char *path,
}
#endif
+static void do_balloon(int value)
+{
+ ram_addr_t target = value;
+ qemu_balloon(target << 20);
+}
+
+static void do_info_balloon(void)
+{
+ ram_addr_t actual;
+
+ actual = qemu_balloon_status();
+ if (actual == 0)
+ term_printf("Ballooning not activated in VM\n");
+ else
+ term_printf("balloon: actual=%d\n", (int)(actual >> 20));
+}
+
static term_cmd_t term_cmds[] = {
{ "help|?", "s?", do_help,
"[cmd]", "show the help" },
@@ -1339,6 +1357,8 @@ static term_cmd_t term_cmds[] = {
"", "cancel the current VM migration" },
{ "migrate_set_speed", "s", do_migrate_set_speed,
"value", "set maximum speed (in bytes) for migrations" },
+ { "balloon", "i", do_balloon,
+ "target", "request VM to change it's memory allocation (in MB)" },
{ NULL, NULL, },
};
@@ -1401,6 +1421,8 @@ static term_cmd_t info_cmds[] = {
#endif
{ "migration", "", do_info_migration,
"", "show migration information" },
+ { "balloon", "", do_info_balloon,
+ "", "show balloon information" },
{ NULL, NULL, },
};
diff --git a/qemu/vl.c b/qemu/vl.c
index 756e13d..d339eb2 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -38,6 +38,7 @@
#include "block.h"
#include "audio/audio.h"
#include "migration.h"
+#include "balloon.h"
#include "qemu-kvm.h"
#include <unistd.h>
@@ -511,6 +512,31 @@ void hw_error(const char *fmt, ...)
abort();
}
+/***************/
+/* ballooning */
+
+static QEMUBalloonEvent *qemu_balloon_event;
+void *qemu_balloon_event_opaque;
+
+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
+{
+ qemu_balloon_event = func;
+ qemu_balloon_event_opaque = opaque;
+}
+
+void qemu_balloon(ram_addr_t target)
+{
+ if (qemu_balloon_event)
+ qemu_balloon_event(qemu_balloon_event_opaque, target);
+}
+
+ram_addr_t qemu_balloon_status(void)
+{
+ if (qemu_balloon_event)
+ return qemu_balloon_event(qemu_balloon_event_opaque, 0);
+ return 0;
+}
+
/***********************************************************/
/* keyboard/mouse */
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply related [flat|nested] 14+ messages in thread[parent not found: <1201209786831-git-send-email-aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <1201209786831-git-send-email-aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2008-01-24 22:29 ` Anthony Liguori [not found] ` <4799115F.8010506-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 14+ messages in thread From: Anthony Liguori @ 2008-01-24 22:29 UTC (permalink / raw) To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f Cc: Marcelo Tosatti, Andrea Arcangeli, Avi Kivity Anthony Liguori wrote: > This patch adds support to QEMU for Rusty's recently introduce virtio balloon > driver. The user-facing portions of this are the introduction of a "balloon" > and "info balloon" command in the monitor. > > I think using madvise unconditionally is okay but I am not sure. Looks like it's not. I just hung my host system after doing a bunch of ballooning with a kernel that doesn't have MM notifiers. I'm inclined to think that we should have a capability check for MM notifiers and just not do madvise if they aren't present. I don't think the ioctl approach that Marcelo took is sufficient as a malicious guest could possibly hose the host. Having the guest allocate and not touch memory means that it should eventually be removed from the shadow page cache and eventually swapped out so ballooning isn't totally useless in the absence of MM notifiers. Regards, Anthony Liguori > If madvise > is called on memory that is essentially locked (which is what pre-MM notifiers > is like) then it should just be a nop right? > > Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > > diff --git a/qemu/Makefile.target b/qemu/Makefile.target > index bb7be0f..d6b4f46 100644 > --- a/qemu/Makefile.target > +++ b/qemu/Makefile.target > @@ -464,7 +464,7 @@ VL_OBJS += rtl8139.o > VL_OBJS+= hypercall.o > > # virtio devices > -VL_OBJS += virtio.o virtio-net.o virtio-blk.o > +VL_OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o > > ifeq ($(TARGET_BASE_ARCH), i386) > # Hardware support > diff --git a/qemu/balloon.h b/qemu/balloon.h > new file mode 100644 > index 0000000..ffce1fa > --- /dev/null > +++ b/qemu/balloon.h > @@ -0,0 +1,14 @@ > +#ifndef _QEMU_BALLOON_H > +#define _QEMU_BALLOON_H > + > +#include "cpu-defs.h" > + > +typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target); > + > +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque); > + > +void qemu_balloon(ram_addr_t target); > + > +ram_addr_t qemu_balloon_status(void); > + > +#endif > diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c > index 652b263..6c8d907 100644 > --- a/qemu/hw/pc.c > +++ b/qemu/hw/pc.c > @@ -1122,6 +1122,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, > } > } > > + if (pci_enabled) > + virtio_balloon_init(pci_bus, 0x1AF4, 0x1002); > + > if (extboot_drive != -1) { > DriveInfo *info = &drives_table[extboot_drive]; > int cyls, heads, secs; > diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h > index f640395..1899c11 100644 > --- a/qemu/hw/pc.h > +++ b/qemu/hw/pc.h > @@ -152,6 +152,9 @@ void virtio_net_poll(void); > void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, > BlockDriverState *bs); > > +/* virtio-balloon.h */ > +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device); > + > /* extboot.c */ > > void extboot_init(BlockDriverState *bs, int cmd); > diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c > new file mode 100644 > index 0000000..1b5a689 > --- /dev/null > +++ b/qemu/hw/virtio-balloon.c > @@ -0,0 +1,142 @@ > +/* > + * Virtio Block Device > + * > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#include "virtio.h" > +#include "block.h" > +#include "pc.h" > +#include "balloon.h" > +#include "sysemu.h" > + > +#include <sys/mman.h> > + > +/* from Linux's linux/virtio_blk.h */ > + > +/* The ID for virtio_balloon */ > +#define VIRTIO_ID_BALLOON 5 > + > +/* The feature bitmap for virtio balloon */ > +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > + > +struct virtio_balloon_config > +{ > + /* Number of pages host wants Guest to give up. */ > + uint32_t num_pages; > + /* Number of pages we've actually got in balloon. */ > + uint32_t actual; > +}; > + > +typedef struct VirtIOBalloon > +{ > + VirtIODevice vdev; > + VirtQueue *ivq, *dvq; > + uint32_t num_pages; > + uint32_t actual; > +} VirtIOBalloon; > + > +static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) > +{ > + return (VirtIOBalloon *)vdev; > +} > + > +static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) > +{ > + VirtIOBalloon *s = to_virtio_balloon(vdev); > + VirtQueueElement elem; > + unsigned int count; > + > + while ((count = virtqueue_pop(vq, &elem)) != 0) { > + int i; > + unsigned int wlen = 0; > + > + for (i = 0; i < elem.out_num; i++) { > + int flags; > + uint32_t *pfns = elem.out_sg[i].iov_base; > + unsigned int n_pfns = elem.out_sg[i].iov_len / 4; > + int j; > + > + for (j = 0; j < n_pfns; j++) { > + if (vq == s->dvq) /* deflate */ > + flags = MADV_WILLNEED; > + else /* inflate */ > + flags = MADV_DONTNEED; > + > + madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS), > + TARGET_PAGE_SIZE, flags); > + } > + > + wlen += elem.out_sg[i].iov_len; > + } > + > + virtqueue_push(vq, &elem, wlen); > + virtio_notify(vdev, vq); > + } > +} > + > +static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) > +{ > + VirtIOBalloon *dev = to_virtio_balloon(vdev); > + struct virtio_balloon_config config; > + > + config.num_pages = dev->num_pages; > + config.actual = dev->actual; > + > + memcpy(config_data, &config, 8); > +} > + > +static void virtio_balloon_set_config(VirtIODevice *vdev, > + const uint8_t *config_data) > +{ > + VirtIOBalloon *dev = to_virtio_balloon(vdev); > + struct virtio_balloon_config config; > + memcpy(&config, config_data, 8); > + dev->actual = config.actual; > +} > + > +static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) > +{ > + return 0; > +} > + > +static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target) > +{ > + VirtIOBalloon *dev = opaque; > + > + if (target) { > + dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS; > + virtio_notify_config(&dev->vdev); > + } > + > + return ram_size - (dev->actual << TARGET_PAGE_BITS); > +} > + > +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device) > +{ > + VirtIOBalloon *s; > + > + s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon", > + vendor, device, > + 0, VIRTIO_ID_BALLOON, > + 0x05, 0x00, 0x00, > + 8, sizeof(VirtIOBalloon)); > + > + s->vdev.get_config = virtio_balloon_get_config; > + s->vdev.set_config = virtio_balloon_set_config; > + s->vdev.get_features = virtio_balloon_get_features; > + > + s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); > + s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); > + > + qemu_add_balloon_handler(virtio_balloon_to_target, s); > + > + return &s->vdev; > +} > diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c > index 301b5a1..c8c3233 100644 > --- a/qemu/hw/virtio-blk.c > +++ b/qemu/hw/virtio-blk.c > @@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, > 0x01, 0x80, 0x00, > 16, sizeof(VirtIOBlock)); > > - s->vdev.update_config = virtio_blk_update_config; > + s->vdev.get_config = virtio_blk_update_config; > s->vdev.get_features = virtio_blk_get_features; > s->bs = bs; > > diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c > index 86f9e5a..8c1f953 100644 > --- a/qemu/hw/virtio-net.c > +++ b/qemu/hw/virtio-net.c > @@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn) > 0x02, 0x00, 0x00, > 6, sizeof(VirtIONet)); > > - n->vdev.update_config = virtio_net_update_config; > + n->vdev.get_config = virtio_net_update_config; > n->vdev.get_features = virtio_net_get_features; > n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx); > n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx); > diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c > index bbcb44c..1e8fb44 100644 > --- a/qemu/hw/virtio.c > +++ b/qemu/hw/virtio.c > @@ -304,6 +304,9 @@ static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) > return; > > memcpy(vdev->config + addr, &val, sizeof(val)); > + > + if (vdev->set_config) > + vdev->set_config(vdev, vdev->config); > } > > static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) > @@ -316,6 +319,9 @@ static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) > return; > > memcpy(vdev->config + addr, &val, sizeof(val)); > + > + if (vdev->set_config) > + vdev->set_config(vdev, vdev->config); > } > > static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) > @@ -328,6 +334,9 @@ static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) > return; > > memcpy(vdev->config + addr, &val, sizeof(val)); > + > + if (vdev->set_config) > + vdev->set_config(vdev, vdev->config); > } > > static void virtio_map(PCIDevice *pci_dev, int region_num, > @@ -356,7 +365,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num, > register_ioport_read(addr + 20, vdev->config_len, 4, > virtio_config_readl, vdev); > > - vdev->update_config(vdev, vdev->config); > + vdev->get_config(vdev, vdev->config); > } > } > > @@ -380,6 +389,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, > return &vdev->vq[i]; > } > > +void virtio_notify_config(VirtIODevice *vdev) > +{ > + /* make sure we have the latest config */ > + vdev->get_config(vdev, vdev->config); > + vdev->isr = 3; > + virtio_update_irq(vdev); > +} > + > void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > { > /* Always notify when queue is empty */ > diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h > index dee97ba..ed8bebd 100644 > --- a/qemu/hw/virtio.h > +++ b/qemu/hw/virtio.h > @@ -118,7 +118,8 @@ struct VirtIODevice > void *config; > uint32_t (*get_features)(VirtIODevice *vdev); > void (*set_features)(VirtIODevice *vdev, uint32_t val); > - void (*update_config)(VirtIODevice *vdev, uint8_t *config); > + void (*get_config)(VirtIODevice *vdev, uint8_t *config); > + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); > VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; > }; > > @@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); > > void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); > > +void virtio_notify_config(VirtIODevice *vdev); > + > #endif > diff --git a/qemu/monitor.c b/qemu/monitor.c > index e03c473..e05f717 100644 > --- a/qemu/monitor.c > +++ b/qemu/monitor.c > @@ -35,6 +35,7 @@ > #include "audio/audio.h" > #include "disas.h" > #include "migration.h" > +#include "balloon.h" > #include <dirent.h> > > #include "qemu-kvm.h" > @@ -1262,6 +1263,23 @@ static void do_wav_capture (const char *path, > } > #endif > > +static void do_balloon(int value) > +{ > + ram_addr_t target = value; > + qemu_balloon(target << 20); > +} > + > +static void do_info_balloon(void) > +{ > + ram_addr_t actual; > + > + actual = qemu_balloon_status(); > + if (actual == 0) > + term_printf("Ballooning not activated in VM\n"); > + else > + term_printf("balloon: actual=%d\n", (int)(actual >> 20)); > +} > + > static term_cmd_t term_cmds[] = { > { "help|?", "s?", do_help, > "[cmd]", "show the help" }, > @@ -1339,6 +1357,8 @@ static term_cmd_t term_cmds[] = { > "", "cancel the current VM migration" }, > { "migrate_set_speed", "s", do_migrate_set_speed, > "value", "set maximum speed (in bytes) for migrations" }, > + { "balloon", "i", do_balloon, > + "target", "request VM to change it's memory allocation (in MB)" }, > { NULL, NULL, }, > }; > > @@ -1401,6 +1421,8 @@ static term_cmd_t info_cmds[] = { > #endif > { "migration", "", do_info_migration, > "", "show migration information" }, > + { "balloon", "", do_info_balloon, > + "", "show balloon information" }, > { NULL, NULL, }, > }; > > diff --git a/qemu/vl.c b/qemu/vl.c > index 756e13d..d339eb2 100644 > --- a/qemu/vl.c > +++ b/qemu/vl.c > @@ -38,6 +38,7 @@ > #include "block.h" > #include "audio/audio.h" > #include "migration.h" > +#include "balloon.h" > #include "qemu-kvm.h" > > #include <unistd.h> > @@ -511,6 +512,31 @@ void hw_error(const char *fmt, ...) > abort(); > } > > +/***************/ > +/* ballooning */ > + > +static QEMUBalloonEvent *qemu_balloon_event; > +void *qemu_balloon_event_opaque; > + > +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque) > +{ > + qemu_balloon_event = func; > + qemu_balloon_event_opaque = opaque; > +} > + > +void qemu_balloon(ram_addr_t target) > +{ > + if (qemu_balloon_event) > + qemu_balloon_event(qemu_balloon_event_opaque, target); > +} > + > +ram_addr_t qemu_balloon_status(void) > +{ > + if (qemu_balloon_event) > + return qemu_balloon_event(qemu_balloon_event_opaque, 0); > + return 0; > +} > + > /***********************************************************/ > /* keyboard/mouse */ > > ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <4799115F.8010506-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <4799115F.8010506-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2008-01-25 16:08 ` Marcelo Tosatti 2008-01-25 17:02 ` Anthony Liguori 2008-01-26 3:35 ` Rusty Russell 2008-01-25 23:08 ` Dor Laor 1 sibling, 2 replies; 14+ messages in thread From: Marcelo Tosatti @ 2008-01-25 16:08 UTC (permalink / raw) To: Anthony Liguori Cc: Andrea Arcangeli, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti, Avi Kivity On Thu, Jan 24, 2008 at 04:29:51PM -0600, Anthony Liguori wrote: > Anthony Liguori wrote: > >This patch adds support to QEMU for Rusty's recently introduce virtio > >balloon > >driver. The user-facing portions of this are the introduction of a > >"balloon" > >and "info balloon" command in the monitor. > > > >I think using madvise unconditionally is okay but I am not sure. > > Looks like it's not. I just hung my host system after doing a bunch of > ballooning with a kernel that doesn't have MM notifiers. Thats strange, lack of MMU notifiers will crash the guest (by use of a stale shadow entry), but not the host. What are the symptoms? > I'm inclined to think that we should have a capability check for MM > notifiers and just not do madvise if they aren't present. I don't think > the ioctl approach that Marcelo took is sufficient as a malicious guest > could possibly hose the host. How's that? The ioctl damage is contained to the guest (other than CPU processing time, which the guest can cause in other ways). Anyway, don't see the need for back compat with older hosts. > > Having the guest allocate and not touch memory means that it should > eventually be removed from the shadow page cache and eventually swapped > out so ballooning isn't totally useless in the absence of MM notifiers. > > Regards, > > Anthony Liguori > > > If madvise > >is called on memory that is essentially locked (which is what pre-MM > >notifiers > >is like) then it should just be a nop right? > > > >Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > > > >diff --git a/qemu/Makefile.target b/qemu/Makefile.target > >index bb7be0f..d6b4f46 100644 > >--- a/qemu/Makefile.target > >+++ b/qemu/Makefile.target > >@@ -464,7 +464,7 @@ VL_OBJS += rtl8139.o > > VL_OBJS+= hypercall.o > > > > # virtio devices > >-VL_OBJS += virtio.o virtio-net.o virtio-blk.o > >+VL_OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o > > > > ifeq ($(TARGET_BASE_ARCH), i386) > > # Hardware support > >diff --git a/qemu/balloon.h b/qemu/balloon.h > >new file mode 100644 > >index 0000000..ffce1fa > >--- /dev/null > >+++ b/qemu/balloon.h > >@@ -0,0 +1,14 @@ > >+#ifndef _QEMU_BALLOON_H > >+#define _QEMU_BALLOON_H > >+ > >+#include "cpu-defs.h" > >+ > >+typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target); > >+ > >+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque); > >+ > >+void qemu_balloon(ram_addr_t target); > >+ > >+ram_addr_t qemu_balloon_status(void); > >+ > >+#endif > >diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c > >index 652b263..6c8d907 100644 > >--- a/qemu/hw/pc.c > >+++ b/qemu/hw/pc.c > >@@ -1122,6 +1122,9 @@ static void pc_init1(ram_addr_t ram_size, int > >vga_ram_size, > > } > > } > > > >+ if (pci_enabled) > >+ virtio_balloon_init(pci_bus, 0x1AF4, 0x1002); > >+ > > if (extboot_drive != -1) { > > DriveInfo *info = &drives_table[extboot_drive]; > > int cyls, heads, secs; > >diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h > >index f640395..1899c11 100644 > >--- a/qemu/hw/pc.h > >+++ b/qemu/hw/pc.h > >@@ -152,6 +152,9 @@ void virtio_net_poll(void); > > void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, > > BlockDriverState *bs); > > > >+/* virtio-balloon.h */ > >+void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device); > >+ > > /* extboot.c */ > > > > void extboot_init(BlockDriverState *bs, int cmd); > >diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c > >new file mode 100644 > >index 0000000..1b5a689 > >--- /dev/null > >+++ b/qemu/hw/virtio-balloon.c > >@@ -0,0 +1,142 @@ > >+/* > >+ * Virtio Block Device > >+ * > >+ * Copyright IBM, Corp. 2008 > >+ * > >+ * Authors: > >+ * Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > >+ * > >+ * This work is licensed under the terms of the GNU GPL, version 2. See > >+ * the COPYING file in the top-level directory. > >+ * > >+ */ > >+ > >+#include "virtio.h" > >+#include "block.h" > >+#include "pc.h" > >+#include "balloon.h" > >+#include "sysemu.h" > >+ > >+#include <sys/mman.h> > >+ > >+/* from Linux's linux/virtio_blk.h */ > >+ > >+/* The ID for virtio_balloon */ > >+#define VIRTIO_ID_BALLOON 5 > >+ > >+/* The feature bitmap for virtio balloon */ > >+#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming > >pages */ > >+ > >+struct virtio_balloon_config > >+{ > >+ /* Number of pages host wants Guest to give up. */ > >+ uint32_t num_pages; > >+ /* Number of pages we've actually got in balloon. */ > >+ uint32_t actual; > >+}; > >+ > >+typedef struct VirtIOBalloon > >+{ > >+ VirtIODevice vdev; > >+ VirtQueue *ivq, *dvq; > >+ uint32_t num_pages; > >+ uint32_t actual; > >+} VirtIOBalloon; > >+ > >+static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) > >+{ > >+ return (VirtIOBalloon *)vdev; > >+} > >+ > >+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue > >*vq) > >+{ > >+ VirtIOBalloon *s = to_virtio_balloon(vdev); > >+ VirtQueueElement elem; > >+ unsigned int count; > >+ > >+ while ((count = virtqueue_pop(vq, &elem)) != 0) { > >+ int i; > >+ unsigned int wlen = 0; > >+ > >+ for (i = 0; i < elem.out_num; i++) { > >+ int flags; > >+ uint32_t *pfns = elem.out_sg[i].iov_base; > >+ unsigned int n_pfns = elem.out_sg[i].iov_len / 4; > >+ int j; > >+ > >+ for (j = 0; j < n_pfns; j++) { > >+ if (vq == s->dvq) /* deflate */ > >+ flags = MADV_WILLNEED; > >+ else /* inflate */ > >+ flags = MADV_DONTNEED; > >+ > >+ madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS), > >+ TARGET_PAGE_SIZE, flags); > >+ } > >+ > >+ wlen += elem.out_sg[i].iov_len; > >+ } > >+ > >+ virtqueue_push(vq, &elem, wlen); > >+ virtio_notify(vdev, vq); > >+ } > >+} > >+ > >+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t > >*config_data) > >+{ > >+ VirtIOBalloon *dev = to_virtio_balloon(vdev); > >+ struct virtio_balloon_config config; > >+ > >+ config.num_pages = dev->num_pages; > >+ config.actual = dev->actual; > >+ > >+ memcpy(config_data, &config, 8); > >+} > >+ > >+static void virtio_balloon_set_config(VirtIODevice *vdev, > >+ const uint8_t *config_data) > >+{ > >+ VirtIOBalloon *dev = to_virtio_balloon(vdev); > >+ struct virtio_balloon_config config; > >+ memcpy(&config, config_data, 8); > >+ dev->actual = config.actual; > >+} > >+ > >+static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) > >+{ > >+ return 0; > >+} > >+ > >+static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t > >target) > >+{ > >+ VirtIOBalloon *dev = opaque; > >+ > >+ if (target) { > >+ dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS; > >+ virtio_notify_config(&dev->vdev); > >+ } > >+ > >+ return ram_size - (dev->actual << TARGET_PAGE_BITS); > >+} > >+ > >+void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device) > >+{ > >+ VirtIOBalloon *s; > >+ > >+ s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon", > >+ vendor, device, > >+ 0, VIRTIO_ID_BALLOON, > >+ 0x05, 0x00, 0x00, > >+ 8, sizeof(VirtIOBalloon)); > >+ > >+ s->vdev.get_config = virtio_balloon_get_config; > >+ s->vdev.set_config = virtio_balloon_set_config; > >+ s->vdev.get_features = virtio_balloon_get_features; > >+ > >+ s->ivq = virtio_add_queue(&s->vdev, 128, > >virtio_balloon_handle_output); > >+ s->dvq = virtio_add_queue(&s->vdev, 128, > >virtio_balloon_handle_output); > >+ > >+ qemu_add_balloon_handler(virtio_balloon_to_target, s); > >+ > >+ return &s->vdev; > >+} > >diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c > >index 301b5a1..c8c3233 100644 > >--- a/qemu/hw/virtio-blk.c > >+++ b/qemu/hw/virtio-blk.c > >@@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, > >uint16_t device, > > 0x01, 0x80, 0x00, > > 16, sizeof(VirtIOBlock)); > > > >- s->vdev.update_config = virtio_blk_update_config; > >+ s->vdev.get_config = virtio_blk_update_config; > > s->vdev.get_features = virtio_blk_get_features; > > s->bs = bs; > > > >diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c > >index 86f9e5a..8c1f953 100644 > >--- a/qemu/hw/virtio-net.c > >+++ b/qemu/hw/virtio-net.c > >@@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int > >devfn) > > 0x02, 0x00, 0x00, > > 6, sizeof(VirtIONet)); > > > >- n->vdev.update_config = virtio_net_update_config; > >+ n->vdev.get_config = virtio_net_update_config; > > n->vdev.get_features = virtio_net_get_features; > > n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx); > > n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx); > >diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c > >index bbcb44c..1e8fb44 100644 > >--- a/qemu/hw/virtio.c > >+++ b/qemu/hw/virtio.c > >@@ -304,6 +304,9 @@ static void virtio_config_writeb(void *opaque, > >uint32_t addr, uint32_t data) > > return; > > > > memcpy(vdev->config + addr, &val, sizeof(val)); > >+ > >+ if (vdev->set_config) > >+ vdev->set_config(vdev, vdev->config); > > } > > > > static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t > > data) > >@@ -316,6 +319,9 @@ static void virtio_config_writew(void *opaque, > >uint32_t addr, uint32_t data) > > return; > > > > memcpy(vdev->config + addr, &val, sizeof(val)); > >+ > >+ if (vdev->set_config) > >+ vdev->set_config(vdev, vdev->config); > > } > > > > static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t > > data) > >@@ -328,6 +334,9 @@ static void virtio_config_writel(void *opaque, > >uint32_t addr, uint32_t data) > > return; > > > > memcpy(vdev->config + addr, &val, sizeof(val)); > >+ > >+ if (vdev->set_config) > >+ vdev->set_config(vdev, vdev->config); > > } > > > > static void virtio_map(PCIDevice *pci_dev, int region_num, > >@@ -356,7 +365,7 @@ static void virtio_map(PCIDevice *pci_dev, int > >region_num, > > register_ioport_read(addr + 20, vdev->config_len, 4, > > virtio_config_readl, vdev); > > > >- vdev->update_config(vdev, vdev->config); > >+ vdev->get_config(vdev, vdev->config); > > } > > } > > > >@@ -380,6 +389,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int > >queue_size, > > return &vdev->vq[i]; > > } > > > >+void virtio_notify_config(VirtIODevice *vdev) > >+{ > >+ /* make sure we have the latest config */ > >+ vdev->get_config(vdev, vdev->config); > >+ vdev->isr = 3; > >+ virtio_update_irq(vdev); > >+} > >+ > > void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > > { > > /* Always notify when queue is empty */ > >diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h > >index dee97ba..ed8bebd 100644 > >--- a/qemu/hw/virtio.h > >+++ b/qemu/hw/virtio.h > >@@ -118,7 +118,8 @@ struct VirtIODevice > > void *config; > > uint32_t (*get_features)(VirtIODevice *vdev); > > void (*set_features)(VirtIODevice *vdev, uint32_t val); > >- void (*update_config)(VirtIODevice *vdev, uint8_t *config); > >+ void (*get_config)(VirtIODevice *vdev, uint8_t *config); > >+ void (*set_config)(VirtIODevice *vdev, const uint8_t *config); > > VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; > > }; > > > >@@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement > >*elem); > > > > void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); > > > >+void virtio_notify_config(VirtIODevice *vdev); > >+ > > #endif > >diff --git a/qemu/monitor.c b/qemu/monitor.c > >index e03c473..e05f717 100644 > >--- a/qemu/monitor.c > >+++ b/qemu/monitor.c > >@@ -35,6 +35,7 @@ > > #include "audio/audio.h" > > #include "disas.h" > > #include "migration.h" > >+#include "balloon.h" > > #include <dirent.h> > > > > #include "qemu-kvm.h" > >@@ -1262,6 +1263,23 @@ static void do_wav_capture (const char *path, > > } > > #endif > > > >+static void do_balloon(int value) > >+{ > >+ ram_addr_t target = value; > >+ qemu_balloon(target << 20); > >+} > >+ > >+static void do_info_balloon(void) > >+{ > >+ ram_addr_t actual; > >+ > >+ actual = qemu_balloon_status(); > >+ if (actual == 0) > >+ term_printf("Ballooning not activated in VM\n"); > >+ else > >+ term_printf("balloon: actual=%d\n", (int)(actual >> 20)); > >+} > >+ > > static term_cmd_t term_cmds[] = { > > { "help|?", "s?", do_help, > > "[cmd]", "show the help" }, > >@@ -1339,6 +1357,8 @@ static term_cmd_t term_cmds[] = { > > "", "cancel the current VM migration" }, > > { "migrate_set_speed", "s", do_migrate_set_speed, > > "value", "set maximum speed (in bytes) for migrations" }, > >+ { "balloon", "i", do_balloon, > >+ "target", "request VM to change it's memory allocation (in MB)" }, > > { NULL, NULL, }, > > }; > > > >@@ -1401,6 +1421,8 @@ static term_cmd_t info_cmds[] = { > > #endif > > { "migration", "", do_info_migration, > > "", "show migration information" }, > >+ { "balloon", "", do_info_balloon, > >+ "", "show balloon information" }, > > { NULL, NULL, }, > > }; > > > >diff --git a/qemu/vl.c b/qemu/vl.c > >index 756e13d..d339eb2 100644 > >--- a/qemu/vl.c > >+++ b/qemu/vl.c > >@@ -38,6 +38,7 @@ > > #include "block.h" > > #include "audio/audio.h" > > #include "migration.h" > >+#include "balloon.h" > > #include "qemu-kvm.h" > > > > #include <unistd.h> > >@@ -511,6 +512,31 @@ void hw_error(const char *fmt, ...) > > abort(); > > } > > > >+/***************/ > >+/* ballooning */ > >+ > >+static QEMUBalloonEvent *qemu_balloon_event; > >+void *qemu_balloon_event_opaque; > >+ > >+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque) > >+{ > >+ qemu_balloon_event = func; > >+ qemu_balloon_event_opaque = opaque; > >+} > >+ > >+void qemu_balloon(ram_addr_t target) > >+{ > >+ if (qemu_balloon_event) > >+ qemu_balloon_event(qemu_balloon_event_opaque, target); > >+} > >+ > >+ram_addr_t qemu_balloon_status(void) > >+{ > >+ if (qemu_balloon_event) > >+ return qemu_balloon_event(qemu_balloon_event_opaque, 0); > >+ return 0; > >+} > >+ > > /***********************************************************/ > > /* keyboard/mouse */ > > > > ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-01-25 16:08 ` Marcelo Tosatti @ 2008-01-25 17:02 ` Anthony Liguori [not found] ` <479A162C.1060209-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2008-01-26 3:35 ` Rusty Russell 1 sibling, 1 reply; 14+ messages in thread From: Anthony Liguori @ 2008-01-25 17:02 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Andrea Arcangeli, Avi Kivity Marcelo Tosatti wrote: > On Thu, Jan 24, 2008 at 04:29:51PM -0600, Anthony Liguori wrote: > >> Anthony Liguori wrote: >> >>> This patch adds support to QEMU for Rusty's recently introduce virtio >>> balloon >>> driver. The user-facing portions of this are the introduction of a >>> "balloon" >>> and "info balloon" command in the monitor. >>> >>> I think using madvise unconditionally is okay but I am not sure. >>> >> Looks like it's not. I just hung my host system after doing a bunch of >> ballooning with a kernel that doesn't have MM notifiers. >> > > Thats strange, lack of MMU notifiers will crash the guest (by use of a > stale shadow entry), but not the host. > > What are the symptoms? > It's only happened to me once because I stopped testing with madvise afterward. The guest spontaneously restarted, and a few seconds later, the machine hung. It shouldn't be hard to reproduce by just repeatedly ballooning up and down a guest. Do others expect KVM to just cope with the virtual mapping being changed out from underneath of it? >> I'm inclined to think that we should have a capability check for MM >> notifiers and just not do madvise if they aren't present. I don't think >> the ioctl approach that Marcelo took is sufficient as a malicious guest >> could possibly hose the host. >> > > How's that? The ioctl damage is contained to the guest (other than CPU > processing time, which the guest can cause in other ways). > > Anyway, don't see the need for back compat with older hosts. > Well, I'm unsure if this is a bug or expected behavior. If it's the later, then the ioctl approach just introduces a race condition. If the guest can fault in a page after the ioctl but before the madvise(), then it can trigger the bug. Regards, Anthony Liguori >> Having the guest allocate and not touch memory means that it should >> eventually be removed from the shadow page cache and eventually swapped >> out so ballooning isn't totally useless in the absence of MM notifiers. >> >> Regards, >> >> Anthony Liguori >> >> >>> If madvise >>> is called on memory that is essentially locked (which is what pre-MM >>> notifiers >>> is like) then it should just be a nop right? >>> >>> Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> >>> >>> diff --git a/qemu/Makefile.target b/qemu/Makefile.target >>> index bb7be0f..d6b4f46 100644 >>> --- a/qemu/Makefile.target >>> +++ b/qemu/Makefile.target >>> @@ -464,7 +464,7 @@ VL_OBJS += rtl8139.o >>> VL_OBJS+= hypercall.o >>> >>> # virtio devices >>> -VL_OBJS += virtio.o virtio-net.o virtio-blk.o >>> +VL_OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o >>> >>> ifeq ($(TARGET_BASE_ARCH), i386) >>> # Hardware support >>> diff --git a/qemu/balloon.h b/qemu/balloon.h >>> new file mode 100644 >>> index 0000000..ffce1fa >>> --- /dev/null >>> +++ b/qemu/balloon.h >>> @@ -0,0 +1,14 @@ >>> +#ifndef _QEMU_BALLOON_H >>> +#define _QEMU_BALLOON_H >>> + >>> +#include "cpu-defs.h" >>> + >>> +typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target); >>> + >>> +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque); >>> + >>> +void qemu_balloon(ram_addr_t target); >>> + >>> +ram_addr_t qemu_balloon_status(void); >>> + >>> +#endif >>> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c >>> index 652b263..6c8d907 100644 >>> --- a/qemu/hw/pc.c >>> +++ b/qemu/hw/pc.c >>> @@ -1122,6 +1122,9 @@ static void pc_init1(ram_addr_t ram_size, int >>> vga_ram_size, >>> } >>> } >>> >>> + if (pci_enabled) >>> + virtio_balloon_init(pci_bus, 0x1AF4, 0x1002); >>> + >>> if (extboot_drive != -1) { >>> DriveInfo *info = &drives_table[extboot_drive]; >>> int cyls, heads, secs; >>> diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h >>> index f640395..1899c11 100644 >>> --- a/qemu/hw/pc.h >>> +++ b/qemu/hw/pc.h >>> @@ -152,6 +152,9 @@ void virtio_net_poll(void); >>> void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, >>> BlockDriverState *bs); >>> >>> +/* virtio-balloon.h */ >>> +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device); >>> + >>> /* extboot.c */ >>> >>> void extboot_init(BlockDriverState *bs, int cmd); >>> diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c >>> new file mode 100644 >>> index 0000000..1b5a689 >>> --- /dev/null >>> +++ b/qemu/hw/virtio-balloon.c >>> @@ -0,0 +1,142 @@ >>> +/* >>> + * Virtio Block Device >>> + * >>> + * Copyright IBM, Corp. 2008 >>> + * >>> + * Authors: >>> + * Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2. See >>> + * the COPYING file in the top-level directory. >>> + * >>> + */ >>> + >>> +#include "virtio.h" >>> +#include "block.h" >>> +#include "pc.h" >>> +#include "balloon.h" >>> +#include "sysemu.h" >>> + >>> +#include <sys/mman.h> >>> + >>> +/* from Linux's linux/virtio_blk.h */ >>> + >>> +/* The ID for virtio_balloon */ >>> +#define VIRTIO_ID_BALLOON 5 >>> + >>> +/* The feature bitmap for virtio balloon */ >>> +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming >>> pages */ >>> + >>> +struct virtio_balloon_config >>> +{ >>> + /* Number of pages host wants Guest to give up. */ >>> + uint32_t num_pages; >>> + /* Number of pages we've actually got in balloon. */ >>> + uint32_t actual; >>> +}; >>> + >>> +typedef struct VirtIOBalloon >>> +{ >>> + VirtIODevice vdev; >>> + VirtQueue *ivq, *dvq; >>> + uint32_t num_pages; >>> + uint32_t actual; >>> +} VirtIOBalloon; >>> + >>> +static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) >>> +{ >>> + return (VirtIOBalloon *)vdev; >>> +} >>> + >>> +static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue >>> *vq) >>> +{ >>> + VirtIOBalloon *s = to_virtio_balloon(vdev); >>> + VirtQueueElement elem; >>> + unsigned int count; >>> + >>> + while ((count = virtqueue_pop(vq, &elem)) != 0) { >>> + int i; >>> + unsigned int wlen = 0; >>> + >>> + for (i = 0; i < elem.out_num; i++) { >>> + int flags; >>> + uint32_t *pfns = elem.out_sg[i].iov_base; >>> + unsigned int n_pfns = elem.out_sg[i].iov_len / 4; >>> + int j; >>> + >>> + for (j = 0; j < n_pfns; j++) { >>> + if (vq == s->dvq) /* deflate */ >>> + flags = MADV_WILLNEED; >>> + else /* inflate */ >>> + flags = MADV_DONTNEED; >>> + >>> + madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS), >>> + TARGET_PAGE_SIZE, flags); >>> + } >>> + >>> + wlen += elem.out_sg[i].iov_len; >>> + } >>> + >>> + virtqueue_push(vq, &elem, wlen); >>> + virtio_notify(vdev, vq); >>> + } >>> +} >>> + >>> +static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t >>> *config_data) >>> +{ >>> + VirtIOBalloon *dev = to_virtio_balloon(vdev); >>> + struct virtio_balloon_config config; >>> + >>> + config.num_pages = dev->num_pages; >>> + config.actual = dev->actual; >>> + >>> + memcpy(config_data, &config, 8); >>> +} >>> + >>> +static void virtio_balloon_set_config(VirtIODevice *vdev, >>> + const uint8_t *config_data) >>> +{ >>> + VirtIOBalloon *dev = to_virtio_balloon(vdev); >>> + struct virtio_balloon_config config; >>> + memcpy(&config, config_data, 8); >>> + dev->actual = config.actual; >>> +} >>> + >>> +static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) >>> +{ >>> + return 0; >>> +} >>> + >>> +static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t >>> target) >>> +{ >>> + VirtIOBalloon *dev = opaque; >>> + >>> + if (target) { >>> + dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS; >>> + virtio_notify_config(&dev->vdev); >>> + } >>> + >>> + return ram_size - (dev->actual << TARGET_PAGE_BITS); >>> +} >>> + >>> +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device) >>> +{ >>> + VirtIOBalloon *s; >>> + >>> + s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon", >>> + vendor, device, >>> + 0, VIRTIO_ID_BALLOON, >>> + 0x05, 0x00, 0x00, >>> + 8, sizeof(VirtIOBalloon)); >>> + >>> + s->vdev.get_config = virtio_balloon_get_config; >>> + s->vdev.set_config = virtio_balloon_set_config; >>> + s->vdev.get_features = virtio_balloon_get_features; >>> + >>> + s->ivq = virtio_add_queue(&s->vdev, 128, >>> virtio_balloon_handle_output); >>> + s->dvq = virtio_add_queue(&s->vdev, 128, >>> virtio_balloon_handle_output); >>> + >>> + qemu_add_balloon_handler(virtio_balloon_to_target, s); >>> + >>> + return &s->vdev; >>> +} >>> diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c >>> index 301b5a1..c8c3233 100644 >>> --- a/qemu/hw/virtio-blk.c >>> +++ b/qemu/hw/virtio-blk.c >>> @@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, >>> uint16_t device, >>> 0x01, 0x80, 0x00, >>> 16, sizeof(VirtIOBlock)); >>> >>> - s->vdev.update_config = virtio_blk_update_config; >>> + s->vdev.get_config = virtio_blk_update_config; >>> s->vdev.get_features = virtio_blk_get_features; >>> s->bs = bs; >>> >>> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c >>> index 86f9e5a..8c1f953 100644 >>> --- a/qemu/hw/virtio-net.c >>> +++ b/qemu/hw/virtio-net.c >>> @@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int >>> devfn) >>> 0x02, 0x00, 0x00, >>> 6, sizeof(VirtIONet)); >>> >>> - n->vdev.update_config = virtio_net_update_config; >>> + n->vdev.get_config = virtio_net_update_config; >>> n->vdev.get_features = virtio_net_get_features; >>> n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx); >>> n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx); >>> diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c >>> index bbcb44c..1e8fb44 100644 >>> --- a/qemu/hw/virtio.c >>> +++ b/qemu/hw/virtio.c >>> @@ -304,6 +304,9 @@ static void virtio_config_writeb(void *opaque, >>> uint32_t addr, uint32_t data) >>> return; >>> >>> memcpy(vdev->config + addr, &val, sizeof(val)); >>> + >>> + if (vdev->set_config) >>> + vdev->set_config(vdev, vdev->config); >>> } >>> >>> static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t >>> data) >>> @@ -316,6 +319,9 @@ static void virtio_config_writew(void *opaque, >>> uint32_t addr, uint32_t data) >>> return; >>> >>> memcpy(vdev->config + addr, &val, sizeof(val)); >>> + >>> + if (vdev->set_config) >>> + vdev->set_config(vdev, vdev->config); >>> } >>> >>> static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t >>> data) >>> @@ -328,6 +334,9 @@ static void virtio_config_writel(void *opaque, >>> uint32_t addr, uint32_t data) >>> return; >>> >>> memcpy(vdev->config + addr, &val, sizeof(val)); >>> + >>> + if (vdev->set_config) >>> + vdev->set_config(vdev, vdev->config); >>> } >>> >>> static void virtio_map(PCIDevice *pci_dev, int region_num, >>> @@ -356,7 +365,7 @@ static void virtio_map(PCIDevice *pci_dev, int >>> region_num, >>> register_ioport_read(addr + 20, vdev->config_len, 4, >>> virtio_config_readl, vdev); >>> >>> - vdev->update_config(vdev, vdev->config); >>> + vdev->get_config(vdev, vdev->config); >>> } >>> } >>> >>> @@ -380,6 +389,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int >>> queue_size, >>> return &vdev->vq[i]; >>> } >>> >>> +void virtio_notify_config(VirtIODevice *vdev) >>> +{ >>> + /* make sure we have the latest config */ >>> + vdev->get_config(vdev, vdev->config); >>> + vdev->isr = 3; >>> + virtio_update_irq(vdev); >>> +} >>> + >>> void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >>> { >>> /* Always notify when queue is empty */ >>> diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h >>> index dee97ba..ed8bebd 100644 >>> --- a/qemu/hw/virtio.h >>> +++ b/qemu/hw/virtio.h >>> @@ -118,7 +118,8 @@ struct VirtIODevice >>> void *config; >>> uint32_t (*get_features)(VirtIODevice *vdev); >>> void (*set_features)(VirtIODevice *vdev, uint32_t val); >>> - void (*update_config)(VirtIODevice *vdev, uint8_t *config); >>> + void (*get_config)(VirtIODevice *vdev, uint8_t *config); >>> + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); >>> VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; >>> }; >>> >>> @@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement >>> *elem); >>> >>> void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); >>> >>> +void virtio_notify_config(VirtIODevice *vdev); >>> + >>> #endif >>> diff --git a/qemu/monitor.c b/qemu/monitor.c >>> index e03c473..e05f717 100644 >>> --- a/qemu/monitor.c >>> +++ b/qemu/monitor.c >>> @@ -35,6 +35,7 @@ >>> #include "audio/audio.h" >>> #include "disas.h" >>> #include "migration.h" >>> +#include "balloon.h" >>> #include <dirent.h> >>> >>> #include "qemu-kvm.h" >>> @@ -1262,6 +1263,23 @@ static void do_wav_capture (const char *path, >>> } >>> #endif >>> >>> +static void do_balloon(int value) >>> +{ >>> + ram_addr_t target = value; >>> + qemu_balloon(target << 20); >>> +} >>> + >>> +static void do_info_balloon(void) >>> +{ >>> + ram_addr_t actual; >>> + >>> + actual = qemu_balloon_status(); >>> + if (actual == 0) >>> + term_printf("Ballooning not activated in VM\n"); >>> + else >>> + term_printf("balloon: actual=%d\n", (int)(actual >> 20)); >>> +} >>> + >>> static term_cmd_t term_cmds[] = { >>> { "help|?", "s?", do_help, >>> "[cmd]", "show the help" }, >>> @@ -1339,6 +1357,8 @@ static term_cmd_t term_cmds[] = { >>> "", "cancel the current VM migration" }, >>> { "migrate_set_speed", "s", do_migrate_set_speed, >>> "value", "set maximum speed (in bytes) for migrations" }, >>> + { "balloon", "i", do_balloon, >>> + "target", "request VM to change it's memory allocation (in MB)" }, >>> { NULL, NULL, }, >>> }; >>> >>> @@ -1401,6 +1421,8 @@ static term_cmd_t info_cmds[] = { >>> #endif >>> { "migration", "", do_info_migration, >>> "", "show migration information" }, >>> + { "balloon", "", do_info_balloon, >>> + "", "show balloon information" }, >>> { NULL, NULL, }, >>> }; >>> >>> diff --git a/qemu/vl.c b/qemu/vl.c >>> index 756e13d..d339eb2 100644 >>> --- a/qemu/vl.c >>> +++ b/qemu/vl.c >>> @@ -38,6 +38,7 @@ >>> #include "block.h" >>> #include "audio/audio.h" >>> #include "migration.h" >>> +#include "balloon.h" >>> #include "qemu-kvm.h" >>> >>> #include <unistd.h> >>> @@ -511,6 +512,31 @@ void hw_error(const char *fmt, ...) >>> abort(); >>> } >>> >>> +/***************/ >>> +/* ballooning */ >>> + >>> +static QEMUBalloonEvent *qemu_balloon_event; >>> +void *qemu_balloon_event_opaque; >>> + >>> +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque) >>> +{ >>> + qemu_balloon_event = func; >>> + qemu_balloon_event_opaque = opaque; >>> +} >>> + >>> +void qemu_balloon(ram_addr_t target) >>> +{ >>> + if (qemu_balloon_event) >>> + qemu_balloon_event(qemu_balloon_event_opaque, target); >>> +} >>> + >>> +ram_addr_t qemu_balloon_status(void) >>> +{ >>> + if (qemu_balloon_event) >>> + return qemu_balloon_event(qemu_balloon_event_opaque, 0); >>> + return 0; >>> +} >>> + >>> /***********************************************************/ >>> /* keyboard/mouse */ >>> >>> >>> ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <479A162C.1060209-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <479A162C.1060209-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2008-01-26 18:47 ` Avi Kivity 2008-03-08 19:27 ` Marcelo Tosatti 0 siblings, 1 reply; 14+ messages in thread From: Avi Kivity @ 2008-01-26 18:47 UTC (permalink / raw) To: Anthony Liguori Cc: Marcelo Tosatti, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Andrea Arcangeli Anthony Liguori wrote: > Do others expect KVM to just cope with the virtual mapping being changed > out from underneath of it? > kvm should cope with both malicious guests and malicious (or buggy) host userspace. It's difficuly to analyze, but mmu notifiers might be necessary for the latter. -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-01-26 18:47 ` Avi Kivity @ 2008-03-08 19:27 ` Marcelo Tosatti 2008-03-08 20:51 ` Marcelo Tosatti 0 siblings, 1 reply; 14+ messages in thread From: Marcelo Tosatti @ 2008-03-08 19:27 UTC (permalink / raw) To: Avi Kivity; +Cc: Marcelo Tosatti, kvm-devel, Andrea Arcangeli On Sat, Jan 26, 2008 at 08:47:28PM +0200, Avi Kivity wrote: > Anthony Liguori wrote: > >Do others expect KVM to just cope with the virtual mapping being changed > >out from underneath of it? > > > > kvm should cope with both malicious guests and malicious (or buggy) host > userspace. It's difficuly to analyze, but mmu notifiers might be > necessary for the latter. The reason for the host crash with madvise is that the rmap code relies on the guest process virtual mappings from not disappearing while there are active shadow mappings. How to proceed now? Do we want to efficiently support ballooning without mmu notifiers? If so, an ioctl to zap the mmu is necessary as discussed before. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f0cdfba..4c93b79 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1009,6 +1009,21 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) return page; } +static int was_spte_rmapped(struct kvm *kvm, u64 *spte, struct page *page) +{ + int ret = 0; + unsigned long host_pfn = (*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + + if (is_rmap_pte(*spte)) { + if (host_pfn != page_to_pfn(page)) + rmap_remove(kvm, spte); + else + ret = 1; + } + + return ret; +} + static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, @@ -1016,7 +1031,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, struct page *page) { u64 spte; - int was_rmapped = is_rmap_pte(*shadow_pte); + int was_rmapped = was_spte_rmapped(vcpu->kvm, shadow_pte, page); int was_writeble = is_writeble_pte(*shadow_pte); /* ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-03-08 19:27 ` Marcelo Tosatti @ 2008-03-08 20:51 ` Marcelo Tosatti 2008-03-09 2:46 ` Anthony Liguori 0 siblings, 1 reply; 14+ messages in thread From: Marcelo Tosatti @ 2008-03-08 20:51 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel, Andrea Arcangeli On Sat, Mar 08, 2008 at 04:27:13PM -0300, Marcelo Tosatti wrote: > On Sat, Jan 26, 2008 at 08:47:28PM +0200, Avi Kivity wrote: > > Anthony Liguori wrote: > > >Do others expect KVM to just cope with the virtual mapping being changed > > >out from underneath of it? > > > > > > > kvm should cope with both malicious guests and malicious (or buggy) host > > userspace. It's difficuly to analyze, but mmu notifiers might be > > necessary for the latter. > > The reason for the host crash with madvise is that the rmap code relies > on the guest process virtual mappings from not disappearing while there > are active shadow mappings. > > How to proceed now? Do we want to efficiently support ballooning without > mmu notifiers? If so, an ioctl to zap the mmu is necessary as discussed > before. Err, actually zapping the mmu is necessary to guarantee guest will not use stale shadow entries vs entries pointing to old unreacheable pages. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-03-08 20:51 ` Marcelo Tosatti @ 2008-03-09 2:46 ` Anthony Liguori 0 siblings, 0 replies; 14+ messages in thread From: Anthony Liguori @ 2008-03-09 2:46 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: kvm-devel, Andrea Arcangeli, Avi Kivity Marcelo Tosatti wrote: >> How to proceed now? Do we want to efficiently support ballooning without >> mmu notifiers? If so, an ioctl to zap the mmu is necessary as discussed >> before. >> > > Err, actually zapping the mmu is necessary to guarantee guest will not > use stale shadow entries vs entries pointing to old unreacheable pages. > I'm happy to just not madvise() with older host userspaces. Doing an ioctl and pausing all the VCPUs is pretty ugly. Regards, Anthony Liguori ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-01-25 16:08 ` Marcelo Tosatti 2008-01-25 17:02 ` Anthony Liguori @ 2008-01-26 3:35 ` Rusty Russell 1 sibling, 0 replies; 14+ messages in thread From: Rusty Russell @ 2008-01-26 3:35 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Andrea Arcangeli, Avi Kivity On Saturday 26 January 2008 03:08:57 Marcelo Tosatti wrote: > On Thu, Jan 24, 2008 at 04:29:51PM -0600, Anthony Liguori wrote: > > I'm inclined to think that we should have a capability check for MM > > notifiers and just not do madvise if they aren't present. I don't think > > the ioctl approach that Marcelo took is sufficient as a malicious guest > > could possibly hose the host. > > How's that? The ioctl damage is contained to the guest (other than CPU > processing time, which the guest can cause in other ways). > > Anyway, don't see the need for back compat with older hosts. > > > Having the guest allocate and not touch memory means that it should [Cut last 523 lines of email, all of which was quoting] Look, I realize that Anthony is unable to control his massive quoting diarrhea, but I expect better from you Marcelo. It wastes everyone elses' time scanning it for new content, and this habit seems to be spreading. If this keeps up, I'll have no choice but to start sending spams with such a massive quote ratio just so everyone's filters start dropping them... Rusty. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <4799115F.8010506-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2008-01-25 16:08 ` Marcelo Tosatti @ 2008-01-25 23:08 ` Dor Laor [not found] ` <1201302492.2944.8.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 1 sibling, 1 reply; 14+ messages in thread From: Dor Laor @ 2008-01-25 23:08 UTC (permalink / raw) To: Anthony Liguori Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti, Andrea Arcangeli, Avi Kivity On Thu, 2008-01-24 at 16:29 -0600, Anthony Liguori wrote: > Anthony Liguori wrote: > > This patch adds support to QEMU for Rusty's recently introduce virtio balloon > > driver. The user-facing portions of this are the introduction of a "balloon" > > and "info balloon" command in the monitor. > > > > I think using madvise unconditionally is okay but I am not sure. > > Looks like it's not. I just hung my host system after doing a bunch of > ballooning with a kernel that doesn't have MM notifiers. > > I'm inclined to think that we should have a capability check for MM > notifiers and just not do madvise if they aren't present. I don't think > the ioctl approach that Marcelo took is sufficient as a malicious guest > could possibly hose the host. > The ioctl to zap the shadow pages is needed in order to free memory fast. Without it the balloon will evacuate memory to slow for common mgmt application (running additional VMs). This ioctl (on older kernels only) can hose the host but so can malicious guests that do dummy cr3 switching and other hackry. If one really insist he can always add a timer to this ioctl to slow potential malicious guests. > Having the guest allocate and not touch memory means that it should > eventually be removed from the shadow page cache and eventually swapped > out so ballooning isn't totally useless in the absence of MM notifiers. > > Regards, > > Anthony Liguori ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <1201302492.2944.8.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <1201302492.2944.8.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2008-01-26 0:10 ` Anthony Liguori [not found] ` <479A7A5C.6030005-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 14+ messages in thread From: Anthony Liguori @ 2008-01-26 0:10 UTC (permalink / raw) To: dor.laor-atKUWr5tajBWk0Htik3J/w Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti, Andrea Arcangeli, Avi Kivity Dor Laor wrote: > On Thu, 2008-01-24 at 16:29 -0600, Anthony Liguori wrote: > >> Anthony Liguori wrote: >> >>> This patch adds support to QEMU for Rusty's recently introduce virtio balloon >>> driver. The user-facing portions of this are the introduction of a "balloon" >>> and "info balloon" command in the monitor. >>> >>> I think using madvise unconditionally is okay but I am not sure. >>> >> Looks like it's not. I just hung my host system after doing a bunch of >> ballooning with a kernel that doesn't have MM notifiers. >> >> I'm inclined to think that we should have a capability check for MM >> notifiers and just not do madvise if they aren't present. I don't think >> the ioctl approach that Marcelo took is sufficient as a malicious guest >> could possibly hose the host. >> >> > > The ioctl to zap the shadow pages is needed in order to free memory > fast. Without it the balloon will evacuate memory to slow for common > mgmt application (running additional VMs). > I think that assertion needs some performance numbers to back it up. Linux will write unused pages to swap such that when it does need to obtain memory, it can easily just reclaim pages without doing any disk IO. The real advantage with using madvise() is that it doesn't use any swap space (at least, on Linux). > This ioctl (on older kernels only) can hose the host but so can > malicious guests that do dummy cr3 switching and other hackry. > What do you mean by that? The guest really shouldn't be able to hose the host regardless of what it puts in cr3. If it can, then that's a very serious bug. > If one really insist he can always add a timer to this ioctl to slow > potential malicious guests. > The issue is the atomicity of removing some from the shadow MMU cache and then madvise()'ing (since madvise is incapable of evicting from the shadow MMU cache w/o MMU notifiers). The only real solution I know of would be to also introduce an ioctl that's essentially, MADVISE_AND_REMOVE_FROM_SHADOW_MMU ioctl(). Regards, Anthony Liguori > >> Having the guest allocate and not touch memory means that it should >> eventually be removed from the shadow page cache and eventually swapped >> out so ballooning isn't totally useless in the absence of MM notifiers. >> >> Regards, >> >> Anthony Liguori >> > > > ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <479A7A5C.6030005-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] QEMU support for virtio balloon driver [not found] ` <479A7A5C.6030005-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2008-01-26 18:35 ` Avi Kivity 0 siblings, 0 replies; 14+ messages in thread From: Avi Kivity @ 2008-01-26 18:35 UTC (permalink / raw) To: Anthony Liguori Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti, Andrea Arcangeli Anthony Liguori wrote: >>>> >>> Looks like it's not. I just hung my host system after doing a bunch >>> of ballooning with a kernel that doesn't have MM notifiers. >>> >>> I'm inclined to think that we should have a capability check for MM >>> notifiers and just not do madvise if they aren't present. I don't >>> think the ioctl approach that Marcelo took is sufficient as a >>> malicious guest could possibly hose the host. >>> >>> >> >> The ioctl to zap the shadow pages is needed in order to free memory >> fast. Without it the balloon will evacuate memory to slow for common >> mgmt application (running additional VMs). >> > > I think that assertion needs some performance numbers to back it up. > Linux will write unused pages to swap such that when it does need to > obtain memory, it can easily just reclaim pages without doing any disk > IO. > > The real advantage with using madvise() is that it doesn't use any > swap space (at least, on Linux). > Zapping the mmu is needed so the memory can actually be reclaimed in the absence of mmu notifiers. With mmu notifiers, it is unnecessary. > > The issue is the atomicity of removing some from the shadow MMU cache > and then madvise()'ing (since madvise is incapable of evicting from > the shadow MMU cache w/o MMU notifiers). The only real solution I > know of would be to also introduce an ioctl that's essentially, > MADVISE_AND_REMOVE_FROM_SHADOW_MMU ioctl(). > Or maybe stop all vcpus (in userspace) zap shadow madvise() resume all vcpus -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] QEMU support for virtio balloon driver @ 2008-02-25 19:47 Anthony Liguori 2008-02-25 23:45 ` Dor Laor 0 siblings, 1 reply; 14+ messages in thread From: Anthony Liguori @ 2008-02-25 19:47 UTC (permalink / raw) To: kvm-devel; +Cc: Marcelo Tosatti, Anthony Liguori, Avi Kivity This patch adds support to QEMU for Rusty's recently introduce virtio balloon driver. The user-facing portions of this are the introduction of a "balloon" and "info balloon" command in the monitor. Right now madvise() is commented out since it causes host panics. Ballooning is still functional though--the host just doesn't reclaim the memory immediately. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> diff --git a/qemu/Makefile.target b/qemu/Makefile.target index d0cde54..c0205d7 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -574,7 +574,7 @@ OBJS += e1000.o OBJS+= hypercall.o # virtio devices -OBJS += virtio.o virtio-net.o virtio-blk.o +OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o ifeq ($(TARGET_BASE_ARCH), i386) # Hardware support diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index ef180ec..9cba6fc 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -1113,6 +1113,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, } } + if (pci_enabled) + virtio_balloon_init(pci_bus, 0x1AF4, 0x1002); + if (extboot_drive != -1) { DriveInfo *info = &drives_table[extboot_drive]; int cyls, heads, secs; diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index d109569..706881b 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -152,6 +152,9 @@ void virtio_net_poll(void); void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, BlockDriverState *bs); +/* virtio-balloon.h */ +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device); + /* extboot.c */ void extboot_init(BlockDriverState *bs, int cmd); diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c new file mode 100644 index 0000000..d7b61d0 --- /dev/null +++ b/qemu/hw/virtio-balloon.c @@ -0,0 +1,145 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "virtio.h" +#include "block.h" +#include "pc.h" +#include "balloon.h" +#include "sysemu.h" + +#include <sys/mman.h> + +/* from Linux's linux/virtio_blk.h */ + +/* The ID for virtio_balloon */ +#define VIRTIO_ID_BALLOON 5 + +/* The feature bitmap for virtio balloon */ +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ + +struct virtio_balloon_config +{ + /* Number of pages host wants Guest to give up. */ + uint32_t num_pages; + /* Number of pages we've actually got in balloon. */ + uint32_t actual; +}; + +typedef struct VirtIOBalloon +{ + VirtIODevice vdev; + VirtQueue *ivq, *dvq; + uint32_t num_pages; + uint32_t actual; +} VirtIOBalloon; + +static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) +{ + return (VirtIOBalloon *)vdev; +} + +static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *s = to_virtio_balloon(vdev); + VirtQueueElement elem; + unsigned int count; + + while ((count = virtqueue_pop(vq, &elem)) != 0) { + int i; + unsigned int wlen = 0; + + for (i = 0; i < elem.out_num; i++) { + int flags; + uint32_t *pfns = elem.out_sg[i].iov_base; + unsigned int n_pfns = elem.out_sg[i].iov_len / 4; + int j; + + for (j = 0; j < n_pfns; j++) { + if (vq == s->dvq) /* deflate */ + flags = MADV_WILLNEED; + else /* inflate */ + flags = MADV_DONTNEED; + +#if 0 + /* can't use this until we have mmu notifier support */ + madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS), + TARGET_PAGE_SIZE, flags); +#endif + } + + wlen += elem.out_sg[i].iov_len; + } + + virtqueue_push(vq, &elem, wlen); + virtio_notify(vdev, vq); + } +} + +static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) +{ + VirtIOBalloon *dev = to_virtio_balloon(vdev); + struct virtio_balloon_config config; + + config.num_pages = dev->num_pages; + config.actual = dev->actual; + + memcpy(config_data, &config, 8); +} + +static void virtio_balloon_set_config(VirtIODevice *vdev, + const uint8_t *config_data) +{ + VirtIOBalloon *dev = to_virtio_balloon(vdev); + struct virtio_balloon_config config; + memcpy(&config, config_data, 8); + dev->actual = config.actual; +} + +static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) +{ + return 0; +} + +static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target) +{ + VirtIOBalloon *dev = opaque; + + if (target) { + dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS; + virtio_notify_config(&dev->vdev); + } + + return ram_size - (dev->actual << TARGET_PAGE_BITS); +} + +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device) +{ + VirtIOBalloon *s; + + s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon", + vendor, device, + 0, VIRTIO_ID_BALLOON, + 0x05, 0x00, 0x00, + 8, sizeof(VirtIOBalloon)); + + s->vdev.get_config = virtio_balloon_get_config; + s->vdev.set_config = virtio_balloon_set_config; + s->vdev.get_features = virtio_balloon_get_features; + + s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); + s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); + + qemu_add_balloon_handler(virtio_balloon_to_target, s); + + return &s->vdev; +} diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c index 301b5a1..c8c3233 100644 --- a/qemu/hw/virtio-blk.c +++ b/qemu/hw/virtio-blk.c @@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, 0x01, 0x80, 0x00, 16, sizeof(VirtIOBlock)); - s->vdev.update_config = virtio_blk_update_config; + s->vdev.get_config = virtio_blk_update_config; s->vdev.get_features = virtio_blk_get_features; s->bs = bs; diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index 86f9e5a..8c1f953 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn) 0x02, 0x00, 0x00, 6, sizeof(VirtIONet)); - n->vdev.update_config = virtio_net_update_config; + n->vdev.get_config = virtio_net_update_config; n->vdev.get_features = virtio_net_get_features; n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx); n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx); diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c index 634f869..78d679f 100644 --- a/qemu/hw/virtio.c +++ b/qemu/hw/virtio.c @@ -307,6 +307,9 @@ static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) return; memcpy(vdev->config + addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); } static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) @@ -319,6 +322,9 @@ static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) return; memcpy(vdev->config + addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); } static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) @@ -331,6 +337,9 @@ static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) return; memcpy(vdev->config + addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); } static void virtio_map(PCIDevice *pci_dev, int region_num, @@ -359,7 +368,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num, register_ioport_read(addr + 20, vdev->config_len, 4, virtio_config_readl, vdev); - vdev->update_config(vdev, vdev->config); + vdev->get_config(vdev, vdev->config); } } @@ -383,6 +392,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, return &vdev->vq[i]; } +void virtio_notify_config(VirtIODevice *vdev) +{ + /* make sure we have the latest config */ + vdev->get_config(vdev, vdev->config); + vdev->isr = 3; + virtio_update_irq(vdev); +} + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { /* Always notify when queue is empty */ diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h index dee97ba..ed8bebd 100644 --- a/qemu/hw/virtio.h +++ b/qemu/hw/virtio.h @@ -118,7 +118,8 @@ struct VirtIODevice void *config; uint32_t (*get_features)(VirtIODevice *vdev); void (*set_features)(VirtIODevice *vdev, uint32_t val); - void (*update_config)(VirtIODevice *vdev, uint8_t *config); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; }; @@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); +void virtio_notify_config(VirtIODevice *vdev); + #endif diff --git a/qemu/monitor.c b/qemu/monitor.c index e8022c8..fb16566 100644 --- a/qemu/monitor.c +++ b/qemu/monitor.c @@ -35,6 +35,7 @@ #include "audio/audio.h" #include "disas.h" #include "migration.h" +#include "balloon.h" #include <dirent.h> #include "qemu-kvm.h" @@ -1261,6 +1262,23 @@ static void do_wav_capture (const char *path, } #endif +static void do_balloon(int value) +{ + ram_addr_t target = value; + qemu_balloon(target << 20); +} + +static void do_info_balloon(void) +{ + ram_addr_t actual; + + actual = qemu_balloon_status(); + if (actual == 0) + term_printf("Ballooning not activated in VM\n"); + else + term_printf("balloon: actual=%d\n", (int)(actual >> 20)); +} + static term_cmd_t term_cmds[] = { { "help|?", "s?", do_help, "[cmd]", "show the help" }, @@ -1338,6 +1356,8 @@ static term_cmd_t term_cmds[] = { "", "cancel the current VM migration" }, { "migrate_set_speed", "s", do_migrate_set_speed, "value", "set maximum speed (in bytes) for migrations" }, + { "balloon", "i", do_balloon, + "target", "request VM to change it's memory allocation (in MB)" }, { NULL, NULL, }, }; @@ -1400,6 +1420,8 @@ static term_cmd_t info_cmds[] = { #endif { "migration", "", do_info_migration, "", "show migration information" }, + { "balloon", "", do_info_balloon, + "", "show balloon information" }, { NULL, NULL, }, }; diff --git a/qemu/vl.c b/qemu/vl.c index c9ed3f0..141cc98 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -38,6 +38,7 @@ #include "block.h" #include "audio/audio.h" #include "migration.h" +#include "balloon.h" #include "qemu-kvm.h" #include <unistd.h> @@ -513,6 +514,31 @@ void hw_error(const char *fmt, ...) abort(); } +/***************/ +/* ballooning */ + +static QEMUBalloonEvent *qemu_balloon_event; +void *qemu_balloon_event_opaque; + +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque) +{ + qemu_balloon_event = func; + qemu_balloon_event_opaque = opaque; +} + +void qemu_balloon(ram_addr_t target) +{ + if (qemu_balloon_event) + qemu_balloon_event(qemu_balloon_event_opaque, target); +} + +ram_addr_t qemu_balloon_status(void) +{ + if (qemu_balloon_event) + return qemu_balloon_event(qemu_balloon_event_opaque, 0); + return 0; +} + /***********************************************************/ /* keyboard/mouse */ ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] QEMU support for virtio balloon driver 2008-02-25 19:47 Anthony Liguori @ 2008-02-25 23:45 ` Dor Laor 0 siblings, 0 replies; 14+ messages in thread From: Dor Laor @ 2008-02-25 23:45 UTC (permalink / raw) To: Anthony Liguori; +Cc: kvm-devel, Marcelo Tosatti, Avi Kivity On Mon, 2008-02-25 at 13:47 -0600, Anthony Liguori wrote: > This patch adds support to QEMU for Rusty's recently introduce virtio balloon > driver. The user-facing portions of this are the introduction of a "balloon" > and "info balloon" command in the monitor. > The patch looks good. Might be a good idea to split it into 2 (balloon vs s->vdev.get_config patch. > Right now madvise() is commented out since it causes host panics. Ballooning > is still functional though--the host just doesn't reclaim the memory > immediately. > Since the mmu notifiers are not merged into 2.6.25 even, we need a temporary solution for it. There are two+ options: 1. Use the ksm (shared pages) scan and zero ballooned pages. This way the memory will be quickly picked up by the kernel. 2. Add in-kernel atomic ioctl for zapping the mmu + madvise 2+. Some sort of mmu notifiers backport (hard and hacky). IMHO 2 is the best option. btw: Izik's ksm module enable running 50 1G xp guests over a 16G host even without the ballooning running! ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2008-03-09 2:46 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-24 21:23 [PATCH] QEMU support for virtio balloon driver Anthony Liguori
[not found] ` <1201209786831-git-send-email-aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-01-24 22:29 ` Anthony Liguori
[not found] ` <4799115F.8010506-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-01-25 16:08 ` Marcelo Tosatti
2008-01-25 17:02 ` Anthony Liguori
[not found] ` <479A162C.1060209-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-01-26 18:47 ` Avi Kivity
2008-03-08 19:27 ` Marcelo Tosatti
2008-03-08 20:51 ` Marcelo Tosatti
2008-03-09 2:46 ` Anthony Liguori
2008-01-26 3:35 ` Rusty Russell
2008-01-25 23:08 ` Dor Laor
[not found] ` <1201302492.2944.8.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2008-01-26 0:10 ` Anthony Liguori
[not found] ` <479A7A5C.6030005-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-01-26 18:35 ` Avi Kivity
-- strict thread matches above, loose matches on Subject: below --
2008-02-25 19:47 Anthony Liguori
2008-02-25 23:45 ` Dor Laor
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox