* [Qemu-devel] [5874] Add virtio-balloon support
@ 2008-12-04 20:33 Anthony Liguori
2008-12-04 21:34 ` Hollis Blanchard
0 siblings, 1 reply; 5+ messages in thread
From: Anthony Liguori @ 2008-12-04 20:33 UTC (permalink / raw)
To: qemu-devel
Revision: 5874
http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=5874
Author: aliguori
Date: 2008-12-04 20:33:06 +0000 (Thu, 04 Dec 2008)
Log Message:
-----------
Add virtio-balloon support
This adds a VirtIO based balloon driver. It uses madvise() to actually balloon
the memory when possible.
Until 2.6.27, KVM forced memory pinning so we must disable ballooning unless the
kernel actually supports it when using KVM. It's always safe when using TCG.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Modified Paths:
--------------
trunk/Makefile.target
trunk/hw/pc.c
trunk/kvm-all.c
trunk/kvm.h
trunk/monitor.c
Added Paths:
-----------
trunk/hw/virtio-balloon.c
trunk/hw/virtio-balloon.h
Modified: trunk/Makefile.target
===================================================================
--- trunk/Makefile.target 2008-12-04 20:19:35 UTC (rev 5873)
+++ trunk/Makefile.target 2008-12-04 20:33:06 UTC (rev 5874)
@@ -665,7 +665,7 @@
OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
# virtio support
-OBJS+= virtio.o virtio-blk.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o
CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
endif
ifeq ($(TARGET_BASE_ARCH), ppc)
@@ -684,7 +684,7 @@
# PowerPC 4xx boards
OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
# virtio support
-OBJS+= virtio.o virtio-blk.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o
endif
ifeq ($(TARGET_BASE_ARCH), mips)
OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
Modified: trunk/hw/pc.c
===================================================================
--- trunk/hw/pc.c 2008-12-04 20:19:35 UTC (rev 5873)
+++ trunk/hw/pc.c 2008-12-04 20:33:06 UTC (rev 5874)
@@ -34,6 +34,7 @@
#include "console.h"
#include "fw_cfg.h"
#include "virtio-blk.h"
+#include "virtio-balloon.h"
/* output Bochs bios info messages */
//#define DEBUG_BIOS
@@ -1105,6 +1106,10 @@
unit_id++;
}
}
+
+ /* Add virtio balloon device */
+ if (pci_enabled)
+ virtio_balloon_init(pci_bus);
}
static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
Added: trunk/hw/virtio-balloon.c
===================================================================
--- trunk/hw/virtio-balloon.c (rev 0)
+++ trunk/hw/virtio-balloon.c 2008-12-04 20:33:06 UTC (rev 5874)
@@ -0,0 +1,194 @@
+/*
+ * Virtio Balloon Device
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "virtio.h"
+#include "pc.h"
+#include "sysemu.h"
+#include "cpu.h"
+#include "balloon.h"
+#include "virtio-balloon.h"
+#include "kvm.h"
+
+#if defined(__linux__)
+#include <sys/mman.h>
+#endif
+
+typedef struct VirtIOBalloon
+{
+ VirtIODevice vdev;
+ VirtQueue *ivq, *dvq;
+ uint32_t num_pages;
+ uint32_t actual;
+} VirtIOBalloon;
+
+static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
+{
+ return (VirtIOBalloon *)vdev;
+}
+
+static void balloon_page(void *addr, int deflate)
+{
+#if defined(__linux__)
+ if (!kvm_enabled() || kvm_has_sync_mmu())
+ madvise(addr, TARGET_PAGE_SIZE,
+ deflate ? MADV_WILLNEED : MADV_DONTNEED);
+#endif
+}
+
+/* FIXME: once we do a virtio refactoring, this will get subsumed into common
+ * code */
+static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
+ struct iovec *iov, int iovlen)
+{
+ int i;
+ uint8_t *ptr = data;
+ size_t iov_off = 0;
+ size_t data_off = 0;
+
+ for (i = 0; i < iovlen && size; i++) {
+ if (offset < (iov_off + iov[i].iov_len)) {
+ size_t len = MIN((iov_off + iov[i].iov_len) - offset , size);
+
+ memcpy(ptr + data_off, iov[i].iov_base + (offset - iov_off), len);
+
+ data_off += len;
+ offset += len;
+ size -= len;
+ }
+
+ iov_off += iov[i].iov_len;
+ }
+
+ return data_off;
+}
+
+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *s = to_virtio_balloon(vdev);
+ VirtQueueElement elem;
+
+ while (virtqueue_pop(vq, &elem)) {
+ size_t offset = 0;
+ uint32_t pfn;
+
+ while (memcpy_from_iovector(&pfn, offset, 4,
+ elem.out_sg, elem.out_num) == 4) {
+ ram_addr_t pa;
+ ram_addr_t addr;
+
+ pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;
+ offset += 4;
+
+ addr = cpu_get_physical_page_desc(pa);
+ if ((addr & ~TARGET_PAGE_MASK) != IO_MEM_RAM)
+ continue;
+
+ balloon_page(phys_ram_base + addr, !!(vq == s->dvq));
+ }
+
+ virtqueue_push(vq, &elem, offset);
+ virtio_notify(vdev, vq);
+ }
+}
+
+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+ VirtIOBalloon *dev = to_virtio_balloon(vdev);
+ struct virtio_balloon_config config;
+
+ config.num_pages = cpu_to_le32(dev->num_pages);
+ config.actual = cpu_to_le32(dev->actual);
+
+ memcpy(config_data, &config, 8);
+}
+
+static void virtio_balloon_set_config(VirtIODevice *vdev,
+ const uint8_t *config_data)
+{
+ VirtIOBalloon *dev = to_virtio_balloon(vdev);
+ struct virtio_balloon_config config;
+ memcpy(&config, config_data, 8);
+ dev->actual = config.actual;
+}
+
+static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
+{
+ return 0;
+}
+
+static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+ VirtIOBalloon *dev = opaque;
+
+ if (target > ram_size)
+ target = ram_size;
+
+ if (target) {
+ dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
+ virtio_notify_config(&dev->vdev);
+ }
+
+ return ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+}
+
+static void virtio_balloon_save(QEMUFile *f, void *opaque)
+{
+ VirtIOBalloon *s = opaque;
+
+ virtio_save(&s->vdev, f);
+
+ qemu_put_be32(f, s->num_pages);
+ qemu_put_be32(f, s->actual);
+}
+
+static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIOBalloon *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ virtio_load(&s->vdev, f);
+
+ s->num_pages = qemu_get_be32(f);
+ s->actual = qemu_get_be32(f);
+
+ return 0;
+}
+
+void *virtio_balloon_init(PCIBus *bus)
+{
+ VirtIOBalloon *s;
+
+ s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon",
+ 6900, 0x1002,
+ 0, VIRTIO_ID_BALLOON,
+ 0x05, 0x00, 0x00,
+ 8, sizeof(VirtIOBalloon));
+ if (s == NULL)
+ return NULL;
+
+ s->vdev.get_config = virtio_balloon_get_config;
+ s->vdev.set_config = virtio_balloon_set_config;
+ s->vdev.get_features = virtio_balloon_get_features;
+
+ s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+ s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+
+ qemu_add_balloon_handler(virtio_balloon_to_target, s);
+
+ register_savevm("virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s);
+
+ return &s->vdev;
+}
Added: trunk/hw/virtio-balloon.h
===================================================================
--- trunk/hw/virtio-balloon.h (rev 0)
+++ trunk/hw/virtio-balloon.h 2008-12-04 20:33:06 UTC (rev 5874)
@@ -0,0 +1,42 @@
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007-2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VIRTIO_BALLOON_H
+#define _QEMU_VIRTIO_BALLOON_H
+
+#include "virtio.h"
+#include "pci.h"
+
+/* from Linux's linux/virtio_balloon.h */
+
+/* The ID for virtio_balloon */
+#define VIRTIO_ID_BALLOON 5
+
+/* The feature bitmap for virtio balloon */
+#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+
+/* Size of a PFN in the balloon interface. */
+#define VIRTIO_BALLOON_PFN_SHIFT 12
+
+struct virtio_balloon_config
+{
+ /* Number of pages host wants Guest to give up. */
+ uint32_t num_pages;
+ /* Number of pages we've actually got in balloon. */
+ uint32_t actual;
+};
+
+void *virtio_balloon_init(PCIBus *bus);
+
+#endif
Modified: trunk/kvm-all.c
===================================================================
--- trunk/kvm-all.c 2008-12-04 20:19:35 UTC (rev 5873)
+++ trunk/kvm-all.c 2008-12-04 20:33:06 UTC (rev 5874)
@@ -549,3 +549,15 @@
return ret;
}
+
+int kvm_has_sync_mmu(void)
+{
+ KVMState *s = kvm_state;
+
+#ifdef KVM_CAP_SYNC_MMU
+ if (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0)
+ return 1;
+#endif
+
+ return 0;
+}
Modified: trunk/kvm.h
===================================================================
--- trunk/kvm.h 2008-12-04 20:19:35 UTC (rev 5873)
+++ trunk/kvm.h 2008-12-04 20:33:06 UTC (rev 5874)
@@ -42,6 +42,9 @@
int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len);
int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len);
+
+int kvm_has_sync_mmu(void);
+
/* internal API */
struct KVMState;
Modified: trunk/monitor.c
===================================================================
--- trunk/monitor.c 2008-12-04 20:19:35 UTC (rev 5873)
+++ trunk/monitor.c 2008-12-04 20:33:06 UTC (rev 5874)
@@ -1402,7 +1402,9 @@
ram_addr_t actual;
actual = qemu_balloon_status();
- if (actual == 0)
+ if (kvm_enabled() && !kvm_has_sync_mmu())
+ term_printf("Using KVM without synchronous MMU, ballooning disabled\n");
+ else if (actual == 0)
term_printf("Ballooning not activated in VM\n");
else
term_printf("balloon: actual=%d\n", (int)(actual >> 20));
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [5874] Add virtio-balloon support
2008-12-04 20:33 [Qemu-devel] [5874] Add virtio-balloon support Anthony Liguori
@ 2008-12-04 21:34 ` Hollis Blanchard
2008-12-04 22:34 ` Anthony Liguori
0 siblings, 1 reply; 5+ messages in thread
From: Hollis Blanchard @ 2008-12-04 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Rusty Russell, kvm-devel
On Thu, 2008-12-04 at 20:33 +0000, Anthony Liguori wrote:
>
> +static void balloon_page(void *addr, int deflate)
> +{
> +#if defined(__linux__)
> + if (!kvm_enabled() || kvm_has_sync_mmu())
> + madvise(addr, TARGET_PAGE_SIZE,
> + deflate ? MADV_WILLNEED : MADV_DONTNEED);
> +#endif
> +}
Hmm, I just noticed this... we need to use VIRTIO_BALLOON_PFN_SHIFT like
Rusty did on the kernel side.
However, in general I'm not sure how this is supposed to work. Isn't it
true that madvise() is a no-op if 0 < length < getpagesize()? If so, how
should the guest know the chunk size needed on the host?
What happens when a guest tries to balloon 4K pages when it's backed on
the host by hugetlbfs? We can't even use getpagesize() there.
Maybe the virtio balloon interface needs to advertise a unit size from
the host, and use that size instead of alloc_page() in the guest?
--
Hollis Blanchard
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [5874] Add virtio-balloon support
2008-12-04 21:34 ` Hollis Blanchard
@ 2008-12-04 22:34 ` Anthony Liguori
2008-12-05 14:14 ` Paul Brook
0 siblings, 1 reply; 5+ messages in thread
From: Anthony Liguori @ 2008-12-04 22:34 UTC (permalink / raw)
To: Hollis Blanchard; +Cc: Rusty Russell, qemu-devel, kvm-devel
Hollis Blanchard wrote:
> On Thu, 2008-12-04 at 20:33 +0000, Anthony Liguori wrote:
>
>> +static void balloon_page(void *addr, int deflate)
>> +{
>> +#if defined(__linux__)
>> + if (!kvm_enabled() || kvm_has_sync_mmu())
>> + madvise(addr, TARGET_PAGE_SIZE,
>> + deflate ? MADV_WILLNEED : MADV_DONTNEED);
>> +#endif
>> +}
>>
>
> Hmm, I just noticed this... we need to use VIRTIO_BALLOON_PFN_SHIFT like
> Rusty did on the kernel side.
>
> However, in general I'm not sure how this is supposed to work. Isn't it
> true that madvise() is a no-op if 0 < length < getpagesize()? If so, how
> should the guest know the chunk size needed on the host?
>
We need to pass a multiple of TARGET_PAGE_SIZE to madvise() but we can
certainly adjust that depending on VIRTIO_BALLOON_PFN_SHIFT. But
basically, if the two aren't equal, we shouldn't even try madvise().
> What happens when a guest tries to balloon 4K pages when it's backed on
> the host by hugetlbfs? We can't even use getpagesize() there.
>
Nothing. For ballooning to work in this circumstance, the guest would
have to balloon 2MB pages which isn't something that's reasonable for it
to do.
> Maybe the virtio balloon interface needs to advertise a unit size from
> the host, and use that size instead of alloc_page() in the guest?
>
That's a possibility. May make sense to give it the ability to balloon
memory up to unit size because I don't think it'll be able to meet the
reservation for large pages only.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [5874] Add virtio-balloon support
2008-12-04 22:34 ` Anthony Liguori
@ 2008-12-05 14:14 ` Paul Brook
2008-12-05 14:21 ` Anthony Liguori
0 siblings, 1 reply; 5+ messages in thread
From: Paul Brook @ 2008-12-05 14:14 UTC (permalink / raw)
To: qemu-devel; +Cc: Rusty Russell, Hollis Blanchard, kvm-devel
On Thursday 04 December 2008, Anthony Liguori wrote:
> Hollis Blanchard wrote:
> > On Thu, 2008-12-04 at 20:33 +0000, Anthony Liguori wrote:
> >> +static void balloon_page(void *addr, int deflate)
> >> +{
> >> +#if defined(__linux__)
> >> + if (!kvm_enabled() || kvm_has_sync_mmu())
> >> + madvise(addr, TARGET_PAGE_SIZE,
> >> + deflate ? MADV_WILLNEED : MADV_DONTNEED);
> >> +#endif
> >> +}
> >
> > Hmm, I just noticed this... we need to use VIRTIO_BALLOON_PFN_SHIFT like
> > Rusty did on the kernel side.
> >
> > However, in general I'm not sure how this is supposed to work. Isn't it
> > true that madvise() is a no-op if 0 < length < getpagesize()? If so, how
> > should the guest know the chunk size needed on the host?
>
> We need to pass multiple of TARGET_PAGE_SIZE to madvise()
You mean HOST_PAGE_SIZE?
My linux manpage says that the address must be page aligned.
Paul
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [5874] Add virtio-balloon support
2008-12-05 14:14 ` Paul Brook
@ 2008-12-05 14:21 ` Anthony Liguori
0 siblings, 0 replies; 5+ messages in thread
From: Anthony Liguori @ 2008-12-05 14:21 UTC (permalink / raw)
To: Paul Brook; +Cc: Rusty Russell, qemu-devel, Hollis Blanchard, kvm-devel
Paul Brook wrote:
> On Thursday 04 December 2008, Anthony Liguori wrote:
>
>> Hollis Blanchard wrote:
>>
>>> On Thu, 2008-12-04 at 20:33 +0000, Anthony Liguori wrote:
>>>
>>>> +static void balloon_page(void *addr, int deflate)
>>>> +{
>>>> +#if defined(__linux__)
>>>> + if (!kvm_enabled() || kvm_has_sync_mmu())
>>>> + madvise(addr, TARGET_PAGE_SIZE,
>>>> + deflate ? MADV_WILLNEED : MADV_DONTNEED);
>>>> +#endif
>>>> +}
>>>>
>>> Hmm, I just noticed this... we need to use VIRTIO_BALLOON_PFN_SHIFT like
>>> Rusty did on the kernel side.
>>>
>>> However, in general I'm not sure how this is supposed to work. Isn't it
>>> true that madvise() is a no-op if 0 < length < getpagesize()? If so, how
>>> should the guest know the chunk size needed on the host?
>>>
>> We need to pass multiple of TARGET_PAGE_SIZE to madvise()
>>
>
> You mean HOST_PAGE_SIZE?
>
I change my previous answer :-)
I think we should just pass addr and the appropriate size of the memory
the guest is ballooning. Whether the host can handle it is up to it.
Worst case scenario, madvise() returns an error and no harm is done.
Regards,
Anthony Liguori
> My linux manpage says that the address must be page aligned.
>
> Paul
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-12-05 14:21 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2008-12-04 20:33 [Qemu-devel] [5874] Add virtio-balloon support Anthony Liguori
2008-12-04 21:34 ` Hollis Blanchard
2008-12-04 22:34 ` Anthony Liguori
2008-12-05 14:14 ` Paul Brook
2008-12-05 14:21 ` Anthony Liguori
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).