From: Anthony Liguori <aliguori@us.ibm.com>
To: Avi Kivity <avi@qumranet.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Andrea Arcangeli <andrea@qumranet.com>,
kvm-devel <kvm@vger.kernel.org>
Subject: madvise() not triggering page discard with MMU-notifiers
Date: Mon, 18 Aug 2008 17:04:25 -0500 [thread overview]
Message-ID: <48A9F1E9.6040907@us.ibm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 450 bytes --]
Attached is an updated migration patch. It does not appear, however,
that madvise() is triggering discarding of pages even with an
MMU-notifier enabled kernel. I take it that this is because we're still
holding a reference count to the page when it is in the shadow page
table? Any ETA on when this will change?
N.B. the virtio_balloon driver is broken in Linus' tree. Sending patch
for that following this one.
Regards,
Anthony Liguori
[-- Attachment #2: virtio-balloon-driver.patch --]
[-- Type: text/x-patch, Size: 11826 bytes --]
diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 5edfad7..ebee5b8 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -1053,6 +1053,15 @@ int kvm_pit_in_kernel(kvm_context_t kvm)
return kvm->pit_in_kernel;
}
+/*
+ * Report whether the kernel advertises KVM_CAP_SYNC_MMU.
+ *
+ * Returns 1 if KVM_CHECK_EXTENSION reports the capability, 0 otherwise.
+ * "Otherwise" covers a build against headers that lack KVM_CAP_SYNC_MMU
+ * and a failing ioctl().
+ */
+int kvm_has_mmu_notifiers(kvm_context_t kvm)
+{
+	int r = 0;
+#ifdef KVM_CAP_SYNC_MMU
+	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
+#endif
+	/* ioctl() returns -1 on error; do not let that read as "supported". */
+	return r > 0;
+}
+
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
int r = 0;
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 9f06fcc..fb51ee1 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -528,6 +528,8 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm);
*/
int kvm_irqchip_in_kernel(kvm_context_t kvm);
+/*!
+ * \brief Check whether the kernel advertises KVM_CAP_SYNC_MMU
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \return 0 when KVM_CHECK_EXTENSION reports 0 or the capability was unknown
+ * at build time, non-zero otherwise
+ */
+int kvm_has_mmu_notifiers(kvm_context_t kvm);
+
#ifdef KVM_CAP_IRQCHIP
/*!
* \brief Dump in kernel IRQCHIP contents
diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index a86464f..cd985f6 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -598,7 +598,7 @@ OBJS += rtl8139.o
OBJS += e1000.o
# virtio devices
-OBJS += virtio.o virtio-net.o virtio-blk.o
+OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o
OBJS += device-hotplug.o
diff --git a/qemu/balloon.h b/qemu/balloon.h
new file mode 100644
index 0000000..60b4a5d
--- /dev/null
+++ b/qemu/balloon.h
@@ -0,0 +1,27 @@
+/*
+ * Balloon
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_BALLOON_H
+#define _QEMU_BALLOON_H
+
+#include "cpu-defs.h"
+
+/*
+ * Balloon handler callback.  It receives the opaque pointer given at
+ * registration and a target; qemu_balloon_status() calls it with target 0.
+ */
+typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+
+/* Register func/opaque as the balloon handler (a single slot is kept). */
+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
+
+/* Pass target to the registered handler; does nothing if none is set. */
+void qemu_balloon(ram_addr_t target);
+
+/* Call the handler with target 0 and return its result; 0 if none is set. */
+ram_addr_t qemu_balloon_status(void);
+
+#endif
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 3a8269b..b19e558 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -1161,6 +1161,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
extboot_init(info->bdrv, 1);
}
+
+ if (pci_enabled)
+ virtio_balloon_init(pci_bus);
}
static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index c284bf1..5b68d69 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -164,4 +164,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
void extboot_init(BlockDriverState *bs, int cmd);
+/* virtio-balloon.c */
+void *virtio_balloon_init(PCIBus *bus);
+
#endif
diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c
new file mode 100644
index 0000000..457b88f
--- /dev/null
+++ b/qemu/hw/virtio-balloon.c
@@ -0,0 +1,194 @@
+/*
+ * Virtio Balloon Device
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "virtio.h"
+#include "pc.h"
+#include "sysemu.h"
+#include "cpu.h"
+#include "balloon.h"
+#include "virtio-balloon.h"
+#include "qemu-kvm.h"
+
+#if defined(__linux__)
+#include <sys/mman.h>
+#endif
+
+/* Per-device state of the virtio balloon. */
+typedef struct VirtIOBalloon
+{
+ /* Must stay the first member: to_virtio_balloon() casts a VirtIODevice *
+  * straight to VirtIOBalloon *. */
+ VirtIODevice vdev;
+ /* The two queues added in virtio_balloon_init(); the output handler
+  * treats requests on dvq as deflate and everything else as inflate. */
+ VirtQueue *ivq, *dvq;
+ /* Pages the host asks the guest to give up (set from the balloon target). */
+ uint32_t num_pages;
+ /* Pages the guest reports in the balloon (written via set_config). */
+ uint32_t actual;
+} VirtIOBalloon;
+
+/* Recover the balloon state from its embedded VirtIODevice (first member). */
+static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
+{
+    VirtIOBalloon *balloon = (VirtIOBalloon *)vdev;
+
+    return balloon;
+}
+
+/*
+ * Apply one balloon request to a single guest page at host address addr.
+ *
+ * Inflate (deflate == 0) issues MADV_DONTNEED on the page; deflate issues
+ * MADV_WILLNEED.  Compiled to a no-op on non-Linux hosts.
+ */
+static void balloon_page(void *addr, int deflate)
+{
+#if defined(__linux__)
+    /*
+     * Only touch the mapping when not running under KVM, or when KVM has
+     * MMU notifiers.  Under KVM without them, ballooning stays disabled,
+     * as "info balloon" reports.
+     */
+    if (!kvm_enabled() || qemu_kvm_has_mmu_notifiers())
+        madvise(addr, TARGET_PAGE_SIZE,
+                deflate ? MADV_WILLNEED : MADV_DONTNEED);
+#endif
+}
+
+/*
+ * Copy up to size bytes into data, starting at byte position offset of the
+ * logical byte stream formed by concatenating the iovlen entries of iov.
+ *
+ * Returns the number of bytes actually copied, which is less than size when
+ * the vector ends first.
+ */
+/* FIXME: once we do a virtio refactoring, this will get subsumed into common
+ * code */
+static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
+ struct iovec *iov, int iovlen)
+{
+ int i;
+ uint8_t *ptr = data;
+ /* iov_off: stream position where iov[i] starts; data_off: bytes copied. */
+ size_t iov_off = 0;
+ size_t data_off = 0;
+
+ for (i = 0; i < iovlen && size; i++) {
+ /* Entries that end at or before offset are skipped entirely. */
+ if (offset < (iov_off + iov[i].iov_len)) {
+ /* Take what is left of this entry, capped by the bytes still wanted. */
+ size_t len = MIN((iov_off + iov[i].iov_len) - offset , size);
+
+ memcpy(ptr + data_off, iov[i].iov_base + (offset - iov_off), len);
+
+ /* Advance offset too, so the next entry is read from its start. */
+ data_off += len;
+ offset += len;
+ size -= len;
+ }
+
+ iov_off += iov[i].iov_len;
+ }
+
+ return data_off;
+}
+
+/*
+ * Queue handler shared by both balloon virtqueues.
+ *
+ * Each popped element carries a list of 32-bit page frame numbers in its
+ * out buffers.  Every PFN that maps to RAM is passed to balloon_page(), as a
+ * deflate when the request arrived on dvq and as an inflate otherwise.
+ */
+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *s = to_virtio_balloon(vdev);
+ VirtQueueElement elem;
+
+ while (virtqueue_pop(vq, &elem)) {
+ size_t offset = 0;
+ uint32_t pfn;
+
+ /* Stop at the first short read: a trailing partial PFN is ignored. */
+ while (memcpy_from_iovector(&pfn, offset, 4,
+ elem.out_sg, elem.out_num) == 4) {
+ ram_addr_t pa;
+ ram_addr_t addr;
+
+ /* PFN -> guest physical address. */
+ pa = (ram_addr_t)ldl_p(&pfn) << TARGET_PAGE_BITS;
+ /* Advance before the lookup so the 'continue' below still moves on. */
+ offset += 4;
+
+ addr = cpu_get_physical_page_desc(pa);
+ /* Skip anything whose page descriptor is not plain RAM. */
+ if ((addr & ~TARGET_PAGE_MASK) != IO_MEM_RAM)
+ continue;
+
+ balloon_page(phys_ram_base + addr, !!(vq == s->dvq));
+ }
+
+ /* Return the element with the number of PFN bytes consumed, then notify. */
+ virtqueue_push(vq, &elem, offset);
+ virtio_notify(vdev, vq);
+ }
+}
+
+/* Write the device config (num_pages, actual) to config_data, little-endian. */
+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOBalloon *s = to_virtio_balloon(vdev);
+    struct virtio_balloon_config cfg;
+
+    cfg.num_pages = cpu_to_le32(s->num_pages);
+    cfg.actual = cpu_to_le32(s->actual);
+
+    /* Two uint32_t fields: the same 8 bytes the device exposes as config. */
+    memcpy(config_data, &cfg, sizeof(cfg));
+}
+
+/*
+ * The guest wrote the device config space: pick up the balloon size it
+ * reports.  Only actual is taken from the guest; num_pages is set by the
+ * host side and is left untouched.
+ */
+static void virtio_balloon_set_config(VirtIODevice *vdev,
+                                      const uint8_t *config_data)
+{
+    VirtIOBalloon *dev = to_virtio_balloon(vdev);
+    struct virtio_balloon_config config;
+
+    memcpy(&config, config_data, sizeof(config));
+    /*
+     * virtio_balloon_get_config() publishes the fields little-endian, so
+     * convert back to host order instead of storing the raw value.
+     */
+    dev->actual = le32_to_cpu(config.actual);
+}
+
+/* Feature bits offered to the guest: none are advertised. */
+static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
+{
+ return 0;
+}
+
+/*
+ * Balloon handler registered with qemu_add_balloon_handler().
+ *
+ * A non-zero target is the amount of RAM in bytes the guest should keep.
+ * It is clamped to ram_size, then num_pages is updated and a config-change
+ * notification is raised.  A target of 0 changes nothing and only queries.
+ *
+ * Returns ram_size minus the memory the guest last reported as ballooned.
+ */
+static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+    VirtIOBalloon *dev = opaque;
+
+    if (target > ram_size)
+        target = ram_size;
+
+    if (target) {
+        dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS;
+        virtio_notify_config(&dev->vdev);
+    }
+
+    /*
+     * Widen before shifting: actual is a uint32_t, so the shift would
+     * otherwise be done in 32 bits and wrap for large balloons.
+     */
+    return ram_size - ((ram_addr_t)dev->actual << TARGET_PAGE_BITS);
+}
+
+/* savevm handler: common virtio state first, then the two balloon counters. */
+static void virtio_balloon_save(QEMUFile *f, void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = &dev->vdev;
+
+    virtio_save(vdev, f);
+    qemu_put_be32(f, dev->num_pages);
+    qemu_put_be32(f, dev->actual);
+}
+
+/*
+ * loadvm handler, the mirror image of virtio_balloon_save().
+ * Only stream version 1 is accepted; anything else yields -EINVAL.
+ */
+static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+{
+    VirtIOBalloon *dev = opaque;
+
+    if (version_id == 1) {
+        virtio_load(&dev->vdev, f);
+        dev->num_pages = qemu_get_be32(f);
+        dev->actual = qemu_get_be32(f);
+        return 0;
+    }
+
+    return -EINVAL;
+}
+
+/*
+ * Create the virtio balloon PCI device on bus.
+ *
+ * Installs the config and feature callbacks, adds two virtqueues that share
+ * virtio_balloon_handle_output(), registers the device as the balloon
+ * handler and registers savevm/loadvm hooks with version 1.
+ *
+ * Returns a pointer to the embedded VirtIODevice, or NULL when
+ * virtio_init_pci() returns NULL.
+ */
+void *virtio_balloon_init(PCIBus *bus)
+{
+ VirtIOBalloon *s;
+
+ /* NOTE(review): 6900 == 0x1af4, presumably the virtio PCI vendor ID, and
+  * the trailing 8 is presumably the config space size (two uint32_t
+  * fields) -- confirm against virtio_init_pci()'s prototype. */
+ s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon",
+ 6900, 0x1002,
+ 0, VIRTIO_ID_BALLOON,
+ 0x05, 0x00, 0x00,
+ 8, sizeof(VirtIOBalloon));
+ if (s == NULL)
+ return NULL;
+
+ s->vdev.get_config = virtio_balloon_get_config;
+ s->vdev.set_config = virtio_balloon_set_config;
+ s->vdev.get_features = virtio_balloon_get_features;
+
+ /* ivq is added first, dvq second; the handler tells them apart by pointer. */
+ s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+ s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+
+ qemu_add_balloon_handler(virtio_balloon_to_target, s);
+
+ register_savevm("virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s);
+
+ return &s->vdev;
+}
diff --git a/qemu/hw/virtio-balloon.h b/qemu/hw/virtio-balloon.h
new file mode 100644
index 0000000..27d6985
--- /dev/null
+++ b/qemu/hw/virtio-balloon.h
@@ -0,0 +1,34 @@
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007-2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VIRTIO_BALLOON_H
+#define _QEMU_VIRTIO_BALLOON_H
+
+/* from Linux's linux/virtio_balloon.h */
+
+/* The ID for virtio_balloon */
+#define VIRTIO_ID_BALLOON 5
+
+/* The feature bitmap for virtio balloon */
+#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+
+/* Layout of the balloon device's config space: two 32-bit fields, 8 bytes.
+ * virtio_balloon_get_config() stores both fields little-endian. */
+struct virtio_balloon_config
+{
+ /* Number of pages host wants Guest to give up. */
+ uint32_t num_pages;
+ /* Number of pages we've actually got in balloon. */
+ uint32_t actual;
+};
+
+#endif
diff --git a/qemu/monitor.c b/qemu/monitor.c
index 4acf346..b8c4fde 100644
--- a/qemu/monitor.c
+++ b/qemu/monitor.c
@@ -35,6 +35,7 @@
#include "audio/audio.h"
#include "disas.h"
#include "migration.h"
+#include "balloon.h"
#include <dirent.h>
#include "qemu-timer.h"
@@ -1399,6 +1400,25 @@ static void do_inject_nmi(int cpu_index)
}
#endif
+/* Monitor command "balloon": value is the requested target in megabytes. */
+static void do_balloon(int value)
+{
+    /* Convert to ram_addr_t before scaling MB to bytes. */
+    qemu_balloon((ram_addr_t)value << 20);
+}
+
+/* Monitor command "info balloon": print the balloon state in megabytes. */
+static void do_info_balloon(void)
+{
+    /* Query first, as before, so the handler is always invoked. */
+    ram_addr_t actual = qemu_balloon_status();
+
+    if (kvm_enabled() && !qemu_kvm_has_mmu_notifiers()) {
+        term_printf("Using KVM without MMU-notifiers, ballooning disabled\n");
+        return;
+    }
+
+    if (actual == 0) {
+        term_printf("Ballooning not activated in VM\n");
+        return;
+    }
+
+    term_printf("balloon: actual=%d\n", (int)(actual >> 20));
+}
+
static term_cmd_t term_cmds[] = {
{ "help|?", "s?", do_help,
"[cmd]", "show the help" },
@@ -1494,6 +1514,8 @@ static term_cmd_t term_cmds[] = {
{ "pci_add", "iss", device_hot_add, "bus nic|storage [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" },
{ "pci_del", "ii", device_hot_remove, "bus slot-number", "hot remove PCI device" },
#endif
+ { "balloon", "i", do_balloon,
+ "target", "request VM to change it's memory allocation (in MB)" },
{ NULL, NULL, },
};
@@ -1558,6 +1580,8 @@ static term_cmd_t info_cmds[] = {
#endif
{ "migration", "", do_info_migration,
"", "show migration information" },
+ { "balloon", "", do_info_balloon,
+ "", "show balloon information" },
{ NULL, NULL, },
};
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 7e28428..088bae3 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -110,10 +110,12 @@ extern kvm_context_t kvm_context;
#define kvm_enabled() (kvm_allowed)
#define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
#define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
+#define qemu_kvm_has_mmu_notifiers() kvm_has_mmu_notifiers(kvm_context)
#else
#define kvm_enabled() (0)
#define qemu_kvm_irqchip_in_kernel() (0)
#define qemu_kvm_pit_in_kernel() (0)
+#define qemu_kvm_has_mmu_notifiers() (0)
#endif
void kvm_mutex_unlock(void);
diff --git a/qemu/vl.c b/qemu/vl.c
index 2dc1311..ec89921 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -38,6 +38,7 @@
#include "block.h"
#include "audio/audio.h"
#include "migration.h"
+#include "balloon.h"
#include "qemu-kvm.h"
#include <unistd.h>
@@ -530,6 +531,31 @@ void hw_error(const char *fmt, ...)
va_end(ap);
abort();
}
+
+/***************/
+/* ballooning */
+
+/*
+ * The registered balloon handler and its opaque argument.  Both stay NULL
+ * until qemu_add_balloon_handler() is called and are private to this file.
+ */
+static QEMUBalloonEvent *qemu_balloon_event;
+static void *qemu_balloon_event_opaque;
+
+/* Install func as the single balloon handler, replacing any previous one. */
+void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
+{
+    qemu_balloon_event_opaque = opaque;
+    qemu_balloon_event = func;
+}
+
+/* Forward a balloon target to the handler; a no-op when none is registered. */
+void qemu_balloon(ram_addr_t target)
+{
+    if (!qemu_balloon_event)
+        return;
+
+    qemu_balloon_event(qemu_balloon_event_opaque, target);
+}
+
+/* Query the handler with a target of 0; returns 0 when none is registered. */
+ram_addr_t qemu_balloon_status(void)
+{
+    if (!qemu_balloon_event)
+        return 0;
+
+    return qemu_balloon_event(qemu_balloon_event_opaque, 0);
+}
/***********************************************************/
/* keyboard/mouse */
next reply other threads:[~2008-08-18 22:05 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-18 22:04 Anthony Liguori [this message]
2008-08-20 1:16 ` madvise() not triggering page discard with MMU-notifiers Marcelo Tosatti
2008-08-20 20:38 ` Anthony Liguori
2008-08-28 14:23 ` Andrea Arcangeli
2008-08-28 22:55 ` Anthony Liguori
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48A9F1E9.6040907@us.ibm.com \
--to=aliguori@us.ibm.com \
--cc=andrea@qumranet.com \
--cc=avi@qumranet.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.