From mboxrd@z Thu Jan 1 00:00:00 1970 From: Anthony Liguori Subject: madvise() not triggering page discard with MMU-notifiers Date: Mon, 18 Aug 2008 17:04:25 -0500 Message-ID: <48A9F1E9.6040907@us.ibm.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------070705020009010304080809" To: Avi Kivity , Marcelo Tosatti , Andrea Arcangeli , kvm-devel Return-path: Received: from e5.ny.us.ibm.com ([32.97.182.145]:36003 "EHLO e5.ny.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754312AbYHRWFJ (ORCPT ); Mon, 18 Aug 2008 18:05:09 -0400 Received: from d01relay04.pok.ibm.com (d01relay04.pok.ibm.com [9.56.227.236]) by e5.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m7IM55jY003281 for ; Mon, 18 Aug 2008 18:05:05 -0400 Received: from d01av04.pok.ibm.com (d01av04.pok.ibm.com [9.56.224.64]) by d01relay04.pok.ibm.com (8.13.8/8.13.8/NCO v9.0) with ESMTP id m7IM55XT240730 for ; Mon, 18 Aug 2008 18:05:05 -0400 Received: from d01av04.pok.ibm.com (loopback [127.0.0.1]) by d01av04.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m7IM54r3018994 for ; Mon, 18 Aug 2008 18:05:05 -0400 Sender: kvm-owner@vger.kernel.org List-ID: This is a multi-part message in MIME format. --------------070705020009010304080809 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Attached is an updated migration patch. It does not appear, however, that madvise() is triggering discarding of pages even with an MMU-notifier enabled kernel. I take it that this is because we're still holding a reference count to the page when it is in the shadow page table? Any ETA on when this will change? N.B. the virtio_balloon driver is broken in Linus' tree. Sending patch for that following this one. 
Regards, Anthony Liguori --------------070705020009010304080809 Content-Type: text/x-patch; name="virtio-balloon-driver.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="virtio-balloon-driver.patch" diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c index 5edfad7..ebee5b8 100644 --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -1053,6 +1053,15 @@ int kvm_pit_in_kernel(kvm_context_t kvm) return kvm->pit_in_kernel; } +int kvm_has_mmu_notifiers(kvm_context_t kvm) +{ + int r = 0; +#ifdef KVM_CAP_SYNC_MMU + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU); +#endif + return r; +} + int kvm_init_coalesced_mmio(kvm_context_t kvm) { int r = 0; diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h index 9f06fcc..fb51ee1 100644 --- a/libkvm/libkvm.h +++ b/libkvm/libkvm.h @@ -528,6 +528,8 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm); */ int kvm_irqchip_in_kernel(kvm_context_t kvm); +int kvm_has_mmu_notifiers(kvm_context_t kvm); + #ifdef KVM_CAP_IRQCHIP /*! * \brief Dump in kernel IRQCHIP contents diff --git a/qemu/Makefile.target b/qemu/Makefile.target index a86464f..cd985f6 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -598,7 +598,7 @@ OBJS += rtl8139.o OBJS += e1000.o # virtio devices -OBJS += virtio.o virtio-net.o virtio-blk.o +OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o OBJS += device-hotplug.o diff --git a/qemu/balloon.h b/qemu/balloon.h new file mode 100644 index 0000000..60b4a5d --- /dev/null +++ b/qemu/balloon.h @@ -0,0 +1,27 @@ +/* + * Balloon + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#ifndef _QEMU_BALLOON_H +#define _QEMU_BALLOON_H + +#include "cpu-defs.h" + +typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target); + +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque); + +void qemu_balloon(ram_addr_t target); + +ram_addr_t qemu_balloon_status(void); + +#endif diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index 3a8269b..b19e558 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -1161,6 +1161,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, extboot_init(info->bdrv, 1); } + + if (pci_enabled) + virtio_balloon_init(pci_bus); } static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size, diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index c284bf1..5b68d69 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -164,4 +164,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, void extboot_init(BlockDriverState *bs, int cmd); +/* virtio-balloon.h */ +void *virtio_balloon_init(PCIBus *bus); + #endif diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c new file mode 100644 index 0000000..457b88f --- /dev/null +++ b/qemu/hw/virtio-balloon.c @@ -0,0 +1,194 @@ +/* + * Virtio Balloon Device + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu-common.h" +#include "virtio.h" +#include "pc.h" +#include "sysemu.h" +#include "cpu.h" +#include "balloon.h" +#include "virtio-balloon.h" +#include "qemu-kvm.h" + +#if defined(__linux__) +#include <sys/mman.h> +#endif + +typedef struct VirtIOBalloon +{ + VirtIODevice vdev; + VirtQueue *ivq, *dvq; + uint32_t num_pages; + uint32_t actual; +} VirtIOBalloon; + +static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) +{ + return (VirtIOBalloon *)vdev; +} + +static void balloon_page(void *addr, int deflate) +{ +#if defined(__linux__) + if (!kvm_enabled() || qemu_kvm_has_mmu_notifiers()) /* skip only for KVM without MMU notifiers, the case "info balloon" reports as disabled */ + madvise(addr, TARGET_PAGE_SIZE, + deflate ? MADV_WILLNEED : MADV_DONTNEED); +#endif +} + +/* FIXME: once we do a virtio refactoring, this will get subsumed into common + * code */ +static size_t memcpy_from_iovector(void *data, size_t offset, size_t size, + struct iovec *iov, int iovlen) +{ + int i; + uint8_t *ptr = data; + size_t iov_off = 0; + size_t data_off = 0; + + for (i = 0; i < iovlen && size; i++) { + if (offset < (iov_off + iov[i].iov_len)) { + size_t len = MIN((iov_off + iov[i].iov_len) - offset , size); + + memcpy(ptr + data_off, iov[i].iov_base + (offset - iov_off), len); + + data_off += len; + offset += len; + size -= len; + } + + iov_off += iov[i].iov_len; + } + + return data_off; +} + +static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *s = to_virtio_balloon(vdev); + VirtQueueElement elem; + + while (virtqueue_pop(vq, &elem)) { + size_t offset = 0; + uint32_t pfn; + + while (memcpy_from_iovector(&pfn, offset, 4, + elem.out_sg, elem.out_num) == 4) { + ram_addr_t pa; + ram_addr_t addr; + + pa = (ram_addr_t)ldl_p(&pfn) << TARGET_PAGE_BITS; + offset += 4; + + addr = cpu_get_physical_page_desc(pa); + if ((addr & ~TARGET_PAGE_MASK) != IO_MEM_RAM) + continue; + + balloon_page(phys_ram_base + addr, !!(vq == s->dvq)); + } + + virtqueue_push(vq, &elem, offset); + virtio_notify(vdev, vq); + } +} + +static void 
virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) +{ + VirtIOBalloon *dev = to_virtio_balloon(vdev); + struct virtio_balloon_config config; + + config.num_pages = cpu_to_le32(dev->num_pages); + config.actual = cpu_to_le32(dev->actual); + + memcpy(config_data, &config, 8); +} + +static void virtio_balloon_set_config(VirtIODevice *vdev, + const uint8_t *config_data) +{ + VirtIOBalloon *dev = to_virtio_balloon(vdev); + struct virtio_balloon_config config; + memcpy(&config, config_data, 8); + dev->actual = le32_to_cpu(config.actual); /* config fields are little-endian, mirroring cpu_to_le32 in get_config */ +} + +static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) +{ + return 0; +} + +static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target) +{ + VirtIOBalloon *dev = opaque; + + if (target > ram_size) + target = ram_size; + + if (target) { + dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS; + virtio_notify_config(&dev->vdev); + } + + return ram_size - ((ram_addr_t)dev->actual << TARGET_PAGE_BITS); /* widen before shifting so the page count does not wrap in 32 bits */ +} + +static void virtio_balloon_save(QEMUFile *f, void *opaque) +{ + VirtIOBalloon *s = opaque; + + virtio_save(&s->vdev, f); + + qemu_put_be32(f, s->num_pages); + qemu_put_be32(f, s->actual); +} + +static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) +{ + VirtIOBalloon *s = opaque; + + if (version_id != 1) + return -EINVAL; + + virtio_load(&s->vdev, f); + + s->num_pages = qemu_get_be32(f); + s->actual = qemu_get_be32(f); + + return 0; +} + +void *virtio_balloon_init(PCIBus *bus) +{ + VirtIOBalloon *s; + + s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon", + 6900, 0x1002, + 0, VIRTIO_ID_BALLOON, + 0x05, 0x00, 0x00, + 8, sizeof(VirtIOBalloon)); + if (s == NULL) + return NULL; + + s->vdev.get_config = virtio_balloon_get_config; + s->vdev.set_config = virtio_balloon_set_config; + s->vdev.get_features = virtio_balloon_get_features; + + s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); + s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); + + 
qemu_add_balloon_handler(virtio_balloon_to_target, s); + + register_savevm("virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s); + + return &s->vdev; +} diff --git a/qemu/hw/virtio-balloon.h b/qemu/hw/virtio-balloon.h new file mode 100644 index 0000000..27d6985 --- /dev/null +++ b/qemu/hw/virtio-balloon.h @@ -0,0 +1,34 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 2007-2008 + * + * Authors: + * Anthony Liguori + * Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VIRTIO_BALLOON_H +#define _QEMU_VIRTIO_BALLOON_H + +/* from Linux's linux/virtio_balloon.h */ + +/* The ID for virtio_balloon */ +#define VIRTIO_ID_BALLOON 5 + +/* The feature bitmap for virtio balloon */ +#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ + +struct virtio_balloon_config +{ + /* Number of pages host wants Guest to give up. */ + uint32_t num_pages; + /* Number of pages we've actually got in balloon. 
*/ + uint32_t actual; +}; + +#endif diff --git a/qemu/monitor.c b/qemu/monitor.c index 4acf346..b8c4fde 100644 --- a/qemu/monitor.c +++ b/qemu/monitor.c @@ -35,6 +35,7 @@ #include "audio/audio.h" #include "disas.h" #include "migration.h" +#include "balloon.h" #include #include "qemu-timer.h" @@ -1399,6 +1400,25 @@ static void do_inject_nmi(int cpu_index) } #endif +static void do_balloon(int value) +{ + ram_addr_t target = value; + qemu_balloon(target << 20); +} + +static void do_info_balloon(void) +{ + ram_addr_t actual; + + actual = qemu_balloon_status(); + if (kvm_enabled() && !qemu_kvm_has_mmu_notifiers()) + term_printf("Using KVM without MMU-notifiers, ballooning disabled\n"); + else if (actual == 0) + term_printf("Ballooning not activated in VM\n"); + else + term_printf("balloon: actual=%d\n", (int)(actual >> 20)); +} + static term_cmd_t term_cmds[] = { { "help|?", "s?", do_help, "[cmd]", "show the help" }, @@ -1494,6 +1514,8 @@ static term_cmd_t term_cmds[] = { { "pci_add", "iss", device_hot_add, "bus nic|storage [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" }, { "pci_del", "ii", device_hot_remove, "bus slot-number", "hot remove PCI device" }, #endif + { "balloon", "i", do_balloon, + "target", "request VM to change it's memory allocation (in MB)" }, { NULL, NULL, }, }; @@ -1558,6 +1580,8 @@ static term_cmd_t info_cmds[] = { #endif { "migration", "", do_info_migration, "", "show migration information" }, + { "balloon", "", do_info_balloon, + "", "show balloon information" }, { NULL, NULL, }, }; diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h index 7e28428..088bae3 100644 --- a/qemu/qemu-kvm.h +++ b/qemu/qemu-kvm.h @@ -110,10 +110,12 @@ extern kvm_context_t kvm_context; #define kvm_enabled() (kvm_allowed) #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context) #define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context) +#define qemu_kvm_has_mmu_notifiers() kvm_has_mmu_notifiers(kvm_context) 
#else #define kvm_enabled() (0) #define qemu_kvm_irqchip_in_kernel() (0) #define qemu_kvm_pit_in_kernel() (0) +#define qemu_kvm_has_mmu_notifiers() (0) #endif void kvm_mutex_unlock(void); diff --git a/qemu/vl.c b/qemu/vl.c index 2dc1311..ec89921 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -38,6 +38,7 @@ #include "block.h" #include "audio/audio.h" #include "migration.h" +#include "balloon.h" #include "qemu-kvm.h" #include @@ -530,6 +531,31 @@ void hw_error(const char *fmt, ...) va_end(ap); abort(); } + +/***************/ +/* ballooning */ + +static QEMUBalloonEvent *qemu_balloon_event; +void *qemu_balloon_event_opaque; + +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque) +{ + qemu_balloon_event = func; + qemu_balloon_event_opaque = opaque; +} + +void qemu_balloon(ram_addr_t target) +{ + if (qemu_balloon_event) + qemu_balloon_event(qemu_balloon_event_opaque, target); +} + +ram_addr_t qemu_balloon_status(void) +{ + if (qemu_balloon_event) + return qemu_balloon_event(qemu_balloon_event_opaque, 0); + return 0; +} /***********************************************************/ /* keyboard/mouse */ --------------070705020009010304080809--