From: nilal@redhat.com
To: kvm@vger.kernel.org, pbonzini@redhat.com, pagupta@redhat.com,
wei.w.wang@intel.com, yang.zhang.wz@gmail.com, riel@redhat.com,
david@redhat.com, mst@redhat.com, dodgen@google.com,
konrad.wilk@oracle.com
Subject: [QEMU PATCH] kvm: Support for guest page hinting
Date: Tue, 28 Nov 2017 15:04:33 -0500 [thread overview]
Message-ID: <20171128200433.5443-1-nilal@redhat.com> (raw)
In-Reply-To: <20171128200324.4432-1-nilal@redhat.com>
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch enables QEMU to handle page hinting requests
from the guest. Once the guest kicks QEMU to free a list of
page, QEMU retrives the guest physical address in the list
and converts each to host virtual address and then
MADVISE that memory.
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
hw/virtio/virtio-balloon.c | 117 +++++++++++++++++++++++-
hw/virtio/virtio.c | 23 +++++
include/hw/virtio/virtio-access.h | 1 +
include/hw/virtio/virtio-balloon.h | 2 +-
include/qemu/osdep.h | 7 ++
include/standard-headers/linux/virtio_balloon.h | 1 +
6 files changed, 149 insertions(+), 2 deletions(-)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 37cde38..c2d005d 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -33,6 +33,8 @@
#define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT)
+void page_hinting_request(uint64_t addr, uint32_t len);
+
static void balloon_page(void *addr, int deflate)
{
if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
@@ -73,11 +75,22 @@ static bool balloon_stats_supported(const VirtIOBalloon *s)
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}
+static bool balloon_hinting_supported(const VirtIOBalloon *s)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ return virtio_vdev_has_feature(vdev, VIRTIO_GUEST_PAGE_HINTING_VQ);
+}
+
static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
return s->stats_poll_interval > 0;
}
+static bool page_hinting_enabled(const VirtIOBalloon *s)
+{
+ return s->stats_poll_interval > 0;
+}
+
static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
if (balloon_stats_enabled(s)) {
@@ -93,14 +106,20 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
}
+static void page_hinting_change_timer(VirtIOBalloon *s, int64_t secs)
+{
+ timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
+}
+
static void balloon_stats_poll_cb(void *opaque)
{
VirtIOBalloon *s = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
- if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
+ if (s->stats_vq_elem == NULL || !balloon_stats_supported(s) || !balloon_hinting_supported(s)) {
/* re-schedule */
balloon_stats_change_timer(s, s->stats_poll_interval);
+ page_hinting_change_timer(s, s->stats_poll_interval);
return;
}
@@ -197,12 +216,101 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
balloon_stats_change_timer(s, value);
return;
}
+
+ if (page_hinting_enabled(s)) {
+ /* timer interval change */
+ s->stats_poll_interval = value;
+ page_hinting_change_timer(s, value);
+ return;
+ }
/* create a new timer */
g_assert(s->stats_timer == NULL);
s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
s->stats_poll_interval = value;
balloon_stats_change_timer(s, 0);
+ /* create a new timer */
+ g_assert(s->stats_timer == NULL);
+ s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
+ s->stats_poll_interval = value;
+ page_hinting_change_timer(s, 0);
+}
+
+static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
+{
+ MemoryRegionSection mrs = memory_region_find(get_system_memory(),
+ addr, 1);
+
+ if (!mrs.mr) {
+ error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, addr);
+ return NULL;
+ }
+
+ if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
+ error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", addr);
+ memory_region_unref(mrs.mr);
+ return NULL;
+ }
+
+ *p_mr = mrs.mr;
+ return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
+}
+
+struct guest_pages {
+ unsigned long pfn;
+ unsigned int pages;
+};
+
+
+void page_hinting_request(uint64_t addr, uint32_t len)
+{
+ Error *local_err = NULL;
+ MemoryRegion *mr = NULL;
+ void *hvaddr;
+ int ret = 0;
+ struct guest_pages *guest_obj;
+ int i = 0;
+ void *hvaddr_to_free;
+ unsigned long pfn, pfn_end;
+ uint64_t gpaddr_to_free;
+
+ hvaddr = gpa2hva(&mr, addr, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ guest_obj = hvaddr;
+
+ while (i < len) {
+ pfn = guest_obj[i].pfn;
+ pfn_end = guest_obj[i].pfn + guest_obj[i].pages - 1;
+ while (pfn <= pfn_end) {
+ gpaddr_to_free = pfn << VIRTIO_BALLOON_PFN_SHIFT;
+ hvaddr_to_free = gpa2hva(&mr, gpaddr_to_free, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ ret = qemu_madvise((void *)hvaddr_to_free, 4096, QEMU_MADV_FREE);
+ if (ret == -1)
+ printf("\n%d:%s Error: Madvise failed with error:%d\n", __LINE__, __func__, ret);
+ pfn++;
+ }
+ i++;
+ }
+}
+
+
+static void virtio_balloon_page_hinting(VirtIODevice *vdev, VirtQueue *vq)
+{
+ uint64_t addr;
+ uint32_t len;
+ VirtQueueElement elem = {};
+
+ pop_hinting_addr(vq, &addr, &len);
+ page_hinting_request(addr, len);
+ virtqueue_push(vq, &elem, 0);
+ virtio_notify(vdev, vq);
}
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -374,6 +482,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
f |= dev->host_features;
virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+ virtio_add_feature(&f, VIRTIO_GUEST_PAGE_HINTING_VQ);
return f;
}
@@ -407,6 +516,9 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
if (balloon_stats_enabled(s)) {
balloon_stats_change_timer(s, s->stats_poll_interval);
}
+ if (page_hinting_enabled(s)) {
+ page_hinting_change_timer(s, s->stats_poll_interval);
+ }
return 0;
}
@@ -443,6 +555,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
+ s->hvq = virtio_add_queue(vdev, 128, virtio_balloon_page_hinting);
reset_stats(s);
}
@@ -486,6 +599,8 @@ static void virtio_balloon_instance_init(Object *obj)
object_property_add(obj, "guest-stats", "guest statistics",
balloon_stats_get_all, NULL, NULL, s, NULL);
+ object_property_add(obj, "guest-page-hinting", "guest page hinting",
+ NULL, NULL, NULL, s, NULL);
object_property_add(obj, "guest-stats-polling-interval", "int",
balloon_stats_get_poll_interval,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 311929e..bd45135 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -825,6 +825,29 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
return elem;
}
+void pop_hinting_addr(VirtQueue *vq, uint64_t *addr, uint32_t *len)
+{
+ VRingMemoryRegionCaches *caches;
+ VRingDesc desc;
+ MemoryRegionCache *desc_cache;
+ VirtIODevice *vdev = vq->vdev;
+ unsigned int head, max;
+
+ max = vq->vring.num;
+ if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+ printf("\n%d:%sError: Unable to read head\n", __LINE__, __func__);
+ }
+
+ caches = vring_get_region_caches(vq);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
+ virtio_error(vdev, "Cannot map descriptor ring");
+ }
+ desc_cache = &caches->desc;
+ vring_desc_read(vdev, &desc, desc_cache, head);
+ *addr = desc.addr;
+ *len = desc.len;
+}
+
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
unsigned int i, head, max;
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 2e92074..568d71f 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -24,6 +24,7 @@
#define LEGACY_VIRTIO_IS_BIENDIAN 1
#endif
+void pop_hinting_addr(VirtQueue *vq, uint64_t *addr, uint32_t *len);
static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
{
#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1ea13bd..dfb5782 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -33,7 +33,7 @@ typedef struct virtio_balloon_stat_modern {
typedef struct VirtIOBalloon {
VirtIODevice parent_obj;
- VirtQueue *ivq, *dvq, *svq;
+ VirtQueue *ivq, *dvq, *svq, *hvq;
uint32_t num_pages;
uint32_t actual;
uint64_t stats[VIRTIO_BALLOON_S_NR];
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 9dd318a..033d64c 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -278,6 +278,11 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#else
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
#endif
+#ifdef MADV_FREE
+#define QEMU_MADV_FREE MADV_FREE
+#else
+#define QEMU_MADV_FREE QEMU_MAD_INVALID
+#endif
#elif defined(CONFIG_POSIX_MADVISE)
@@ -291,6 +296,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_FREE QEMU_MAD_INVALID
#else /* no-op */
@@ -304,6 +310,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_FREE QEMU_MAD_INVALID
#endif
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 9d06ccd..41766e1 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -34,6 +34,7 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_GUEST_PAGE_HINTING_VQ 3 /* Page hinting virtqueue */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
--
2.9.4
next prev parent reply other threads:[~2017-11-28 20:04 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-28 20:03 [Patch v5 0/7] KVM: Guest page hinting nilal
2017-11-28 20:03 ` [Patch v5 1/7] KVM: Support for guest " nilal
2017-11-28 20:03 ` [Patch v5 2/7] KVM: Guest page hinting functionality nilal
2017-11-28 20:03 ` [Patch v5 3/7] KVM: Adding tracepoints for guest page hinting nilal
2017-11-28 20:03 ` [Patch v5 4/7] virtio: Exposes added descriptor to the other side synchronously nilal
2017-11-28 20:03 ` [Patch v5 5/7] KVM: Sending hyperlist to the host via hinting_vq nilal
2017-11-29 13:01 ` kbuild test robot
2017-11-29 13:42 ` kbuild test robot
2017-11-28 20:03 ` [Patch v5 6/7] KVM: Enabling guest page hinting via static key nilal
2017-11-29 13:01 ` kbuild test robot
2017-11-29 15:45 ` kbuild test robot
2017-11-28 20:03 ` [Patch v5 7/7] KVM: Disabling page poisoning to avoid memory corruption errors nilal
2017-11-28 20:04 ` nilal [this message]
-- strict thread matches above, loose matches on Subject: below --
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
2017-12-01 17:32 ` [QEMU PATCH] kvm: Support for guest " nilal
2017-11-03 20:30 [Patch v4 0/6] KVM: Guest " nilal
2017-11-03 20:37 ` [QEMU PATCH] kvm: Support for guest " nilal
2017-11-03 20:37 ` nilal
2017-11-06 11:21 ` Pankaj Gupta
2017-11-06 14:21 ` Nitesh Narayan Lal
2017-10-10 18:39 [Patch v3 0/5] KVM: Guest " nilal
2017-10-10 18:41 ` [QEMU PATCH] kvm: Support for guest " nilal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171128200433.5443-1-nilal@redhat.com \
--to=nilal@redhat.com \
--cc=david@redhat.com \
--cc=dodgen@google.com \
--cc=konrad.wilk@oracle.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=pagupta@redhat.com \
--cc=pbonzini@redhat.com \
--cc=riel@redhat.com \
--cc=wei.w.wang@intel.com \
--cc=yang.zhang.wz@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.