* [Patch v6 0/7] KVM: Guest page hinting
@ 2017-12-01 17:31 nilal
2017-12-01 17:31 ` [Patch v6 1/7] KVM: Support for guest " nilal
` (8 more replies)
0 siblings, 9 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
Changelog in v6:
-Moved CONFIG_KVM_FREE_PAGE_HINTING from virt/kvm/Kconfig to arch/x86/kvm/Kconfig in order to resolve conflicts with other architectures:
*s390 has its own page hinting functionality
*powerpc has its own arch_free_page implementation
-Fixed the typo in virtio_balloon structure which caused compilation issues when guest page hinting was disabled
-Separated init_vqs implementation with and without hinting_vq enablement based on the CONFIG_KVM_FREE_PAGE_HINTING ifdef
Virtio interface changes are picked up from Wei's patch-set for Virtio-balloon enhancement [3]. (Wei, how would you like me to credit you in the final patch?)
* [Patch v6 1/7] KVM: Support for guest page hinting
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 2/7] KVM: Guest page hinting functionality nilal
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch includes the following:
1. The basic skeleton for guest page hinting support
2. Enablement of the x86 platform to use it (the sketch below shows
   where the new hooks are invoked)
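For context, the two hooks introduced here are driven by the core page
allocator. A minimal sketch of the call sites (simplified from
mm/page_alloc.c of this kernel generation; not part of this patch, and
the signatures are abbreviated):

static bool free_pages_prepare(struct page *page, unsigned int order)
{
	/* ... debug checks ... */
	arch_free_page(page, order);	/* page hinting hook on free */
	/* ... */
	return true;
}

static void post_alloc_hook(struct page *page, unsigned int order,
			    gfp_t gfp_flags)
{
	/* ... */
	arch_alloc_page(page, order);	/* page hinting hook on alloc */
	/* ... */
}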
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
arch/x86/Kbuild | 2 +-
arch/x86/kvm/Kconfig | 6 ++++++
arch/x86/kvm/Makefile | 2 ++
include/linux/gfp.h | 7 +++++++
virt/kvm/page_hinting.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 62 insertions(+), 1 deletion(-)
create mode 100644 virt/kvm/page_hinting.c
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0038a2d..7d39d7d 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -2,7 +2,7 @@ obj-y += entry/
obj-$(CONFIG_PERF_EVENTS) += events/
-obj-$(CONFIG_KVM) += kvm/
+obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
# Xen paravirtualization support
obj-$(CONFIG_XEN) += xen/
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3df51c2..a282713 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -88,6 +88,12 @@ config KVM_MMU_AUDIT
This option adds an R/W KVM module parameter 'mmu_audit', which allows
auditing of KVM MMU events at runtime.
+config KVM_FREE_PAGE_HINTING
+ def_bool y
+ depends on KVM
+ select VIRTIO
+ select VIRTIO_BALLOON
+
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index dc4f2fd..866dd56 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -16,6 +16,8 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o page_track.o debugfs.o
+obj-$(CONFIG_KVM_FREE_PAGE_HINTING) += $(KVM)/page_hinting.o
+
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..e02369b 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -439,6 +439,13 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+#define HAVE_ARCH_ALLOC_PAGE
+#define HAVE_ARCH_FREE_PAGE
+void arch_free_page(struct page *page, int order);
+void arch_alloc_page(struct page *page, int order);
+#endif
+
#ifndef HAVE_ARCH_FREE_PAGE
static inline void arch_free_page(struct page *page, int order) { }
#endif
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
new file mode 100644
index 0000000..39d2b1d
--- /dev/null
+++ b/virt/kvm/page_hinting.c
@@ -0,0 +1,46 @@
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/page_ref.h>
+#include <linux/kvm_host.h>
+#include <linux/sort.h>
+
+#include <trace/events/kvm.h>
+
+#define MAX_FGPT_ENTRIES 1000
+#define HYPERLIST_THRESHOLD 500
+/*
+ * struct kvm_free_pages - Tracks the pages which are freed by the guest.
+ * @pfn - page frame number for the page which is to be freed
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and number of pages
+ * which are freed by the guest. This list may also have fragmented pages, so
+ * defragmentation is a must prior to the hypercall.
+ */
+struct kvm_free_pages {
+ unsigned long pfn;
+ unsigned int pages;
+};
+
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+ unsigned long pfn;
+ unsigned int pages;
+};
+
+DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
+DEFINE_PER_CPU(int, kvm_pt_idx);
+struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+
+void arch_alloc_page(struct page *page, int order)
+{
+}
+
+void arch_free_page(struct page *page, int order)
+{
+}
--
2.9.4
* [Patch v6 2/7] KVM: Guest page hinting functionality
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
2017-12-01 17:31 ` [Patch v6 1/7] KVM: Support for guest " nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 3/7] KVM: Adding tracepoints for guest page hinting nilal
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <niteshnarayanlalleo@gmail.com>
This patch adds the guest implementation to maintain the list of pages
which are freed by the guest and not reused. To block reallocation while
the list is being processed, it takes a seqlock once the list is
completely filled. It does not carry the hypercall-related changes.
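In outline (a simplified sketch using the names from the diff below;
comments stand in for the elided logic):

void arch_free_page(struct page *page, int order)
{
	/* Fast path: record (pfn, 1 << order) in the per-CPU kvm_pt
	 * array; once the array is full, enter the slow path. */
}

void arch_free_page_slowpath(void)
{
	write_seqlock(&guest_page_lock);
	/* 1. Re-scan every kvm_pt entry, skipping compound pages and
	 *    pages whose refcount is non-zero again (reallocated after
	 *    they were recorded).
	 * 2. Coalesce runs of adjacent free pfns into entries of the
	 *    global hypervisor_pagelist.
	 * 3. When hypervisor_pagelist fills up, compress_hyperlist()
	 *    sorts it by pfn and merges overlapping ranges; if more
	 *    than HYPERLIST_THRESHOLD entries remain, make_hypercall().
	 */
	write_sequnlock(&guest_page_lock);
}

/* arch_alloc_page() spins on read_seqbegin()/read_seqretry() against
 * guest_page_lock, so allocations wait while the slow path runs. */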
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
virt/kvm/page_hinting.c | 245 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 243 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 39d2b1d..658856d 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -3,8 +3,7 @@
#include <linux/page_ref.h>
#include <linux/kvm_host.h>
#include <linux/sort.h>
-
-#include <trace/events/kvm.h>
+#include <linux/kernel.h>
#define MAX_FGPT_ENTRIES 1000
#define HYPERLIST_THRESHOLD 500
@@ -33,14 +32,256 @@ struct hypervisor_pages {
unsigned int pages;
};
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
DEFINE_PER_CPU(int, kvm_pt_idx);
struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+static void empty_hyperlist(void)
+{
+ int i = 0;
+
+ while (i < MAX_FGPT_ENTRIES) {
+ hypervisor_pagelist[i].pfn = 0;
+ hypervisor_pagelist[i].pages = 0;
+ i++;
+ }
+}
+
+static void make_hypercall(void)
+{
+ /*
+ * Dummy function: To be filled later.
+ */
+ empty_hyperlist();
+}
+
+static int sort_pfn(const void *a1, const void *b1)
+{
+ const struct hypervisor_pages *a = a1;
+ const struct hypervisor_pages *b = b1;
+
+ if (a->pfn > b->pfn)
+ return 1;
+
+ if (a->pfn < b->pfn)
+ return -1;
+
+ return 0;
+}
+
+static int pack_hyperlist(void)
+{
+ int i = 0, j = 0;
+
+ while (i < MAX_FGPT_ENTRIES) {
+ if (hypervisor_pagelist[i].pfn != 0) {
+ if (i != j) {
+ hypervisor_pagelist[j].pfn =
+ hypervisor_pagelist[i].pfn;
+ hypervisor_pagelist[j].pages =
+ hypervisor_pagelist[i].pages;
+ }
+ j++;
+ }
+ i++;
+ }
+ i = j;
+ while (j < MAX_FGPT_ENTRIES) {
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ }
+ return i;
+}
+
+int compress_hyperlist(void)
+{
+ int i = 0, j = 1, merge_counter = 0, ret = 0;
+
+ sort(hypervisor_pagelist, MAX_FGPT_ENTRIES,
+ sizeof(struct hypervisor_pages), sort_pfn, NULL);
+ while (i < MAX_FGPT_ENTRIES && j < MAX_FGPT_ENTRIES) {
+ unsigned long pfni = hypervisor_pagelist[i].pfn;
+ unsigned int pagesi = hypervisor_pagelist[i].pages;
+ unsigned long pfnj = hypervisor_pagelist[j].pfn;
+ unsigned int pagesj = hypervisor_pagelist[j].pages;
+
+ if (pfnj <= pfni) {
+ if (((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) &&
+ ((pfnj + pagesj - 1) >= (pfni - 1))) {
+ hypervisor_pagelist[i].pfn = pfnj;
+ hypervisor_pagelist[i].pages += pfni - pfnj;
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ } else if ((pfnj + pagesj - 1) > (pfni + pagesi - 1)) {
+ hypervisor_pagelist[i].pfn = pfnj;
+ hypervisor_pagelist[i].pages = pagesj;
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ }
+ } else if (pfnj > pfni) {
+ if ((pfnj + pagesj - 1) > (pfni + pagesi - 1) &&
+ (pfnj <= pfni + pagesi)) {
+ hypervisor_pagelist[i].pages +=
+ (pfnj + pagesj - 1) -
+ (pfni + pagesi - 1);
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ } else if ((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) {
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ }
+ }
+ i = j;
+ j++;
+ }
+ if (merge_counter != 0)
+ ret = pack_hyperlist() - 1;
+ else
+ ret = MAX_FGPT_ENTRIES - 1;
+ return ret;
+}
+
+void copy_hyperlist(int hyper_idx)
+{
+ int *idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj;
+ int i = 0;
+
+ free_page_obj = &get_cpu_var(kvm_pt)[0];
+ while (i < hyper_idx) {
+ free_page_obj[*idx].pfn = hypervisor_pagelist[i].pfn;
+ free_page_obj[*idx].pages = hypervisor_pagelist[i].pages;
+ *idx += 1;
+ i++;
+ }
+ empty_hyperlist();
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+}
+
+/*
+ * arch_free_page_slowpath() - This function adds the guest free page entries
+ * to hypervisor_pages list and also ensures defragmentation prior to addition
+ * if it is present with any entry of the kvm_free_pages list.
+ */
+void arch_free_page_slowpath(void)
+{
+ int idx = 0;
+ int hyper_idx = -1;
+ int *kvm_idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj = &get_cpu_var(kvm_pt)[0];
+
+ write_seqlock(&guest_page_lock);
+ while (idx < MAX_FGPT_ENTRIES) {
+ unsigned long pfn = free_page_obj[idx].pfn;
+ unsigned long pfn_end = free_page_obj[idx].pfn +
+ free_page_obj[idx].pages - 1;
+ bool prev_free = false;
+
+ while (pfn <= pfn_end) {
+ struct page *p = pfn_to_page(pfn);
+
+ if (PageCompound(p)) {
+ struct page *head_page = compound_head(p);
+ unsigned long head_pfn = page_to_pfn(head_page);
+ unsigned int alloc_pages =
+ 1 << compound_order(head_page);
+
+ pfn = head_pfn + alloc_pages;
+ prev_free = false;
+ continue;
+ }
+ if (page_ref_count(p)) {
+ pfn++;
+ prev_free = false;
+ continue;
+ }
+ /*
+ * The page is free so add it to the list and free the
+ * hypervisor_pagelist if required.
+ */
+ if (!prev_free) {
+ hyper_idx++;
+ hypervisor_pagelist[hyper_idx].pfn = pfn;
+ hypervisor_pagelist[hyper_idx].pages = 1;
+ if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
+ hyper_idx = compress_hyperlist();
+ if (hyper_idx >=
+ HYPERLIST_THRESHOLD) {
+ make_hypercall();
+ hyper_idx = 0;
+ }
+ }
+ /*
+ * If the next contiguous page is free, it can
+ * be added to this same entry.
+ */
+ prev_free = true;
+ } else {
+ /*
+ * Multiple adjacent free pages
+ */
+ hypervisor_pagelist[hyper_idx].pages++;
+ }
+ pfn++;
+ }
+ free_page_obj[idx].pfn = 0;
+ free_page_obj[idx].pages = 0;
+ idx++;
+ }
+ *kvm_idx = 0;
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+ write_sequnlock(&guest_page_lock);
+}
+
void arch_alloc_page(struct page *page, int order)
{
+ unsigned int seq;
+
+ /*
+ * arch_free_page will acquire the lock once the list carrying guest
+ * free pages is full and a hypercall will be made. Until the complete free
+ * page list is traversed, no further allocation will be allowed.
+ */
+ do {
+ seq = read_seqbegin(&guest_page_lock);
+ } while (read_seqretry(&guest_page_lock, seq));
}
void arch_free_page(struct page *page, int order)
{
+ int *free_page_idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj;
+ unsigned long flags;
+
+ /*
+ * use of global variables may trigger a race condition between irq and
+ * process context causing unwanted overwrites. This will be replaced
+ * with a better solution to prevent such race conditions.
+ */
+ local_irq_save(flags);
+ free_page_obj = &get_cpu_var(kvm_pt)[0];
+ free_page_obj[*free_page_idx].pfn = page_to_pfn(page);
+ free_page_obj[*free_page_idx].pages = 1 << order;
+ *free_page_idx += 1;
+ if (*free_page_idx == MAX_FGPT_ENTRIES)
+ arch_free_page_slowpath();
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+ local_irq_restore(flags);
}
--
2.9.4
* [Patch v6 3/7] KVM: Adding tracepoints for guest page hinting
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
2017-12-01 17:31 ` [Patch v6 1/7] KVM: Support for guest " nilal
2017-12-01 17:31 ` [Patch v6 2/7] KVM: Guest page hinting functionality nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 4/7] virtio: Exposes added descriptor to the other side synchronously nilal
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
include/trace/events/kmem.h | 101 ++++++++++++++++++++++++++++++++++++++++++++
virt/kvm/page_hinting.c | 20 ++++++++-
2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e30..c9c5e6c 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -315,6 +315,107 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->change_ownership)
);
+TRACE_EVENT(guest_free_page,
+ TP_PROTO(struct page *page, unsigned int order),
+
+ TP_ARGS(page, order),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned int, order)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page_to_pfn(page);
+ __entry->order = order;
+ ),
+
+ TP_printk("page=%p pfn=%lu number of pages=%d",
+ pfn_to_page(__entry->pfn),
+ __entry->pfn,
+ (1 << __entry->order))
+);
+
+TRACE_EVENT(guest_alloc_page,
+ TP_PROTO(struct page *page, unsigned int order),
+
+ TP_ARGS(page, order),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned int, order)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page_to_pfn(page);
+ __entry->order = order;
+ ),
+
+ TP_printk("page=%p pfn=%lu number of pages=%d",
+ pfn_to_page(__entry->pfn),
+ __entry->pfn,
+ (1 << __entry->order))
+);
+
+TRACE_EVENT(guest_free_page_slowpath,
+ TP_PROTO(unsigned long pfn, unsigned int pages),
+
+ TP_ARGS(pfn, pages),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned int, pages)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->pages = pages;
+ ),
+
+ TP_printk("pfn=%lu number of pages=%u",
+ __entry->pfn,
+ __entry->pages)
+);
+
+TRACE_EVENT(guest_pfn_dump,
+ TP_PROTO(char *type, unsigned long pfn, unsigned int pages),
+
+ TP_ARGS(type, pfn, pages),
+
+ TP_STRUCT__entry(
+ __field(char *, type)
+ __field(unsigned long, pfn)
+ __field(unsigned int, pages)
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ __entry->pfn = pfn;
+ __entry->pages = pages;
+ ),
+
+ TP_printk("Type=%s pfn=%lu number of pages=%d",
+ __entry->type,
+ __entry->pfn,
+ __entry->pages)
+);
+
+TRACE_EVENT(guest_str_dump,
+ TP_PROTO(char *str),
+
+ TP_ARGS(str),
+
+ TP_STRUCT__entry(
+ __field(char *, str)
+ ),
+
+ TP_fast_assign(
+ __entry->str = str;
+ ),
+
+ TP_printk("Debug=%s",
+ __entry->str)
+);
#endif /* _TRACE_KMEM_H */
/* This part must be outside protection */
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 658856d..54fe6bc 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include <linux/sort.h>
#include <linux/kernel.h>
+#include <trace/events/kmem.h>
#define MAX_FGPT_ENTRIES 1000
#define HYPERLIST_THRESHOLD 500
@@ -48,12 +49,13 @@ static void empty_hyperlist(void)
}
}
-static void make_hypercall(void)
+void make_hypercall(void)
{
/*
* Dummy function: To be filled later.
*/
empty_hyperlist();
+ trace_guest_str_dump("Hypercall to host...:");
}
static int sort_pfn(const void *a1, const void *b1)
@@ -70,13 +72,16 @@ static int sort_pfn(const void *a1, const void *b1)
return 0;
}
-static int pack_hyperlist(void)
+int pack_hyperlist(void)
{
int i = 0, j = 0;
while (i < MAX_FGPT_ENTRIES) {
if (hypervisor_pagelist[i].pfn != 0) {
if (i != j) {
+ trace_guest_pfn_dump("Packing Hyperlist",
+ hypervisor_pagelist[i].pfn,
+ hypervisor_pagelist[i].pages);
hypervisor_pagelist[j].pfn =
hypervisor_pagelist[i].pfn;
hypervisor_pagelist[j].pages =
@@ -163,6 +168,9 @@ void copy_hyperlist(int hyper_idx)
free_page_obj = &get_cpu_var(kvm_pt)[0];
while (i < hyper_idx) {
+ trace_guest_pfn_dump("HyperList entry copied",
+ hypervisor_pagelist[i].pfn,
+ hypervisor_pagelist[i].pages);
free_page_obj[*idx].pfn = hypervisor_pagelist[i].pfn;
free_page_obj[*idx].pages = hypervisor_pagelist[i].pages;
*idx += 1;
@@ -203,11 +211,14 @@ void arch_free_page_slowpath(void)
pfn = head_pfn + alloc_pages;
prev_free = false;
+ trace_guest_pfn_dump("Compound",
+ head_pfn, alloc_pages);
continue;
}
if (page_ref_count(p)) {
pfn++;
prev_free = false;
+ trace_guest_pfn_dump("Single", pfn, 1);
continue;
}
/*
@@ -218,6 +229,9 @@ void arch_free_page_slowpath(void)
hyper_idx++;
hypervisor_pagelist[hyper_idx].pfn = pfn;
hypervisor_pagelist[hyper_idx].pages = 1;
+ trace_guest_free_page_slowpath(
+ hypervisor_pagelist[hyper_idx].pfn,
+ hypervisor_pagelist[hyper_idx].pages);
if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
hyper_idx = compress_hyperlist();
if (hyper_idx >=
@@ -261,6 +275,7 @@ void arch_alloc_page(struct page *page, int order)
do {
seq = read_seqbegin(&guest_page_lock);
} while (read_seqretry(&guest_page_lock, seq));
+ trace_guest_alloc_page(page, order);
}
void arch_free_page(struct page *page, int order)
@@ -276,6 +291,7 @@ void arch_free_page(struct page *page, int order)
*/
local_irq_save(flags);
free_page_obj = &get_cpu_var(kvm_pt)[0];
+ trace_guest_free_page(page, order);
free_page_obj[*free_page_idx].pfn = page_to_pfn(page);
free_page_obj[*free_page_idx].pages = 1 << order;
*free_page_idx += 1;
--
2.9.4
* [Patch v6 4/7] virtio: Exposes added descriptor to the other side synchronously
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (2 preceding siblings ...)
2017-12-01 17:31 ` [Patch v6 3/7] KVM: Adding tracepoints for guest page hinting nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 5/7] KVM: Sending hyperlist to the host via hinting_vq nilal
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch enables the driver to expose a chain of buffers to the
other end using vring descriptors, followed by a kick. It then
busy-waits until the device has consumed the buffer.
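A minimal usage sketch of the new helpers (hint_one_buffer() is a
hypothetical wrapper; the real user is the balloon driver in patch 5
of this series):

static int hint_one_buffer(struct virtqueue *vq, u64 gpaddr, u32 len)
{
	unsigned int head = VIRTQUEUE_DESC_ID_INIT;
	unsigned int prev = VIRTQUEUE_DESC_ID_INIT;
	int err;

	/* Queue one out-buffer (in = false) described by gpaddr/len. */
	err = virtqueue_add_chain_desc(vq, gpaddr, len, &head, &prev, false);
	if (err)
		return err;
	/* Publish the chain; virtqueue_add_chain() internally calls
	 * virtqueue_kick_sync(), which kicks the device and busy-waits
	 * until the buffer has been used. */
	return virtqueue_add_chain(vq, head, false, NULL, (void *)gpaddr, NULL);
}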
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
drivers/virtio/virtio_ring.c | 157 ++++++++++++++++++++++++++++++++++++++++++-
include/linux/virtio.h | 19 ++++++
2 files changed, 175 insertions(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index eb30f3e..651ce8f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -438,6 +438,136 @@ static inline int virtqueue_add(struct virtqueue *_vq,
}
/**
+ * virtqueue_add_chain - expose a chain of buffers to the other end
+ * @_vq: the struct virtqueue we're talking about.
+ * @head: desc id of the chain head.
+ * @indirect: set if the chain of descs are indirect descs.
+ * @indir_desc: the first indirect desc.
+ * @data: the token identifying the chain.
+ * @ctx: extra context for the token.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain(struct virtqueue *_vq,
+ unsigned int head,
+ bool indirect,
+ struct vring_desc *indir_desc,
+ void *data,
+ void *ctx)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ /* The desc chain is empty. */
+ if (head == VIRTQUEUE_DESC_ID_INIT)
+ return 0;
+
+ START_USE(vq);
+
+ if (unlikely(vq->broken)) {
+ END_USE(vq);
+ return -EIO;
+ }
+
+ /* This is the data for the callback; in our case it may not be required. */
+ vq->desc_state[head].data = data;
+ if (indirect)
+ vq->desc_state[head].indir_desc = indir_desc;
+ if (ctx)
+ vq->desc_state[head].indir_desc = ctx;
+
+ vq->avail_idx_shadow = 1;
+ vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
+ vq->num_added = 1;
+ END_USE(vq);
+ virtqueue_kick_sync(_vq);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain);
+
+/**
+ * virtqueue_add_chain_desc - add a buffer to a chain using a vring desc
+ * @vq: the struct virtqueue we're talking about.
+ * @addr: address of the buffer to add.
+ * @len: length of the buffer.
+ * @head_id: desc id of the chain head.
+ * @prev_id: desc id of the previous buffer.
+ * @in: set if the buffer is for the device to write.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+ u64 addr,
+ u32 len,
+ unsigned int *head_id,
+ unsigned int *prev_id,
+ bool in)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_desc *desc = vq->vring.desc;
+ u16 flags = in ? VRING_DESC_F_WRITE : 0;
+ unsigned int i;
+
+ /* Sanity check */
+ if (!_vq || !head_id || !prev_id)
+ return -EINVAL;
+retry:
+ START_USE(vq);
+ if (unlikely(vq->broken)) {
+ END_USE(vq);
+ return -EIO;
+ }
+
+ if (vq->vq.num_free < 1) {
+ /*
+ * If there is no desc available in the vq, kick what is
+ * already added and restart building a new chain for
+ * the passed sg.
+ */
+ if (likely(*head_id != VIRTQUEUE_DESC_ID_INIT)) {
+ END_USE(vq);
+ virtqueue_add_chain(_vq, *head_id, 0, NULL, vq, NULL);
+ virtqueue_kick_sync(_vq);
+ *head_id = VIRTQUEUE_DESC_ID_INIT;
+ *prev_id = VIRTQUEUE_DESC_ID_INIT;
+ goto retry;
+ } else {
+ END_USE(vq);
+ return -ENOSPC;
+ }
+ }
+
+ i = vq->free_head;
+ flags &= ~VRING_DESC_F_NEXT;
+ desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
+ desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+ desc[i].len = cpu_to_virtio32(_vq->vdev, len);
+
+ /* Add the desc to the end of the chain */
+ if (*prev_id != VIRTQUEUE_DESC_ID_INIT) {
+ desc[*prev_id].next = cpu_to_virtio16(_vq->vdev, i);
+ desc[*prev_id].flags |= cpu_to_virtio16(_vq->vdev,
+ VRING_DESC_F_NEXT);
+ }
+ *prev_id = i;
+ if (*head_id == VIRTQUEUE_DESC_ID_INIT)
+ *head_id = *prev_id;
+
+ vq->vq.num_free--;
+ vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
+ END_USE(vq);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain_desc);
+
+/**
* virtqueue_add_sgs - expose buffers to other end
* @vq: the struct virtqueue we're talking about.
* @sgs: array of terminated scatterlists.
@@ -559,7 +689,6 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
START_USE(vq);
/* We need to expose available array entries before checking avail
* event. */
- virtio_mb(vq->weak_barriers);
old = vq->avail_idx_shadow - vq->num_added;
new = vq->avail_idx_shadow;
@@ -609,6 +738,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
EXPORT_SYMBOL_GPL(virtqueue_notify);
/**
+ * virtqueue_kick_sync - update after add_buf and busy wait till update is done
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Busy wait till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_sync(struct virtqueue *vq)
+{
+ u32 len;
+
+ if (likely(virtqueue_kick(vq))) {
+ while (!virtqueue_get_buf(vq, &len) &&
+ !virtqueue_is_broken(vq))
+ cpu_relax();
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
+
+/**
* virtqueue_kick - update after add_buf
* @vq: the struct virtqueue
*
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 988c735..58f0a2c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -57,6 +57,25 @@ int virtqueue_add_sgs(struct virtqueue *vq,
unsigned int in_sgs,
void *data,
gfp_t gfp);
+/* A desc with this init id is treated as an invalid desc */
+#define VIRTQUEUE_DESC_ID_INIT UINT_MAX
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+ u64 addr,
+ u32 len,
+ unsigned int *head_id,
+ unsigned int *prev_id,
+ bool in);
+
+int virtqueue_add_chain(struct virtqueue *_vq,
+ unsigned int head,
+ bool indirect,
+ struct vring_desc *indirect_desc,
+ void *data,
+ void *ctx);
+
+bool virtqueue_kick_sync(struct virtqueue *vq);
+
+bool virtqueue_kick_async(struct virtqueue *vq, wait_queue_head_t wq);
bool virtqueue_kick(struct virtqueue *vq);
--
2.9.4
* [Patch v6 5/7] KVM: Sending hyperlist to the host via hinting_vq
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (3 preceding siblings ...)
2017-12-01 17:31 ` [Patch v6 4/7] virtio: Exposes added descriptor to the other side synchronously nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 6/7] KVM: Enabling guest page hinting via static key nilal
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch creates a new virtqueue (hinting_vq) to be used for page
hinting and adds support in the existing virtio balloon infrastructure
so that the hyperlist carrying the pages to be freed can be sent to
the host (QEMU) for processing over hinting_vq.
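The resulting guest-side transmit path, in outline (a simplified
sketch; the arrows denote calls):

	hyperlist_ready(entries)              [virt/kvm/page_hinting.c]
	  -> request_hypercall(balloon_ptr, entries)
	     (a function pointer, set to virtballoon_page_hinting at
	      balloon probe time)
	  -> tell_host_one_page(vb, vb->hinting_vq,
	                        (u64)hypervisor_pagelist, entries)
	  -> virtqueue_add_chain_desc() + virtqueue_add_chain()
	     (a single descriptor carrying the guest-physical address of
	      the array and the entry count, followed by a synchronous
	      kick)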
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
drivers/virtio/virtio_balloon.c | 99 ++++++++++++++++++++++++++++++++++++-
include/linux/page_hinting.h | 16 ++++++
include/uapi/linux/virtio_balloon.h | 1 +
virt/kvm/page_hinting.c | 36 ++++++--------
4 files changed, 129 insertions(+), 23 deletions(-)
create mode 100644 include/linux/page_hinting.h
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7960746..7aef032 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,6 +32,7 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
+#include <linux/page_hinting.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt;
struct virtio_balloon {
struct virtio_device *vdev;
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
+#else
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
-
+#endif
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
struct work_struct update_balloon_size_work;
@@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = {
{ 0 },
};
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
+ u64 gvaddr, int len)
+{
+ unsigned int id = VIRTQUEUE_DESC_ID_INIT;
+ u64 gpaddr = virt_to_phys((void *)gvaddr);
+
+ virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
+ virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
+}
+
+void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
+{
+ u64 gvaddr = (u64)hypervisor_pagelist;
+
+ vb->num_pfns = hyper_entries;
+ tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
+}
+
+static void hinting_ack(struct virtqueue *vq)
+{
+ struct virtio_balloon *vb = vq->vdev->priv;
+
+ wake_up(&vb->acked);
+}
+#endif
+
static u32 page_to_balloon_pfn(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
@@ -416,6 +447,62 @@ static void update_balloon_size_func(struct work_struct *work)
queue_work(system_freezable_wq, work);
}
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+static int init_vqs(struct virtio_balloon *vb)
+{
+ struct virtqueue *vqs[4];
+ vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
+ hinting_ack };
+ static const char * const names[] = { "inflate", "deflate", "stats",
+ "hinting" };
+ int err, nvqs;
+ bool stats_vq_support, page_hinting_support;
+
+ /*
+ * We expect two virtqueues: inflate and deflate, and
+ * optionally stats and hinting.
+ */
+ stats_vq_support = virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_STATS_VQ);
+ page_hinting_support = virtio_has_feature(vb->vdev,
+ VIRTIO_GUEST_PAGE_HINTING_VQ
+ );
+ if (stats_vq_support && page_hinting_support)
+ nvqs = 4;
+ else if (stats_vq_support || page_hinting_support)
+ nvqs = 3;
+ else
+ nvqs = 2;
+
+ err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+ if (err)
+ return err;
+
+ vb->inflate_vq = vqs[0];
+ vb->deflate_vq = vqs[1];
+ if (page_hinting_support)
+ vb->hinting_vq = vqs[3];
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+ struct scatterlist sg;
+ unsigned int num_stats;
+
+ vb->stats_vq = vqs[2];
+
+ /*
+ * Prime this virtqueue with one buffer so the hypervisor can
+ * use it to signal us later (it can't be broken yet!).
+ */
+ num_stats = update_balloon_stats(vb);
+
+ sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+ if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
+ < 0)
+ BUG();
+ virtqueue_kick(vb->stats_vq);
+ }
+ return 0;
+}
+#else
static int init_vqs(struct virtio_balloon *vb)
{
struct virtqueue *vqs[3];
@@ -454,6 +541,8 @@ static int init_vqs(struct virtio_balloon *vb)
return 0;
}
+#endif
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -595,6 +684,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtio_device_ready(vdev);
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+ if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
+ request_hypercall = (void *)&virtballoon_page_hinting;
+ balloon_ptr = vb;
+ }
+#endif
+
if (towards_target(vb))
virtballoon_changed(vdev);
return 0;
@@ -683,6 +779,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+ VIRTIO_GUEST_PAGE_HINTING_VQ,
};
static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
new file mode 100644
index 0000000..0bfb646
--- /dev/null
+++ b/include/linux/page_hinting.h
@@ -0,0 +1,16 @@
+#define MAX_FGPT_ENTRIES 1000
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+ unsigned long pfn;
+ unsigned int pages;
+};
+
+extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+extern void (*request_hypercall)(void *, int);
+extern void *balloon_ptr;
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 343d7dd..45bdcc8 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,6 +34,7 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_GUEST_PAGE_HINTING_VQ 3 /* Page hinting virtqueue */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 54fe6bc..22c892b 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -5,8 +5,8 @@
#include <linux/sort.h>
#include <linux/kernel.h>
#include <trace/events/kmem.h>
+#include <linux/page_hinting.h>
-#define MAX_FGPT_ENTRIES 1000
#define HYPERLIST_THRESHOLD 500
/*
* struct kvm_free_pages - Tracks the pages which are freed by the guest.
@@ -21,22 +21,15 @@ struct kvm_free_pages {
unsigned int pages;
};
-/*
- * hypervisor_pages - It is a dummy structure passed with the hypercall.
- * @pfn - page frame number for the page which is to be freed.
- * @pages - number of pages which are supposed to be freed.
- * A global array object is used to hold the list of pfn and pages and is
- * passed as part of the hypercall.
- */
-struct hypervisor_pages {
- unsigned long pfn;
- unsigned int pages;
-};
-
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
DEFINE_PER_CPU(int, kvm_pt_idx);
struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+EXPORT_SYMBOL(hypervisor_pagelist);
+void (*request_hypercall)(void *, int);
+EXPORT_SYMBOL(request_hypercall);
+void *balloon_ptr;
+EXPORT_SYMBOL(balloon_ptr);
static void empty_hyperlist(void)
{
@@ -49,13 +42,11 @@ static void empty_hyperlist(void)
}
}
-void make_hypercall(void)
+void hyperlist_ready(int entries)
{
- /*
- * Dummy function: To be filled later.
- */
- empty_hyperlist();
trace_guest_str_dump("Hypercall to host...:");
+ request_hypercall(balloon_ptr, entries);
+ empty_hyperlist();
}
static int sort_pfn(const void *a1, const void *b1)
@@ -156,7 +147,7 @@ int compress_hyperlist(void)
if (merge_counter != 0)
ret = pack_hyperlist() - 1;
else
- ret = MAX_FGPT_ENTRIES - 1;
+ ret = MAX_FGPT_ENTRIES;
return ret;
}
@@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
*/
if (!prev_free) {
hyper_idx++;
- hypervisor_pagelist[hyper_idx].pfn = pfn;
- hypervisor_pagelist[hyper_idx].pages = 1;
trace_guest_free_page_slowpath(
hypervisor_pagelist[hyper_idx].pfn,
hypervisor_pagelist[hyper_idx].pages);
+ hypervisor_pagelist[hyper_idx].pfn = pfn;
+ hypervisor_pagelist[hyper_idx].pages = 1;
if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
hyper_idx = compress_hyperlist();
if (hyper_idx >=
HYPERLIST_THRESHOLD) {
- make_hypercall();
+ hyperlist_ready(hyper_idx);
hyper_idx = 0;
}
}
@@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
* free pages is full and a hypercall will be made. Until the complete free
* page list is traversed, no further allocation will be allowed.
*/
+
do {
seq = read_seqbegin(&guest_page_lock);
} while (read_seqretry(&guest_page_lock, seq));
--
2.9.4
* [Patch v6 6/7] KVM: Enabling guest page hinting via static key
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (4 preceding siblings ...)
2017-12-01 17:31 ` [Patch v6 5/7] KVM: Sending hyperlist to the host via hinting_vq nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:31 ` [Patch v6 7/7] KVM: Disabling page poisoning to avoid memory corruption errors nilal
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch allows guest page hinting to be enabled or disabled at
runtime through a static key, which can be toggled via sysctl (see
the usage note below).
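A brief usage note (assuming the usual mapping of vm_table entries
under /proc/sys/vm; the procname comes from the sysctl hunk below):

	echo 1 > /proc/sys/vm/guest-page-hinting    # enable at runtime
	echo 0 > /proc/sys/vm/guest-page-hinting    # disable again

Hinting is also enabled automatically at balloon probe time when the
host offers VIRTIO_GUEST_PAGE_HINTING_VQ (see enable_hinting() below).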
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
drivers/virtio/virtio_balloon.c | 16 +++++++++++-----
include/linux/gfp.h | 16 ++++++++++++++--
include/linux/page_hinting.h | 7 +++++++
kernel/sysctl.c | 9 +++++++++
virt/kvm/page_hinting.c | 29 +++++++++++++++++++++++++----
5 files changed, 66 insertions(+), 11 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7aef032..e6457de 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,7 +32,6 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
-#include <linux/page_hinting.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -124,6 +123,14 @@ static void hinting_ack(struct virtqueue *vq)
wake_up(&vb->acked);
}
+
+static void enable_hinting(struct virtio_balloon *vb)
+{
+ guest_page_hinting_flag = 1;
+ static_branch_enable(&guest_page_hinting_key);
+ request_hypercall = (void *)&virtballoon_page_hinting;
+ balloon_ptr = vb;
+}
#endif
static u32 page_to_balloon_pfn(struct page *page)
@@ -524,6 +531,7 @@ static int init_vqs(struct virtio_balloon *vb)
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
unsigned int num_stats;
+
vb->stats_vq = vqs[2];
/*
@@ -685,10 +693,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtio_device_ready(vdev);
#ifdef CONFIG_KVM_FREE_PAGE_HINTING
- if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
- request_hypercall = (void *)&virtballoon_page_hinting;
- balloon_ptr = vb;
- }
+ if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ))
+ enable_hinting(vb);
#endif
if (towards_target(vb))
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e02369b..2212e08 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -7,6 +7,7 @@
#include <linux/stddef.h>
#include <linux/linkage.h>
#include <linux/topology.h>
+#include <linux/page_hinting.h>
struct vm_area_struct;
@@ -442,8 +443,19 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
#ifdef CONFIG_KVM_FREE_PAGE_HINTING
#define HAVE_ARCH_ALLOC_PAGE
#define HAVE_ARCH_FREE_PAGE
-void arch_free_page(struct page *page, int order);
-void arch_alloc_page(struct page *page, int order);
+static inline void arch_alloc_page(struct page *page, int order)
+{
+ if (!static_branch_unlikely(&guest_page_hinting_key))
+ return;
+ guest_alloc_page(page, order);
+}
+
+static inline void arch_free_page(struct page *page, int order)
+{
+ if (!static_branch_unlikely(&guest_page_hinting_key))
+ return;
+ guest_free_page(page, order);
+}
#endif
#ifndef HAVE_ARCH_FREE_PAGE
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
index 0bfb646..dd30644 100644
--- a/include/linux/page_hinting.h
+++ b/include/linux/page_hinting.h
@@ -14,3 +14,10 @@ struct hypervisor_pages {
extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
extern void (*request_hypercall)(void *, int);
extern void *balloon_ptr;
+
+extern struct static_key_false guest_page_hinting_key;
+int guest_page_hinting_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int guest_page_hinting_flag;
+void guest_alloc_page(struct page *page, int order);
+void guest_free_page(struct page *page, int order);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 557d467..2717ceb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1655,6 +1655,15 @@ static struct ctl_table vm_table[] = {
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+ {
+ .procname = "guest-page-hinting",
+ .data = &guest_page_hinting_flag,
+ .maxlen = sizeof(guest_page_hinting_flag),
+ .mode = 0644,
+ .proc_handler = guest_page_hinting_sysctl,
+ },
+#endif
{ }
};
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 22c892b..f66ad63 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -5,7 +5,6 @@
#include <linux/sort.h>
#include <linux/kernel.h>
#include <trace/events/kmem.h>
-#include <linux/page_hinting.h>
#define HYPERLIST_THRESHOLD 500
/*
@@ -30,6 +29,29 @@ void (*request_hypercall)(void *, int);
EXPORT_SYMBOL(request_hypercall);
void *balloon_ptr;
EXPORT_SYMBOL(balloon_ptr);
+struct static_key_false guest_page_hinting_key = STATIC_KEY_FALSE_INIT;
+EXPORT_SYMBOL(guest_page_hinting_key);
+static DEFINE_MUTEX(hinting_mutex);
+int guest_page_hinting_flag;
+EXPORT_SYMBOL(guest_page_hinting_flag);
+
+int guest_page_hinting_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ mutex_lock(&hinting_mutex);
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (guest_page_hinting_flag)
+ static_key_enable(&guest_page_hinting_key.key);
+ else
+ static_key_disable(&guest_page_hinting_key.key);
+ mutex_unlock(&hinting_mutex);
+ return ret;
+}
static void empty_hyperlist(void)
{
@@ -254,7 +276,7 @@ void arch_free_page_slowpath(void)
write_sequnlock(&guest_page_lock);
}
-void arch_alloc_page(struct page *page, int order)
+void guest_alloc_page(struct page *page, int order)
{
unsigned int seq;
@@ -270,12 +292,11 @@ void arch_alloc_page(struct page *page, int order)
trace_guest_alloc_page(page, order);
}
-void arch_free_page(struct page *page, int order)
+void guest_free_page(struct page *page, int order)
{
int *free_page_idx = &get_cpu_var(kvm_pt_idx);
struct kvm_free_pages *free_page_obj;
unsigned long flags;
-
/*
* use of global variables may trigger a race condition between irq and
* process context causing unwanted overwrites. This will be replaced
--
2.9.4
* [Patch v6 7/7] KVM: Disabling page poisoning to avoid memory corruption errors
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (5 preceding siblings ...)
2017-12-01 17:31 ` [Patch v6 6/7] KVM: Enabling guest page hinting via static key nilal
@ 2017-12-01 17:31 ` nilal
2017-12-01 17:32 ` [QEMU PATCH] kvm: Support for guest page hinting nilal
2018-01-25 17:28 ` [Patch v6 0/7] KVM: Guest " Michael S. Tsirkin
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:31 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch disables page poisoning if guest page hinting is enabled,
which is required to avoid guest memory corruption errors.
Page poisoning is a feature in which a page is filled with a specific
pattern (0x00 or 0xaa) after arch_free_page, and the pattern is
verified before arch_alloc_page to catch the following issues:
*information leaks from freed data
*use-after-free bugs
*memory corruption
Which pattern is used depends on CONFIG_PAGE_POISONING_ZERO.
Once the guest pages which are supposed to be freed are sent to the
hypervisor, it frees them. After the pages in the global list are
freed, either of the following may happen:
*the hypervisor reallocates the freed memory back to the guest
*the hypervisor frees the memory and maps in different physical memory
To prevent any information leak, the hypervisor fills memory with
zeroes before allocating it to the guest.
The issue arises when the page poisoning pattern is 0xaa while the
newly allocated page the guest receives from the hypervisor is filled
with 0x00; the poison check then reports memory corruption (see the
sketch below).
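To make the failure mode concrete, a hypothetical sketch of the
allocation-time verification that would misfire (not code from this
series; the real check lives in mm/page_poison.c):

static bool page_still_poisoned(const unsigned char *mem, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		if (mem[i] != 0xaa)	/* expected PAGE_POISON pattern */
			return false;	/* reported as memory corruption */
	return true;
}

A page that the hypervisor has meanwhile replaced with zero-filled
memory reads back 0x00 and fails this check even though nothing in
the guest wrote to it.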
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
include/linux/page_hinting.h | 9 +++++++++
mm/page_poison.c | 2 +-
virt/kvm/page_hinting.c | 1 +
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
index dd30644..b639078 100644
--- a/include/linux/page_hinting.h
+++ b/include/linux/page_hinting.h
@@ -1,3 +1,4 @@
+#include <linux/poison.h>
#define MAX_FGPT_ENTRIES 1000
/*
* hypervisor_pages - It is a dummy structure passed with the hypercall.
@@ -14,6 +15,7 @@ struct hypervisor_pages {
extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
extern void (*request_hypercall)(void *, int);
extern void *balloon_ptr;
+extern bool want_page_poisoning;
extern struct static_key_false guest_page_hinting_key;
int guest_page_hinting_sysctl(struct ctl_table *table, int write,
@@ -21,3 +23,10 @@ int guest_page_hinting_sysctl(struct ctl_table *table, int write,
extern int guest_page_hinting_flag;
void guest_alloc_page(struct page *page, int order);
void guest_free_page(struct page *page, int order);
+
+static inline void disable_page_poisoning(void)
+{
+#ifdef CONFIG_PAGE_POISONING
+ want_page_poisoning = 0;
+#endif
+}
diff --git a/mm/page_poison.c b/mm/page_poison.c
index e83fd44..3e9f26d 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -7,7 +7,7 @@
#include <linux/poison.h>
#include <linux/ratelimit.h>
-static bool want_page_poisoning __read_mostly;
+bool want_page_poisoning __read_mostly;
static int early_page_poison_param(char *buf)
{
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index f66ad63..1ba2e0b 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -302,6 +302,7 @@ void guest_free_page(struct page *page, int order)
* process context causing unwanted overwrites. This will be replaced
* with a better solution to prevent such race conditions.
*/
+ disable_page_poisoning();
local_irq_save(flags);
free_page_obj = &get_cpu_var(kvm_pt)[0];
trace_guest_free_page(page, order);
--
2.9.4
* [QEMU PATCH] kvm: Support for guest page hinting
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (6 preceding siblings ...)
2017-12-01 17:31 ` [Patch v6 7/7] KVM: Disabling page poisoning to avoid memory corruption errors nilal
@ 2017-12-01 17:32 ` nilal
2018-01-25 17:28 ` [Patch v6 0/7] KVM: Guest " Michael S. Tsirkin
8 siblings, 0 replies; 10+ messages in thread
From: nilal @ 2017-12-01 17:32 UTC
To: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
mst, dodgen, konrad.wilk
From: Nitesh Narayan Lal <nilal@redhat.com>
This patch enables QEMU to handle page hinting requests from the
guest. Once the guest kicks QEMU with a list of pages to free, QEMU
retrieves each guest physical address in the list, converts it to a
host virtual address, and then madvises that memory with MADV_FREE so
the host can reclaim it lazily.
Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
hw/virtio/virtio-balloon.c | 117 +++++++++++++++++++++++-
hw/virtio/virtio.c | 23 +++++
include/hw/virtio/virtio-access.h | 1 +
include/hw/virtio/virtio-balloon.h | 2 +-
include/qemu/osdep.h | 7 ++
include/standard-headers/linux/virtio_balloon.h | 1 +
6 files changed, 149 insertions(+), 2 deletions(-)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 37cde38..c2d005d 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -33,6 +33,8 @@
#define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT)
+void page_hinting_request(uint64_t addr, uint32_t len);
+
static void balloon_page(void *addr, int deflate)
{
if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
@@ -73,11 +75,22 @@ static bool balloon_stats_supported(const VirtIOBalloon *s)
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}
+static bool balloon_hinting_supported(const VirtIOBalloon *s)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ return virtio_vdev_has_feature(vdev, VIRTIO_GUEST_PAGE_HINTING_VQ);
+}
+
static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
return s->stats_poll_interval > 0;
}
+static bool page_hinting_enabled(const VirtIOBalloon *s)
+{
+ return s->stats_poll_interval > 0;
+}
+
static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
if (balloon_stats_enabled(s)) {
@@ -93,14 +106,20 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
}
+static void page_hinting_change_timer(VirtIOBalloon *s, int64_t secs)
+{
+ timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
+}
+
static void balloon_stats_poll_cb(void *opaque)
{
VirtIOBalloon *s = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
- if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
+ if (s->stats_vq_elem == NULL || !balloon_stats_supported(s) || !balloon_hinting_supported(s)) {
/* re-schedule */
balloon_stats_change_timer(s, s->stats_poll_interval);
+ page_hinting_change_timer(s, s->stats_poll_interval);
return;
}
@@ -197,12 +216,101 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
balloon_stats_change_timer(s, value);
return;
}
+
+ if (page_hinting_enabled(s)) {
+ /* timer interval change */
+ s->stats_poll_interval = value;
+ page_hinting_change_timer(s, value);
+ return;
+ }
/* create a new timer */
g_assert(s->stats_timer == NULL);
s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
s->stats_poll_interval = value;
balloon_stats_change_timer(s, 0);
+ /* the stats timer was just created above; arm it for hinting too */
+ page_hinting_change_timer(s, 0);
+}
+
+static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
+{
+ MemoryRegionSection mrs = memory_region_find(get_system_memory(),
+ addr, 1);
+
+ if (!mrs.mr) {
+ error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, addr);
+ return NULL;
+ }
+
+ if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
+ error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", addr);
+ memory_region_unref(mrs.mr);
+ return NULL;
+ }
+
+ *p_mr = mrs.mr;
+ return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
+}
+
+struct guest_pages {
+ unsigned long pfn;
+ unsigned int pages;
+};
+
+
+void page_hinting_request(uint64_t addr, uint32_t len)
+{
+ Error *local_err = NULL;
+ MemoryRegion *mr = NULL;
+ void *hvaddr;
+ int ret = 0;
+ struct guest_pages *guest_obj;
+ int i = 0;
+ void *hvaddr_to_free;
+ unsigned long pfn, pfn_end;
+ uint64_t gpaddr_to_free;
+
+ hvaddr = gpa2hva(&mr, addr, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ guest_obj = hvaddr;
+
+ while (i < len) {
+ pfn = guest_obj[i].pfn;
+ pfn_end = guest_obj[i].pfn + guest_obj[i].pages - 1;
+ while (pfn <= pfn_end) {
+ gpaddr_to_free = pfn << VIRTIO_BALLOON_PFN_SHIFT;
+ hvaddr_to_free = gpa2hva(&mr, gpaddr_to_free, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ ret = qemu_madvise((void *)hvaddr_to_free, 4096, QEMU_MADV_FREE);
+ if (ret == -1)
+ printf("\n%d:%s Error: Madvise failed with error:%d\n", __LINE__, __func__, ret);
+ pfn++;
+ }
+ i++;
+ }
+}
+
+
+static void virtio_balloon_page_hinting(VirtIODevice *vdev, VirtQueue *vq)
+{
+ uint64_t addr;
+ uint32_t len;
+ VirtQueueElement elem = {};
+
+ pop_hinting_addr(vq, &addr, &len);
+ page_hinting_request(addr, len);
+ virtqueue_push(vq, &elem, 0);
+ virtio_notify(vdev, vq);
}
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -374,6 +482,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
f |= dev->host_features;
virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+ virtio_add_feature(&f, VIRTIO_GUEST_PAGE_HINTING_VQ);
return f;
}
@@ -407,6 +516,9 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
if (balloon_stats_enabled(s)) {
balloon_stats_change_timer(s, s->stats_poll_interval);
}
+ if (page_hinting_enabled(s)) {
+ page_hinting_change_timer(s, s->stats_poll_interval);
+ }
return 0;
}
@@ -443,6 +555,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
+ s->hvq = virtio_add_queue(vdev, 128, virtio_balloon_page_hinting);
reset_stats(s);
}
@@ -486,6 +599,8 @@ static void virtio_balloon_instance_init(Object *obj)
object_property_add(obj, "guest-stats", "guest statistics",
balloon_stats_get_all, NULL, NULL, s, NULL);
+ object_property_add(obj, "guest-page-hinting", "guest page hinting",
+ NULL, NULL, NULL, s, NULL);
object_property_add(obj, "guest-stats-polling-interval", "int",
balloon_stats_get_poll_interval,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 311929e..bd45135 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -825,6 +825,29 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
return elem;
}
+void pop_hinting_addr(VirtQueue *vq, uint64_t *addr, uint32_t *len)
+{
+ VRingMemoryRegionCaches *caches;
+ VRingDesc desc;
+ MemoryRegionCache *desc_cache;
+ VirtIODevice *vdev = vq->vdev;
+ unsigned int head, max;
+
+ max = vq->vring.num;
+ if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+ printf("\n%d:%sError: Unable to read head\n", __LINE__, __func__);
+ }
+
+ caches = vring_get_region_caches(vq);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
+ virtio_error(vdev, "Cannot map descriptor ring");
+ }
+ desc_cache = &caches->desc;
+ vring_desc_read(vdev, &desc, desc_cache, head);
+ *addr = desc.addr;
+ *len = desc.len;
+}
+
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
unsigned int i, head, max;
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 2e92074..568d71f 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -24,6 +24,7 @@
#define LEGACY_VIRTIO_IS_BIENDIAN 1
#endif
+void pop_hinting_addr(VirtQueue *vq, uint64_t *addr, uint32_t *len);
static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
{
#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1ea13bd..dfb5782 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -33,7 +33,7 @@ typedef struct virtio_balloon_stat_modern {
typedef struct VirtIOBalloon {
VirtIODevice parent_obj;
- VirtQueue *ivq, *dvq, *svq;
+ VirtQueue *ivq, *dvq, *svq, *hvq;
uint32_t num_pages;
uint32_t actual;
uint64_t stats[VIRTIO_BALLOON_S_NR];
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 9dd318a..c3ea68c 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -278,6 +278,11 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#else
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
#endif
+#ifdef MADV_FREE
+#define QEMU_MADV_FREE MADV_FREE
+#else
+#define QEMU_MADV_FREE QEMU_MADV_INVALID
+#endif
#elif defined(CONFIG_POSIX_MADVISE)
@@ -291,6 +296,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_FREE QEMU_MADV_INVALID
#else /* no-op */
@@ -304,6 +310,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_FREE QEMU_MADV_INVALID
#endif
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 9d06ccd..41766e1 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -34,6 +34,7 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_GUEST_PAGE_HINTING_VQ 3 /* Page hinting virtqueue */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
--
2.9.4
* Re: [Patch v6 0/7] KVM: Guest page hinting
2017-12-01 17:31 [Patch v6 0/7] KVM: Guest page hinting nilal
` (7 preceding siblings ...)
2017-12-01 17:32 ` [QEMU PATCH] kvm: Support for guest page hinting nilal
@ 2018-01-25 17:28 ` Michael S. Tsirkin
8 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2018-01-25 17:28 UTC
To: nilal
Cc: kvm, pbonzini, pagupta, wei.w.wang, yang.zhang.wz, riel, david,
dodgen, konrad.wilk
On Fri, Dec 01, 2017 at 12:31:29PM -0500, nilal@redhat.com wrote:
> Changelog in v6:
> -Moved CONFIG_KVM_FREE_PAGE_HINTING from virt/kvm/Kconfig to arch/x86/kvm/Kconfig in order to resolve conflicts with other architectures:
> *s390 has its own page hinting functionality
> *powerpc has its own arch_free_page implementation
> -Fixed the typo in virtio_balloon structure which caused compilation issues when guest page hinting was disabled
> -Separated init_vqs implementation with and without hinting_vq enablement based on the CONFIG_KVM_FREE_PAGE_HINTING ifdef
>
> Virtio interface changes are picked up from Wei's patch-set for Virtio-balloon enhancement [3]. (Wei, how would you like me to credit you in the final patch?)
So the main point missing in all this is any kind of performance
numbers: what kind of gain do you see with these patches and for which
workloads? What kind of overhead and for which workloads?
--
MST