[patch 0/4] KVM paravirt MMU updates

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [patch 0/4] KVM paravirt MMU updates
@ 2008-01-30 21:26 Marcelo Tosatti
  2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

The following patchset, based on an earlier version by Anthony Liguori, adds
paravirt_ops support for KVM guests enabling hypercall based pte updates
and hypercall batching.

make -j4 compilation of a recent 2.6 kernel tree (two runs): 
stock:
826.34user 573.75system 9:07.13elapsed 255%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (30major+26687895minor)pagefaults 0swaps

user 608.32system 9:01.11elapsed 275%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+26518405minor)pagefaults 0swaps

hypercall pte: 
816.85user 463.26system 8:15.57elapsed 258%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (0major+26518853minor)pagefaults 0swaps

820.17user 459.96system 8:14.30elapsed 258%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (0major+26522238minor)pagefaults 0swaps

About 8.5% faster.

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 1/4] KVM: basic paravirt support
  2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
  2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti

[-- Attachment #1: kvm-paravirt-core --]
[-- Type: text/plain, Size: 7119 bytes --]

Add basic KVM paravirt support. Avoid vm-exits on IO delays.

Add KVM_GET_PARA_FEATURES ioctl so paravirt features can be reported via
cpuid.

Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Index: linux-2.6-x86-kvm/arch/x86/Kconfig
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/Kconfig
+++ linux-2.6-x86-kvm/arch/x86/Kconfig
@@ -357,6 +356,14 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+config KVM_GUEST
+	bool "KVM Guest support"
+	select PARAVIRT
+	depends on !(X86_VISWS || X86_VOYAGER)
+	help
+	 This option enables various optimizations for running under the KVM
+	 hypervisor.
+
 source "arch/x86/lguest/Kconfig"
 
 endif
Index: linux-2.6-x86-kvm/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/Makefile
+++ linux-2.6-x86-kvm/arch/x86/kernel/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_MGEODE_LX)		+= geode_32.o mfgpt_32.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-y				+= pcspeaker.o
 
Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- /dev/null
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -0,0 +1,52 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
+ * Copyright IBM Corporation, 2007
+ *   Authors: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void paravirt_ops_setup(void)
+{
+	pv_info.name = "KVM";
+	pv_info.paravirt_enabled = 1;
+
+	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+		pv_cpu_ops.io_delay = kvm_io_delay;
+
+}
+
+void __init kvm_guest_init(void)
+{
+	if (!kvm_para_available())
+		return;
+
+	paravirt_ops_setup();
+}
Index: linux-2.6-x86-kvm/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/setup_32.c
@@ -46,6 +46,7 @@
 #include <linux/pfn.h>
 #include <linux/pci.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <video/edid.h>
 
@@ -772,6 +773,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	vmi_init();
 #endif
+	kvm_guest_init();
 
 	/*
 	 * NOTE: before this point _nobody_ is allowed to allocate
Index: linux-2.6-x86-kvm/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/uaccess.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -506,6 +507,8 @@ void __init setup_arch(char **cmdline_p)
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	kvm_guest_init();
+
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
 	 */
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -680,6 +680,7 @@ int kvm_dev_ioctl_check_extension(long e
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_PARA_FEATURES:
 		r = 1;
 		break;
 	case KVM_CAP_VAPIC:
@@ -727,6 +728,15 @@ long kvm_arch_dev_ioctl(struct file *fil
 		r = 0;
 		break;
 	}
+	case KVM_GET_PARA_FEATURES: {
+		__u32 para_features = KVM_PARA_FEATURES;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &para_features, sizeof para_features))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -5,6 +5,7 @@
  * should be used to determine that a VM is running under KVM.
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
+#define KVM_FEATURE_NOP_IO_DELAY	0
 
 /* This CPUID returns a feature bitmap in eax.  Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -14,6 +15,8 @@
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
+#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
  */
Index: linux-2.6-x86-kvm/include/linux/kvm.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm.h
+++ linux-2.6-x86-kvm/include/linux/kvm.h
@@ -221,6 +221,7 @@ struct kvm_vapic_addr {
  * Get size for mmap(vcpu_fd)
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
+#define KVM_GET_PARA_FEATURES     _IOR(KVMIO,  0x05, __u32)
 
 /*
  * Extension capability list.
@@ -232,6 +233,7 @@ struct kvm_vapic_addr {
 #define KVM_CAP_SET_TSS_ADDR 4
 #define KVM_CAP_EXT_CPUID 5
 #define KVM_CAP_VAPIC 6
+#define KVM_CAP_PARA_FEATURES 7
 
 /*
  * ioctls for VM fds
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -20,6 +20,12 @@
 #include <asm/kvm_para.h>
 
 #ifdef __KERNEL__
+#ifdef CONFIG_KVM_GUEST
+void __init kvm_guest_init(void);
+#else
+#define kvm_guest_init() do { } while (0)
+#endif
+
 static inline int kvm_para_has_feature(unsigned int feature)
 {
 	if (kvm_arch_para_features() & (1UL << feature))

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 2/4] KVM: hypercall based pte updates and TLB flushes
  2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
  2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
  2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
  2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti
  3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti

[-- Attachment #1: kvm-mmu-write --]
[-- Type: text/plain, Size: 8433 bytes --]

Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.

Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -33,6 +33,104 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+	const uint8_t *p = src;
+	unsigned long a0 = *(unsigned long *)p;
+	unsigned long a1 = 0;
+
+	size >>= 2;
+#ifdef CONFIG_X86_32
+	if (size == 2)
+		a1 = *(u32 *)&p[4];
+#endif
+	kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+		       a1);
+}
+
+/*
+ * We only need to hook operations that are MMU writes.  We hook these so that
+ * we can use lazy MMU mode to batch these operations.  We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+			  unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = __pte(0);
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(0);
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_pgd_clear(pgd_t *pgdp)
+{
+	pgd_t pgd = __pgd(0);
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#if PAGETABLE_LEVELS == 4
+static void kvm_pud_clear(pud_t *pudp)
+{
+	pud_t pud = __pud(0);
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+	kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+	kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+}
+
 static void paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -41,6 +139,28 @@ static void paravirt_ops_setup(void)
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
+	if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+		pv_mmu_ops.set_pte = kvm_set_pte;
+		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+		pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+		pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+		pv_mmu_ops.pte_clear = kvm_pte_clear;
+		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+		pv_mmu_ops.set_pud = kvm_set_pud;
+		pv_mmu_ops.set_pgd = kvm_set_pgd;
+		pv_mmu_ops.pgd_clear = kvm_pgd_clear;
+#if PAGETABLE_LEVELS == 4
+		pv_mmu_ops.pud_clear = kvm_pud_clear;
+#endif
+#endif
+		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+		pv_mmu_ops.release_pt = kvm_release_pt;
+		pv_mmu_ops.release_pd = kvm_release_pt;
+	}
 }
 
 void __init kvm_guest_init(void)
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
@@ -287,7 +287,7 @@ static void mmu_free_memory_cache_page(s
 		free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -856,7 +856,7 @@ static int kvm_mmu_unprotect_page(struct
 	return r;
 }
 
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.h
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
@@ -41,4 +41,7 @@ static inline int is_paging(struct kvm_v
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
 #endif
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -2250,6 +2250,52 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gva_t addr,
+				   unsigned long size, unsigned long a0,
+				   unsigned long a1)
+{
+	gpa_t gpa;
+	u64 value;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return -KVM_EFAULT;
+
+	down_read(&current->mm->mmap_sem);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+	up_read(&current->mm->mmap_sem);
+
+	if (gpa == UNMAPPED_GVA)
+		return -KVM_EFAULT;
+	if (size == 1) {
+		if (!emulator_write_phys(vcpu, gpa, &a0, sizeof(a0)))
+			return -KVM_EFAULT;
+	} else if (size == 2) {
+		if (!is_long_mode(vcpu) && is_pae(vcpu))
+			value = (u64)a1 << 32 | a0;
+		else
+			value = a0;
+		if (!emulator_write_phys(vcpu, gpa, &value, sizeof(value)))
+			return -KVM_EFAULT;
+	} else
+		return -KVM_E2BIG;
+
+	return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 0;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -2274,6 +2320,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HYPERCALL_MMU_WRITE:
+		ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+		break;
+	case KVM_HYPERCALL_FLUSH_TLB:
+		ret = kvm_hypercall_flush_tlb(vcpu);
+		break;
+	case KVM_HYPERCALL_RELEASE_PT:
+		ret = kvm_hypercall_release_pt(vcpu, a0);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -6,6 +6,7 @@
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
 #define KVM_FEATURE_NOP_IO_DELAY	0
+#define KVM_FEATURE_MMU_WRITE		1
 
 /* This CPUID returns a feature bitmap in eax.  Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -15,7 +16,8 @@
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY)	|	\
+			   (1UL << KVM_FEATURE_MMU_WRITE))
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
+#define KVM_EFAULT		EFAULT
+#define KVM_E2BIG		E2BIG
 
-#define KVM_HC_VAPIC_POLL_IRQ            1
+#define KVM_HC_VAPIC_POLL_IRQ		1
+#define KVM_HYPERCALL_MMU_WRITE		2
+#define KVM_HYPERCALL_FLUSH_TLB		3
+#define KVM_HYPERCALL_RELEASE_PT	4
 
 /*
  * hypercalls use architecture specific

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface
  2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
  2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
  2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
  2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti
  3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti

[-- Attachment #1: pte-access-paravirt --]
[-- Type: text/plain, Size: 2480 bytes --]

ptep_set_access_flags and ptep_set_wrprotect are doing direct pte
updates ignoring the paravirt interface.

The wrprotect change is especially important since it allows full
batching of fork() on COW mappings.

There are still a few PTE update interfaces bypassing paravirt, such as
ptep_get_and_clear_full and ptep_get_and_clear.


Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Index: linux-2.6-x86-kvm/include/asm-x86/paravirt.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/paravirt.h
+++ linux-2.6-x86-kvm/include/asm-x86/paravirt.h
@@ -1004,6 +1004,13 @@ static inline void set_pte(pte_t *ptep, 
 			    pte.pte);
 }
 
+static inline void pte_clear_bit(unsigned int bit, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	clear_bit(bit, (unsigned long *)&pte.pte);
+	set_pte(ptep, pte);
+}
+
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
Index: linux-2.6-x86-kvm/include/asm-x86/pgtable.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/pgtable.h
+++ linux-2.6-x86-kvm/include/asm-x86/pgtable.h
@@ -227,6 +227,8 @@ void native_pagetable_setup_done(pgd_t *
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
 
+#define pte_clear_bit(bit, ptep)	clear_bit(bit, (unsigned long *)&ptep->pte)
+
 static inline void paravirt_pagetable_setup_start(pgd_t *base)
 {
 	native_pagetable_setup_start(base);
@@ -302,7 +304,7 @@ static inline void native_set_pte_at(str
 ({									\
 	int __changed = !pte_same(*(ptep), entry);			\
 	if (__changed && dirty) {					\
-		*ptep = entry;						\
+		set_pte(ptep, entry);					\
 		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
 		flush_tlb_page(vma, address);				\
 	}								\
@@ -357,7 +359,7 @@ static inline pte_t ptep_get_and_clear_f
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+	pte_clear_bit(_PAGE_BIT_RW, ptep);
 	pte_update(mm, addr, ptep);
 }
 

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 4/4] KVM: hypercall batching
  2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
                   ` (2 preceding siblings ...)
  2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
  3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti

[-- Attachment #1: kvm-multicall --]
[-- Type: text/plain, Size: 8478 bytes --]

Batch pte updates and tlb flushes in lazy MMU mode.

Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -25,6 +25,74 @@
 #include <linux/kvm_para.h>
 #include <linux/cpu.h>
 #include <linux/mm.h>
+#include <linux/hardirq.h>
+
+#define MAX_MULTICALL_NR (PAGE_SIZE / sizeof(struct kvm_multicall_entry))
+
+struct kvm_para_state {
+	struct kvm_multicall_entry queue[MAX_MULTICALL_NR];
+	int queue_index;
+	enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static int can_defer_hypercall(struct kvm_para_state *state, unsigned int nr)
+{
+	if (state->mode == PARAVIRT_LAZY_MMU) {
+		switch (nr) {
+		case KVM_HYPERCALL_MMU_WRITE:
+		case KVM_HYPERCALL_FLUSH_TLB:
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void hypercall_queue_flush(struct kvm_para_state *state)
+{
+	if (state->queue_index) {
+		kvm_hypercall2(KVM_HYPERCALL_MULTICALL, __pa(&state->queue),
+			      state->queue_index);
+		state->queue_index = 0;
+	}
+}
+
+static void kvm_hypercall_defer(struct kvm_para_state *state,
+				unsigned int nr,
+				unsigned long a0, unsigned long a1,
+				unsigned long a2, unsigned long a3)
+{
+	struct kvm_multicall_entry *entry;
+
+	BUG_ON(preemptible());
+
+	if (state->queue_index == MAX_MULTICALL_NR)
+		hypercall_queue_flush(state);
+
+	entry = &state->queue[state->queue_index++];
+	entry->nr = nr;
+	entry->a0 = a0;
+	entry->a1 = a1;
+	entry->a2 = a2;
+	entry->a3 = a3;
+}
+
+static long kvm_hypercall(unsigned int nr, unsigned long a0,
+			  unsigned long a1, unsigned long a2,
+			  unsigned long a3)
+{
+	struct kvm_para_state *state = &get_cpu_var(para_state);
+	long ret = 0;
+
+	if (can_defer_hypercall(state, nr))
+		kvm_hypercall_defer(state, nr, a0, a1, a2, a3);
+	else
+		ret = kvm_hypercall4(nr, a0, a1, a2, a3);
+
+	put_cpu_var(para_state);
+	return ret;
+}
 
 /*
  * No need for any "IO delay" on KVM
@@ -44,7 +112,7 @@ static void kvm_mmu_write(void *dest, co
 	if (size == 2)
 		a1 = *(u32 *)&p[4];
 #endif
-	kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+	kvm_hypercall(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
 		       a1);
 }
 
@@ -123,12 +191,31 @@ static void kvm_pud_clear(pud_t *pudp)
 
 static void kvm_flush_tlb(void)
 {
-	kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+	kvm_hypercall(KVM_HYPERCALL_FLUSH_TLB, 0, 0, 0, 0);
 }
 
 static void kvm_release_pt(u32 pfn)
 {
-	kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+	kvm_hypercall(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT, 0, 0, 0);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+	struct kvm_para_state *state
+		= &per_cpu(para_state, smp_processor_id());
+
+	paravirt_enter_lazy_mmu();
+	state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+	struct kvm_para_state *state
+		= &per_cpu(para_state, smp_processor_id());
+
+	hypercall_queue_flush(state);
+	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	state->mode = paravirt_get_lazy_mode();
 }
 
 static void paravirt_ops_setup(void)
@@ -161,6 +248,11 @@ static void paravirt_ops_setup(void)
 		pv_mmu_ops.release_pt = kvm_release_pt;
 		pv_mmu_ops.release_pd = kvm_release_pt;
 	}
+
+	if (kvm_para_has_feature(KVM_FEATURE_MULTICALL)) {
+		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+	}
 }
 
 void __init kvm_guest_init(void)
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "fpu_reload", VCPU_STAT(fpu_reload) },
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
+	{ "multicall", VCPU_STAT(multicall) },
+	{ "multicall_nr", VCPU_STAT(multicall_nr) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -2296,6 +2298,52 @@ static int kvm_hypercall_release_pt(stru
 	return 0;
 }
 
+static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+			      unsigned long a0, unsigned long a1,
+			      unsigned long a2, unsigned long a3)
+{
+	switch (nr) {
+	case KVM_HC_VAPIC_POLL_IRQ:
+		return 0;
+	case KVM_HYPERCALL_MMU_WRITE:
+		return kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+	case KVM_HYPERCALL_FLUSH_TLB:
+		return kvm_hypercall_flush_tlb(vcpu);
+	case KVM_HYPERCALL_RELEASE_PT:
+		return kvm_hypercall_release_pt(vcpu, a0);
+	}
+
+	return -KVM_ENOSYS;
+}
+
+static int kvm_hypercall_multicall(struct kvm_vcpu *vcpu, gpa_t addr, u32 nents)
+{
+	int i, result = 0;
+
+	++vcpu->stat.multicall;
+	vcpu->stat.multicall_nr += nents;
+
+	for (i = 0; i < nents; i++) {
+		struct kvm_multicall_entry mc;
+		int ret;
+
+		down_read(&current->mm->mmap_sem);
+		ret = kvm_read_guest(vcpu->kvm, addr, &mc, sizeof(mc));
+		up_read(&current->mm->mmap_sem);
+		if (ret)
+			return -KVM_EFAULT;
+
+		ret = dispatch_hypercall(vcpu, mc.nr, mc.a0, mc.a1, mc.a2,
+					    mc.a3);
+		if (ret)
+			result = ret;
+		addr += sizeof(mc);
+	}
+	if (result < 0)
+		return -KVM_EINVAL;
+	return result;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -2316,23 +2364,11 @@ int kvm_emulate_hypercall(struct kvm_vcp
 		a3 &= 0xFFFFFFFF;
 	}
 
-	switch (nr) {
-	case KVM_HC_VAPIC_POLL_IRQ:
-		ret = 0;
-		break;
-	case KVM_HYPERCALL_MMU_WRITE:
-		ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
-		break;
-	case KVM_HYPERCALL_FLUSH_TLB:
-		ret = kvm_hypercall_flush_tlb(vcpu);
-		break;
-	case KVM_HYPERCALL_RELEASE_PT:
-		ret = kvm_hypercall_release_pt(vcpu, a0);
-		break;
-	default:
-		ret = -KVM_ENOSYS;
-		break;
-	}
+	if (nr == KVM_HYPERCALL_MULTICALL)
+		ret = kvm_hypercall_multicall(vcpu, a0, a1);
+	else
+		ret = dispatch_hypercall(vcpu, nr, a0, a1, a2, a3);
+
 	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
 	kvm_x86_ops->decache_regs(vcpu);
 	return 0;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -7,6 +7,7 @@
 #define KVM_CPUID_SIGNATURE	0x40000000
 #define KVM_FEATURE_NOP_IO_DELAY	0
 #define KVM_FEATURE_MMU_WRITE		1
+#define KVM_FEATURE_MULTICALL		2
 
 /* This CPUID returns a feature bitmap in eax.  Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -17,7 +18,17 @@
 #include <asm/processor.h>
 
 #define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY)	|	\
-			   (1UL << KVM_FEATURE_MMU_WRITE))
+			   (1UL << KVM_FEATURE_MMU_WRITE)	|	\
+			   (1UL << KVM_FEATURE_MULTICALL))
+
+struct kvm_multicall_entry
+{
+	u64 nr;
+	u64 a0;
+	u64 a1;
+	u64 a2;
+	u64 a3;
+};
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -13,11 +13,13 @@
 #define KVM_ENOSYS		1000
 #define KVM_EFAULT		EFAULT
 #define KVM_E2BIG		E2BIG
+#define KVM_EINVAL		EINVAL
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HYPERCALL_MMU_WRITE		2
 #define KVM_HYPERCALL_FLUSH_TLB		3
 #define KVM_HYPERCALL_RELEASE_PT	4
+#define KVM_HYPERCALL_MULTICALL		5
 
 /*
  * hypercalls use architecture specific
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_host.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_host.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_host.h
@@ -320,6 +320,8 @@ struct kvm_vcpu_stat {
 	u32 fpu_reload;
 	u32 insn_emulation;
 	u32 insn_emulation_fail;
+	u32 multicall;
+	u32 multicall_nr;
 };
 
 struct descriptor_table {

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-01-30 21:26 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox