* [patch 0/4] KVM paravirt MMU updates
@ 2008-01-30 21:26 Marcelo Tosatti
2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
The following patchset, based on an earlier version by Anthony Liguori, adds
paravirt_ops support for KVM guests, enabling hypercall-based pte updates
and hypercall batching.
make -j4 compilation of a recent 2.6 kernel tree (two runs):
stock:
826.34user 573.75system 9:07.13elapsed 255%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (30major+26687895minor)pagefaults 0swaps
user 608.32system 9:01.11elapsed 275%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+26518405minor)pagefaults 0swaps
hypercall pte:
816.85user 463.26system 8:15.57elapsed 258%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (0major+26518853minor)pagefaults 0swaps
820.17user 459.96system 8:14.30elapsed 258%CPU (0avgtext+0avgdata 0maxresident)k0inputs+0outputs (0major+26522238minor)pagefaults 0swaps
About 8.5% faster.
--
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 5+ messages in thread
* [patch 1/4] KVM: basic paravirt support
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
To: Anthony Liguori
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti
[-- Attachment #1: kvm-paravirt-core --]
[-- Type: text/plain, Size: 7119 bytes --]
Add basic KVM paravirt support. Avoid vm-exits on IO delays.
Add KVM_GET_PARA_FEATURES ioctl so paravirt features can be reported via
cpuid.
Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Index: linux-2.6-x86-kvm/arch/x86/Kconfig
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/Kconfig
+++ linux-2.6-x86-kvm/arch/x86/Kconfig
@@ -357,6 +356,14 @@ config VMI
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+config KVM_GUEST
+ bool "KVM Guest support"
+ select PARAVIRT
+ depends on !(X86_VISWS || X86_VOYAGER)
+ help
+ This option enables various optimizations for running under the KVM
+ hypervisor.
+
source "arch/x86/lguest/Kconfig"
endif
Index: linux-2.6-x86-kvm/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/Makefile
+++ linux-2.6-x86-kvm/arch/x86/kernel/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-y += pcspeaker.o
Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- /dev/null
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -0,0 +1,52 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
+ * Copyright IBM Corporation, 2007
+ * Authors: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void paravirt_ops_setup(void)
+{
+ pv_info.name = "KVM";
+ pv_info.paravirt_enabled = 1;
+
+ if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+ pv_cpu_ops.io_delay = kvm_io_delay;
+
+}
+
+void __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ paravirt_ops_setup();
+}
Index: linux-2.6-x86-kvm/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/setup_32.c
@@ -46,6 +46,7 @@
#include <linux/pfn.h>
#include <linux/pci.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <video/edid.h>
@@ -772,6 +773,7 @@ void __init setup_arch(char **cmdline_p)
*/
vmi_init();
#endif
+ kvm_guest_init();
/*
* NOTE: before this point _nobody_ is allowed to allocate
Index: linux-2.6-x86-kvm/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
#include <linux/ctype.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -506,6 +507,8 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
+ kvm_guest_init();
+
/*
* We trust e820 completely. No explicit ROM probing in memory.
*/
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -680,6 +680,7 @@ int kvm_dev_ioctl_check_extension(long e
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_PARA_FEATURES:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -727,6 +728,15 @@ long kvm_arch_dev_ioctl(struct file *fil
r = 0;
break;
}
+ case KVM_GET_PARA_FEATURES: {
+ __u32 para_features = KVM_PARA_FEATURES;
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &para_features, sizeof para_features))
+ goto out;
+ r = 0;
+ break;
+ }
default:
r = -EINVAL;
}
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -5,6 +5,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
+#define KVM_FEATURE_NOP_IO_DELAY 0
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
* paravirtualization, the appropriate feature bit should be checked.
@@ -14,6 +15,8 @@
#ifdef __KERNEL__
#include <asm/processor.h>
+#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
*/
Index: linux-2.6-x86-kvm/include/linux/kvm.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm.h
+++ linux-2.6-x86-kvm/include/linux/kvm.h
@@ -221,6 +221,7 @@ struct kvm_vapic_addr {
* Get size for mmap(vcpu_fd)
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
+#define KVM_GET_PARA_FEATURES _IOR(KVMIO, 0x05, __u32)
/*
* Extension capability list.
@@ -232,6 +233,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_SET_TSS_ADDR 4
#define KVM_CAP_EXT_CPUID 5
#define KVM_CAP_VAPIC 6
+#define KVM_CAP_PARA_FEATURES 7
/*
* ioctls for VM fds
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -20,6 +20,12 @@
#include <asm/kvm_para.h>
#ifdef __KERNEL__
+#ifdef CONFIG_KVM_GUEST
+void __init kvm_guest_init(void);
+#else
+#define kvm_guest_init() do { } while (0)
+#endif
+
static inline int kvm_para_has_feature(unsigned int feature)
{
if (kvm_arch_para_features() & (1UL << feature))
--
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 5+ messages in thread
* [patch 2/4] KVM: hypercall based pte updates and TLB flushes
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti
3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
To: Anthony Liguori
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti
[-- Attachment #1: kvm-mmu-write --]
[-- Type: text/plain, Size: 8433 bytes --]
Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.
Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -33,6 +33,104 @@ static void kvm_io_delay(void)
{
}
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+ const uint8_t *p = src;
+ unsigned long a0 = *(unsigned long *)p;
+ unsigned long a1 = 0;
+
+ size >>= 2;
+#ifdef CONFIG_X86_32
+ if (size == 2)
+ a1 = *(u32 *)&p[4];
+#endif
+ kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+ a1);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = __pte(0);
+ kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+ pmd_t pmd = __pmd(0);
+ kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_pgd_clear(pgd_t *pgdp)
+{
+ pgd_t pgd = __pgd(0);
+ kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+ kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#if PAGETABLE_LEVELS == 4
+static void kvm_pud_clear(pud_t *pudp)
+{
+ pud_t pud = __pud(0);
+ kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+ kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+ kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+}
+
static void paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@@ -41,6 +139,28 @@ static void paravirt_ops_setup(void)
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
pv_cpu_ops.io_delay = kvm_io_delay;
+ if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+ pv_mmu_ops.set_pte = kvm_set_pte;
+ pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+ pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+ pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+ pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+ pv_mmu_ops.pte_clear = kvm_pte_clear;
+ pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+ pv_mmu_ops.set_pud = kvm_set_pud;
+ pv_mmu_ops.set_pgd = kvm_set_pgd;
+ pv_mmu_ops.pgd_clear = kvm_pgd_clear;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.pud_clear = kvm_pud_clear;
+#endif
+#endif
+ pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+ pv_mmu_ops.release_pt = kvm_release_pt;
+ pv_mmu_ops.release_pd = kvm_release_pt;
+ }
}
void __init kvm_guest_init(void)
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
@@ -287,7 +287,7 @@ static void mmu_free_memory_cache_page(s
free_page((unsigned long)mc->objects[--mc->nobjs]);
}
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
int r;
@@ -856,7 +856,7 @@ static int kvm_mmu_unprotect_page(struct
return r;
}
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.h
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
@@ -41,4 +41,7 @@ static inline int is_paging(struct kvm_v
return vcpu->arch.cr0 & X86_CR0_PG;
}
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
#endif
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -2250,6 +2250,52 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gva_t addr,
+ unsigned long size, unsigned long a0,
+ unsigned long a1)
+{
+ gpa_t gpa;
+ u64 value;
+
+ if (mmu_topup_memory_caches(vcpu))
+ return -KVM_EFAULT;
+
+ down_read(&current->mm->mmap_sem);
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ up_read(&current->mm->mmap_sem);
+
+ if (gpa == UNMAPPED_GVA)
+ return -KVM_EFAULT;
+ if (size == 1) {
+ if (!emulator_write_phys(vcpu, gpa, &a0, sizeof(a0)))
+ return -KVM_EFAULT;
+ } else if (size == 2) {
+ if (!is_long_mode(vcpu) && is_pae(vcpu))
+ value = (u64)a1 << 32 | a0;
+ else
+ value = a0;
+ if (!emulator_write_phys(vcpu, gpa, &value, sizeof(value)))
+ return -KVM_EFAULT;
+ } else
+ return -KVM_E2BIG;
+
+ return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops->tlb_flush(vcpu);
+ return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 0;
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -2274,6 +2320,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
+ case KVM_HYPERCALL_MMU_WRITE:
+ ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+ break;
+ case KVM_HYPERCALL_FLUSH_TLB:
+ ret = kvm_hypercall_flush_tlb(vcpu);
+ break;
+ case KVM_HYPERCALL_RELEASE_PT:
+ ret = kvm_hypercall_release_pt(vcpu, a0);
+ break;
default:
ret = -KVM_ENOSYS;
break;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -6,6 +6,7 @@
*/
#define KVM_CPUID_SIGNATURE 0x40000000
#define KVM_FEATURE_NOP_IO_DELAY 0
+#define KVM_FEATURE_MMU_WRITE 1
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
* paravirtualization, the appropriate feature bit should be checked.
@@ -15,7 +16,8 @@
#ifdef __KERNEL__
#include <asm/processor.h>
-#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1UL << KVM_FEATURE_MMU_WRITE))
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
+#define KVM_EFAULT EFAULT
+#define KVM_E2BIG E2BIG
-#define KVM_HC_VAPIC_POLL_IRQ 1
+#define KVM_HC_VAPIC_POLL_IRQ 1
+#define KVM_HYPERCALL_MMU_WRITE 2
+#define KVM_HYPERCALL_FLUSH_TLB 3
+#define KVM_HYPERCALL_RELEASE_PT 4
/*
* hypercalls use architecture specific
--
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 5+ messages in thread
* [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti
3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
To: Anthony Liguori
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti
[-- Attachment #1: pte-access-paravirt --]
[-- Type: text/plain, Size: 2480 bytes --]
ptep_set_access_flags and ptep_set_wrprotect are doing direct pte
updates ignoring the paravirt interface.
The wrprotect change is especially important since it allows full
batching of fork() on COW mappings.
There are still a few PTE update interfaces bypassing paravirt, such as
ptep_get_and_clear_full and ptep_get_and_clear.
Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Index: linux-2.6-x86-kvm/include/asm-x86/paravirt.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/paravirt.h
+++ linux-2.6-x86-kvm/include/asm-x86/paravirt.h
@@ -1004,6 +1004,13 @@ static inline void set_pte(pte_t *ptep,
pte.pte);
}
+static inline void pte_clear_bit(unsigned int bit, pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ clear_bit(bit, (unsigned long *)&pte.pte);
+ set_pte(ptep, pte);
+}
+
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
Index: linux-2.6-x86-kvm/include/asm-x86/pgtable.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/pgtable.h
+++ linux-2.6-x86-kvm/include/asm-x86/pgtable.h
@@ -227,6 +227,8 @@ void native_pagetable_setup_done(pgd_t *
#define pte_update(mm, addr, ptep) do { } while (0)
#define pte_update_defer(mm, addr, ptep) do { } while (0)
+#define pte_clear_bit(bit, ptep) clear_bit(bit, (unsigned long *)&ptep->pte)
+
static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
native_pagetable_setup_start(base);
@@ -302,7 +304,7 @@ static inline void native_set_pte_at(str
({ \
int __changed = !pte_same(*(ptep), entry); \
if (__changed && dirty) { \
- *ptep = entry; \
+ set_pte(ptep, entry); \
pte_update_defer((vma)->vm_mm, (address), (ptep)); \
flush_tlb_page(vma, address); \
} \
@@ -357,7 +359,7 @@ static inline pte_t ptep_get_and_clear_f
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+ pte_clear_bit(_PAGE_BIT_RW, ptep);
pte_update(mm, addr, ptep);
}
--
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 5+ messages in thread
* [patch 4/4] KVM: hypercall batching
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
` (2 preceding siblings ...)
2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
@ 2008-01-30 21:26 ` Marcelo Tosatti
3 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2008-01-30 21:26 UTC (permalink / raw)
To: Anthony Liguori
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Marcelo Tosatti
[-- Attachment #1: kvm-multicall --]
[-- Type: text/plain, Size: 8478 bytes --]
Batch pte updates and tlb flushes in lazy MMU mode.
Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -25,6 +25,74 @@
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
+#include <linux/hardirq.h>
+
+#define MAX_MULTICALL_NR (PAGE_SIZE / sizeof(struct kvm_multicall_entry))
+
+struct kvm_para_state {
+ struct kvm_multicall_entry queue[MAX_MULTICALL_NR];
+ int queue_index;
+ enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static int can_defer_hypercall(struct kvm_para_state *state, unsigned int nr)
+{
+ if (state->mode == PARAVIRT_LAZY_MMU) {
+ switch (nr) {
+ case KVM_HYPERCALL_MMU_WRITE:
+ case KVM_HYPERCALL_FLUSH_TLB:
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static void hypercall_queue_flush(struct kvm_para_state *state)
+{
+ if (state->queue_index) {
+ kvm_hypercall2(KVM_HYPERCALL_MULTICALL, __pa(&state->queue),
+ state->queue_index);
+ state->queue_index = 0;
+ }
+}
+
+static void kvm_hypercall_defer(struct kvm_para_state *state,
+ unsigned int nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3)
+{
+ struct kvm_multicall_entry *entry;
+
+ BUG_ON(preemptible());
+
+ if (state->queue_index == MAX_MULTICALL_NR)
+ hypercall_queue_flush(state);
+
+ entry = &state->queue[state->queue_index++];
+ entry->nr = nr;
+ entry->a0 = a0;
+ entry->a1 = a1;
+ entry->a2 = a2;
+ entry->a3 = a3;
+}
+
+static long kvm_hypercall(unsigned int nr, unsigned long a0,
+ unsigned long a1, unsigned long a2,
+ unsigned long a3)
+{
+ struct kvm_para_state *state = &get_cpu_var(para_state);
+ long ret = 0;
+
+ if (can_defer_hypercall(state, nr))
+ kvm_hypercall_defer(state, nr, a0, a1, a2, a3);
+ else
+ ret = kvm_hypercall4(nr, a0, a1, a2, a3);
+
+ put_cpu_var(para_state);
+ return ret;
+}
/*
* No need for any "IO delay" on KVM
@@ -44,7 +112,7 @@ static void kvm_mmu_write(void *dest, co
if (size == 2)
a1 = *(u32 *)&p[4];
#endif
- kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+ kvm_hypercall(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
a1);
}
@@ -123,12 +191,31 @@ static void kvm_pud_clear(pud_t *pudp)
static void kvm_flush_tlb(void)
{
- kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+ kvm_hypercall(KVM_HYPERCALL_FLUSH_TLB, 0, 0, 0, 0);
}
static void kvm_release_pt(u32 pfn)
{
- kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+ kvm_hypercall(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT, 0, 0, 0);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+ struct kvm_para_state *state
+ = &per_cpu(para_state, smp_processor_id());
+
+ paravirt_enter_lazy_mmu();
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+ struct kvm_para_state *state
+ = &per_cpu(para_state, smp_processor_id());
+
+ hypercall_queue_flush(state);
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ state->mode = paravirt_get_lazy_mode();
}
static void paravirt_ops_setup(void)
@@ -161,6 +248,11 @@ static void paravirt_ops_setup(void)
pv_mmu_ops.release_pt = kvm_release_pt;
pv_mmu_ops.release_pd = kvm_release_pt;
}
+
+ if (kvm_para_has_feature(KVM_FEATURE_MULTICALL)) {
+ pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+ pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+ }
}
void __init kvm_guest_init(void)
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@ struct kvm_stats_debugfs_item debugfs_en
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
+ { "multicall", VCPU_STAT(multicall) },
+ { "multicall_nr", VCPU_STAT(multicall_nr) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -2296,6 +2298,52 @@ static int kvm_hypercall_release_pt(stru
return 0;
}
+static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3)
+{
+ switch (nr) {
+ case KVM_HC_VAPIC_POLL_IRQ:
+ return 0;
+ case KVM_HYPERCALL_MMU_WRITE:
+ return kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+ case KVM_HYPERCALL_FLUSH_TLB:
+ return kvm_hypercall_flush_tlb(vcpu);
+ case KVM_HYPERCALL_RELEASE_PT:
+ return kvm_hypercall_release_pt(vcpu, a0);
+ }
+
+ return -KVM_ENOSYS;
+}
+
+static int kvm_hypercall_multicall(struct kvm_vcpu *vcpu, gpa_t addr, u32 nents)
+{
+ int i, result = 0;
+
+ ++vcpu->stat.multicall;
+ vcpu->stat.multicall_nr += nents;
+
+ for (i = 0; i < nents; i++) {
+ struct kvm_multicall_entry mc;
+ int ret;
+
+ down_read(&current->mm->mmap_sem);
+ ret = kvm_read_guest(vcpu->kvm, addr, &mc, sizeof(mc));
+ up_read(&current->mm->mmap_sem);
+ if (ret)
+ return -KVM_EFAULT;
+
+ ret = dispatch_hypercall(vcpu, mc.nr, mc.a0, mc.a1, mc.a2,
+ mc.a3);
+ if (ret)
+ result = ret;
+ addr += sizeof(mc);
+ }
+ if (result < 0)
+ return -KVM_EINVAL;
+ return result;
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -2316,23 +2364,11 @@ int kvm_emulate_hypercall(struct kvm_vcp
a3 &= 0xFFFFFFFF;
}
- switch (nr) {
- case KVM_HC_VAPIC_POLL_IRQ:
- ret = 0;
- break;
- case KVM_HYPERCALL_MMU_WRITE:
- ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
- break;
- case KVM_HYPERCALL_FLUSH_TLB:
- ret = kvm_hypercall_flush_tlb(vcpu);
- break;
- case KVM_HYPERCALL_RELEASE_PT:
- ret = kvm_hypercall_release_pt(vcpu, a0);
- break;
- default:
- ret = -KVM_ENOSYS;
- break;
- }
+ if (nr == KVM_HYPERCALL_MULTICALL)
+ ret = kvm_hypercall_multicall(vcpu, a0, a1);
+ else
+ ret = dispatch_hypercall(vcpu, nr, a0, a1, a2, a3);
+
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
kvm_x86_ops->decache_regs(vcpu);
return 0;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -7,6 +7,7 @@
#define KVM_CPUID_SIGNATURE 0x40000000
#define KVM_FEATURE_NOP_IO_DELAY 0
#define KVM_FEATURE_MMU_WRITE 1
+#define KVM_FEATURE_MULTICALL 2
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
* paravirtualization, the appropriate feature bit should be checked.
@@ -17,7 +18,17 @@
#include <asm/processor.h>
#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) | \
- (1UL << KVM_FEATURE_MMU_WRITE))
+ (1UL << KVM_FEATURE_MMU_WRITE) | \
+ (1UL << KVM_FEATURE_MULTICALL))
+
+struct kvm_multicall_entry
+{
+ u64 nr;
+ u64 a0;
+ u64 a1;
+ u64 a2;
+ u64 a3;
+};
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -13,11 +13,13 @@
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
#define KVM_E2BIG E2BIG
+#define KVM_EINVAL EINVAL
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HYPERCALL_MMU_WRITE 2
#define KVM_HYPERCALL_FLUSH_TLB 3
#define KVM_HYPERCALL_RELEASE_PT 4
+#define KVM_HYPERCALL_MULTICALL 5
/*
* hypercalls use architecture specific
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_host.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_host.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_host.h
@@ -320,6 +320,8 @@ struct kvm_vcpu_stat {
u32 fpu_reload;
u32 insn_emulation;
u32 insn_emulation_fail;
+ u32 multicall;
+ u32 multicall_nr;
};
struct descriptor_table {
--
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-01-30 21:26 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-30 21:26 [patch 0/4] KVM paravirt MMU updates Marcelo Tosatti
2008-01-30 21:26 ` [patch 1/4] KVM: basic paravirt support Marcelo Tosatti
2008-01-30 21:26 ` [patch 2/4] KVM: hypercall based pte updates and TLB flushes Marcelo Tosatti
2008-01-30 21:26 ` [patch 3/4] paravirt: set_access_flags/set_wrprotect should use paravirt interface Marcelo Tosatti
2008-01-30 21:26 ` [patch 4/4] KVM: hypercall batching Marcelo Tosatti
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox