public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Avi Kivity <avi@qumranet.com>
Cc: kvm-devel@lists.sourceforge.net, Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 2/5] KVM: hypercall based pte updates and TLB flushes (v2)
Date: Wed, 20 Feb 2008 14:47:22 -0500	[thread overview]
Message-ID: <20080220195019.634096711@harmony.lab.boston.redhat.com> (raw)
In-Reply-To: 20080220194720.750258362@harmony.lab.boston.redhat.com

[-- Attachment #1: kvm-mmu-write --]
[-- Type: text/plain, Size: 8871 bytes --]

Hypercall-based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.

Don't report the feature if two-dimensional paging is enabled.

v1->v2:
- guest passes the physical destination address, which is cheaper than doing
a virtual-to-physical (v->p) translation in the host.
- infer size of pte from guest mode

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>


Index: kvm.paravirt2/arch/x86/kernel/kvm.c
===================================================================
--- kvm.paravirt2.orig/arch/x86/kernel/kvm.c
+++ kvm.paravirt2/arch/x86/kernel/kvm.c
@@ -33,6 +33,91 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+	const uint8_t *p = src;
+	unsigned long a0 = *(unsigned long *)p;
+	unsigned long a1 = 0;
+
+#ifdef CONFIG_X86_32
+	size >>= 2;
+	if (size == 2)
+		a1 = *(u32 *)&p[4];
+#endif
+	kvm_hypercall3(KVM_HYPERCALL_MMU_WRITE, (unsigned long)__pa(dest), a0,
+			a1);
+}
+
+/*
+ * We only need to hook operations that are MMU writes.  We hook these so that
+ * we can use lazy MMU mode to batch these operations.  We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+			  unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = __pte(0);
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(0);
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+	kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+	kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+}
+
 static void paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -41,6 +126,24 @@ static void paravirt_ops_setup(void)
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
+	if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+		pv_mmu_ops.set_pte = kvm_set_pte;
+		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+		pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+		pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+		pv_mmu_ops.pte_clear = kvm_pte_clear;
+		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+		pv_mmu_ops.set_pud = kvm_set_pud;
+		pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+		pv_mmu_ops.release_pt = kvm_release_pt;
+		pv_mmu_ops.release_pd = kvm_release_pt;
+	}
 }
 
 void __init kvm_guest_init(void)
Index: kvm.paravirt2/arch/x86/kvm/mmu.c
===================================================================
--- kvm.paravirt2.orig/arch/x86/kvm/mmu.c
+++ kvm.paravirt2/arch/x86/kvm/mmu.c
@@ -39,7 +39,7 @@
  * 2. while doing 1. it walks guest-physical to host-physical
  * If the hardware supports that we don't need to do shadow paging.
  */
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
 
 #undef MMU_DEBUG
 
@@ -288,7 +288,7 @@ static void mmu_free_memory_cache_page(s
 		free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -857,7 +857,7 @@ static int kvm_mmu_unprotect_page(struct
 	return r;
 }
 
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 
Index: kvm.paravirt2/arch/x86/kvm/mmu.h
===================================================================
--- kvm.paravirt2.orig/arch/x86/kvm/mmu.h
+++ kvm.paravirt2/arch/x86/kvm/mmu.h
@@ -47,4 +47,7 @@ static inline int is_paging(struct kvm_v
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
 #endif
Index: kvm.paravirt2/arch/x86/kvm/x86.c
===================================================================
--- kvm.paravirt2.orig/arch/x86/kvm/x86.c
+++ kvm.paravirt2/arch/x86/kvm/x86.c
@@ -856,6 +856,8 @@ long kvm_arch_dev_ioctl(struct file *fil
 	}
 	case KVM_GET_PARA_FEATURES: {
 		__u32 para_features = KVM_PARA_FEATURES;
+		if (tdp_enabled)
+			para_features &= ~(1UL << KVM_FEATURE_MMU_WRITE);
 
 		r = -EFAULT;
 		if (copy_to_user(argp, &para_features, sizeof para_features))
@@ -2362,6 +2364,45 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gpa_t addr,
+				   unsigned long a1, unsigned long a2)
+{
+	u64 value;
+	int bytes;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return -KVM_EFAULT;
+
+	bytes = 8;
+	value = a1;
+
+	if (!is_long_mode(vcpu)) {
+		if (is_pae(vcpu))
+			value = (u64)a2 << 32 | a1;
+		else
+			bytes = 4;
+	}
+
+	if (!emulator_write_phys(vcpu, addr, &value, bytes))
+		return -KVM_EFAULT;
+
+	return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 0;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -2386,6 +2427,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HYPERCALL_MMU_WRITE:
+		ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2);
+		break;
+	case KVM_HYPERCALL_FLUSH_TLB:
+		ret = kvm_hypercall_flush_tlb(vcpu);
+		break;
+	case KVM_HYPERCALL_RELEASE_PT:
+		ret = kvm_hypercall_release_pt(vcpu, a0);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
Index: kvm.paravirt2/include/asm-x86/kvm_para.h
===================================================================
--- kvm.paravirt2.orig/include/asm-x86/kvm_para.h
+++ kvm.paravirt2/include/asm-x86/kvm_para.h
@@ -12,6 +12,7 @@
 #define KVM_CPUID_FEATURES	0x40000001
 #define KVM_FEATURE_CLOCKSOURCE		0
 #define KVM_FEATURE_NOP_IO_DELAY	1
+#define KVM_FEATURE_MMU_WRITE		2
 
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
@@ -41,7 +42,8 @@ extern void kvmclock_init(void);
 
 
 #define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY)	|	\
-			   (1UL << KVM_FEATURE_CLOCKSOURCE))
+			   (1UL << KVM_FEATURE_CLOCKSOURCE)	|	\
+			   (1UL << KVM_FEATURE_MMU_WRITE))
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: kvm.paravirt2/include/linux/kvm_para.h
===================================================================
--- kvm.paravirt2.orig/include/linux/kvm_para.h
+++ kvm.paravirt2/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
+#define KVM_EFAULT		EFAULT
+#define KVM_E2BIG		E2BIG
 
-#define KVM_HC_VAPIC_POLL_IRQ            1
+#define KVM_HC_VAPIC_POLL_IRQ		1
+#define KVM_HYPERCALL_MMU_WRITE		2
+#define KVM_HYPERCALL_FLUSH_TLB		3
+#define KVM_HYPERCALL_RELEASE_PT	4
 
 /*
  * hypercalls use architecture specific
Index: kvm.paravirt2/include/asm-x86/kvm_host.h
===================================================================
--- kvm.paravirt2.orig/include/asm-x86/kvm_host.h
+++ kvm.paravirt2/include/asm-x86/kvm_host.h
@@ -419,6 +419,8 @@ void kvm_mmu_change_mmu_pages(struct kvm
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 
+extern bool tdp_enabled;
+
 enum emulation_result {
 	EMULATE_DONE,       /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */

-- 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

  parent reply	other threads:[~2008-02-20 19:47 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-20 19:47 [patch 0/5] KVM paravirt MMU updates and cr3 caching (v2) Marcelo Tosatti
2008-02-20 19:47 ` [patch 1/5] KVM: add basic paravirt support (v2) Marcelo Tosatti
2008-02-21 15:38   ` Avi Kivity
2008-02-21 15:55     ` Marcelo Tosatti
2008-02-21 18:31       ` Avi Kivity
2008-02-20 19:47 ` Marcelo Tosatti [this message]
2008-02-21 15:43   ` [patch 2/5] KVM: hypercall based pte updates and TLB flushes (v2) Avi Kivity
2008-02-20 19:47 ` [patch 3/5] KVM: hypercall batching (v2) Marcelo Tosatti
2008-02-21 15:52   ` Avi Kivity
2008-02-21 18:05     ` Marcelo Tosatti
2008-02-21 18:30       ` Avi Kivity
2008-02-21 19:31         ` Marcelo Tosatti
2008-02-22  7:10           ` Avi Kivity
2008-02-20 19:47 ` [patch 4/5] KVM: ignore zapped root pagetables (v2) Marcelo Tosatti
2008-02-21 15:57   ` Avi Kivity
2008-02-20 19:47 ` [patch 5/5] KVM: VMX cr3 cache support (v2) Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080220195019.634096711@harmony.lab.boston.redhat.com \
    --to=mtosatti@redhat.com \
    --cc=avi@qumranet.com \
    --cc=kvm-devel@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox