public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/8] x86: KVM guest: VMX cr3 cache support
@ 2008-03-02 16:31 Avi Kivity
  2008-03-06  9:17 ` Zhao Forrest
  0 siblings, 1 reply; 3+ messages in thread
From: Avi Kivity @ 2008-03-02 16:31 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm-devel, Marcelo Tosatti

From: Marcelo Tosatti <mtosatti@redhat.com>

Add support for the cr3 cache feature on Intel VMX CPUs. This avoids
vmexits on context switch if the cr3 value is cached in one of the
entries (currently 4 are present).

This is especially important for Xenner, where each guest syscall
involves a cr3 switch.

v1->v2:
- handle the race which happens when the guest has the cache cleared
in the middle of kvm_write_cr3 by injecting a GP and trapping it to
fall back to the hypercall variant (suggested by Avi).

v2->v3:
- one ioctl per paravirt feature

v3->v4:
- switch to mmu_op

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kernel/kvm.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 144 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8405984..30e3568 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,14 +26,17 @@
 #include <linux/cpu.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
+#include <asm/tlbflush.h>
+#include <asm/asm.h>
 
 #define MMU_QUEUE_SIZE 1024
 
 struct kvm_para_state {
+	struct kvm_cr3_cache cr3_cache;
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
 	enum paravirt_lazy_mode mode;
-};
+} __attribute__ ((aligned(PAGE_SIZE)));
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 
@@ -85,6 +88,121 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 	state->mmu_queue_len += len;
 }
 
+static void kvm_new_cr3(unsigned long cr3)
+{
+	struct kvm_mmu_op_set_cr3 scr3 = {
+		.header.op = KVM_MMU_OP_SET_CR3,
+		.cr3 = cr3,
+	};
+
+	kvm_mmu_op(&scr3, sizeof scr3);
+}
+
+static unsigned long __force_order;
+
+/*
+ * Special, register-to-cr3 instruction based hypercall API
+ * variant to the KVM host. This utilizes the cr3 filter capability
+ * of the hardware - if this works out then no VM exit happens,
+ * if a VM exit happens then KVM will get the virtual address too.
+ */
+static void kvm_write_cr3(unsigned long guest_cr3)
+{
+	struct kvm_para_state *para_state = &get_cpu_var(para_state);
+	struct kvm_cr3_cache *cache = &para_state->cr3_cache;
+	int idx;
+
+	/*
+	 * Check the cache (maintained by the host) for a matching
+	 * guest_cr3 => host_cr3 mapping. Use it if found:
+	 */
+	for (idx = 0; idx < cache->max_idx; idx++) {
+		if (cache->entry[idx].guest_cr3 == guest_cr3) {
+			unsigned long trap;
+
+			/*
+			 * Cache-hit: we load the cached host-CR3 value.
+			 * Fallback to hypercall variant if it raced with
+			 * the host clearing the cache after guest_cr3
+			 * comparison.
+			 */
+			__asm__ __volatile__ (
+				"    mov %2, %0\n"
+				"0:  mov %3, %%cr3\n"
+				"1:\n"
+				".section .fixup,\"ax\"\n"
+				"2:  mov %1, %0\n"
+				"    jmp 1b\n"
+				".previous\n"
+				_ASM_EXTABLE(0b, 2b)
+				: "=&r" (trap)
+				: "n" (1UL), "n" (0UL),
+				  "b" (cache->entry[idx].host_cr3),
+				  "m" (__force_order));
+			if (!trap)
+				goto out;
+			break;
+		}
+	}
+
+	/*
+	 * Cache-miss. Tell the host the new cr3 via hypercall (to avoid
+	 * aliasing problems with a cached host_cr3 == guest_cr3).
+	 */
+	kvm_new_cr3(guest_cr3);
+out:
+	put_cpu_var(para_state);
+}
+
+/*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ */
+static void kvm_flush_tlb_user(void)
+{
+	kvm_write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Disable global pages, do a flush, then enable global pages:
+ */
+static void kvm_flush_tlb_kernel(void)
+{
+	unsigned long orig_cr4 = read_cr4();
+
+	write_cr4(orig_cr4 & ~X86_CR4_PGE);
+	kvm_flush_tlb_user();
+	write_cr4(orig_cr4);
+}
+
+static void register_cr3_cache(void *cache)
+{
+	struct kvm_para_state *state;
+
+	state = &per_cpu(para_state, raw_smp_processor_id());
+	wrmsrl(KVM_MSR_SET_CR3_CACHE, __pa(&state->cr3_cache));
+}
+
+static unsigned __init kvm_patch(u8 type, u16 clobbers, void *ibuf,
+				 unsigned long addr, unsigned len)
+{
+	switch (type) {
+	case PARAVIRT_PATCH(pv_mmu_ops.write_cr3):
+		return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+	default:
+		return native_patch(type, clobbers, ibuf, addr, len);
+	}
+}
+
+static void __init setup_guest_cr3_cache(void)
+{
+	on_each_cpu(register_cr3_cache, NULL, 0, 1);
+
+	pv_mmu_ops.write_cr3 = kvm_write_cr3;
+	pv_mmu_ops.flush_tlb_user = kvm_flush_tlb_user;
+	pv_mmu_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
+}
+
 static void kvm_mmu_write(void *dest, u64 val)
 {
 	struct kvm_mmu_op_write_pte wpte = {
@@ -97,6 +215,28 @@ static void kvm_mmu_write(void *dest, u64 val)
 }
 
 /*
+ * CR3 cache initialization uses on_each_cpu(), so it can't
+ * happen at kvm_guest_init time.
+ */
+int __init kvm_cr3_cache_init(void)
+{
+	unsigned long flags;
+
+	if (!kvm_para_available())
+		return -ENOSYS;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE)) {
+		setup_guest_cr3_cache();
+		local_irq_save(flags);
+		apply_paravirt(__parainstructions, __parainstructions_end);
+		local_irq_restore(flags);
+	}
+
+	return 0;
+}
+module_init(kvm_cr3_cache_init);
+
+/*
  * We only need to hook operations that are MMU writes.  We hook these so that
  * we can use lazy MMU mode to batch these operations.  We could probably
  * improve the performance of the host code if we used some of the information
@@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
 		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
 		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
 	}
+
+	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
+		pv_init_ops.patch = kvm_patch;
 }
 
 void __init kvm_guest_init(void)
-- 
1.5.4.2


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 8/8] x86: KVM guest: VMX cr3 cache support
  2008-03-02 16:31 [PATCH 8/8] x86: KVM guest: VMX cr3 cache support Avi Kivity
@ 2008-03-06  9:17 ` Zhao Forrest
  2008-03-06  9:32   ` Avi Kivity
  0 siblings, 1 reply; 3+ messages in thread
From: Zhao Forrest @ 2008-03-06  9:17 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, Marcelo Tosatti

> * We only need to hook operations that are MMU writes. We hook these so
> that
> * we can use lazy MMU mode to batch these operations. We could probably
> * improve the performance of the host code if we used some of the
> information
> @@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
> pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
> pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
> }
> +
> +	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
Here guest OS calls cpuid() to know if KVM_FEATURE_CR3_CACHE is
supported by KVM, so I think that the kernel counterpart of this
patch(i.e. [kvm-devel] [PATCH 7/8] KVM: MMU: VMX cr3 cache support)
should include the code to intercept cpuid trap and set
KVM_FEATURE_CR3_CACHE. But I didn't find such code in [PATCH 7/8] KVM:
MMU: VMX cr3 cache support. Do I miss anything relevant?

Thanks,
Forrest

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 8/8] x86: KVM guest: VMX cr3 cache support
  2008-03-06  9:17 ` Zhao Forrest
@ 2008-03-06  9:32   ` Avi Kivity
  0 siblings, 0 replies; 3+ messages in thread
From: Avi Kivity @ 2008-03-06  9:32 UTC (permalink / raw)
  To: Zhao Forrest; +Cc: kvm-devel, Marcelo Tosatti

Zhao Forrest wrote:
>> * We only need to hook operations that are MMU writes. We hook these so
>> that
>> * we can use lazy MMU mode to batch these operations. We could probably
>> * improve the performance of the host code if we used some of the
>> information
>> @@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
>> pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
>> pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
>> }
>> +
>> +	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
>>     
> Here guest OS calls cpuid() to know if KVM_FEATURE_CR3_CACHE is
> supported by KVM, so I think that the kernel counterpart of this
> patch(i.e. [kvm-devel] [PATCH 7/8] KVM: MMU: VMX cr3 cache support)
> should include the code to intercept cpuid trap and set
> KVM_FEATURE_CR3_CACHE. But I didn't find such code in [PATCH 7/8] KVM:
> MMU: VMX cr3 cache support. Do I miss anything relevant?
>
>   

Userspace sets the cpuid information; this allows, for example, command 
line switches to hide paravirtualization support.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-03-06  9:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-02 16:31 [PATCH 8/8] x86: KVM guest: VMX cr3 cache support Avi Kivity
2008-03-06  9:17 ` Zhao Forrest
2008-03-06  9:32   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox