public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH 27/58] KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit
Date: Sun, 17 Jun 2007 12:44:08 +0300	[thread overview]
Message-ID: <11820734803416-git-send-email-avi@qumranet.com> (raw)
In-Reply-To: <1182073479890-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

From: Eddie Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

The MSR_EFER.LME/LMA bits are automatically saved and restored by the
VMX hardware, so KVM only needs to save the NX/SCE bits at the time of
a heavyweight VM exit. But clearing the NX bit in the host environment
may cause a system hang if the host page tables use the EXB bit, so we
leave the NX bit as it is. If host NX=1 and guest NX=0, we can check
the guest page table EXB bits before inserting a shadow pte (though no
guest is expecting to see this kind of gp fault). If host NX=0, we do
not present the Execute-Disable feature to the guest, so the
combination host NX=0, guest NX=1 cannot occur.

This patch reduces raw vmexit time by ~27%.

Me: fix compile warnings on i386.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
---
 drivers/kvm/kvm.h      |    2 +
 drivers/kvm/kvm_main.c |   23 ++++++++++++++++
 drivers/kvm/vmx.c      |   67 +++++++++++++++++++++++++++++++++---------------
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index c252efe..db2bc6f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -255,6 +255,7 @@ struct kvm_stat {
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 light_exits;
+	u32 efer_reload;
 };
 
 struct kvm_vcpu {
@@ -289,6 +290,7 @@ struct kvm_vcpu {
 	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	int save_nmsrs;
+	int msr_offset_efer;
 #ifdef CONFIG_X86_64
 	int msr_offset_kernel_gs_base;
 #endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 095d673..af07cd5 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -73,6 +73,7 @@ static struct kvm_stats_debugfs_item {
 	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 	{ "irq_exits", STAT_OFFSET(irq_exits) },
 	{ "light_exits", STAT_OFFSET(light_exits) },
+	{ "efer_reload", STAT_OFFSET(efer_reload) },
 	{ NULL }
 };
 
@@ -2378,6 +2379,27 @@ out:
 	return r;
 }
 
+/* Drop NX from guest CPUID leaf 0x80000001 if the host has EFER.NX clear. */
+static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry *entry = NULL;
+	u64 efer;
+	int i;
+
+	rdmsrl(MSR_EFER, efer);
+	for (i = 0; i < vcpu->cpuid_nent; ++i) {
+		if (vcpu->cpuid_entries[i].function == 0x80000001) {
+			entry = &vcpu->cpuid_entries[i];
+			break;
+		}
+	}
+	/* The CPUID NX flag is EDX bit 20; EFER_NX only describes the MSR. */
+	if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
+		entry->edx &= ~(1 << 20);
+		printk(KERN_INFO "kvm: guest NX capability removed\n");
+	}
+}
+
 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 				    struct kvm_cpuid *cpuid,
 				    struct kvm_cpuid_entry __user *entries)
@@ -2392,6 +2414,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 		goto out;
 	vcpu->cpuid_nent = cpuid->nent;
+	cpuid_fix_nx_cap(vcpu);
 	return 0;
 
 out:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index dc99191..93e5bb2 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -42,6 +42,7 @@ static struct page *vmx_io_bitmap_b;
 #else
 #define HOST_IS_64 0
 #endif
+#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)
 
 static struct vmcs_descriptor {
 	int size;
@@ -85,6 +86,18 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
+static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr)
+{
+	return EFER_SAVE_RESTORE_BITS & (u64)msr.data;	/* masked EFER bits */
+}
+
+static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
+{
+	int i = vcpu->msr_offset_efer;	/* negative if MSR_EFER was not found */
+	return i >= 0 && (msr_efer_save_restore_bits(vcpu->host_msrs[i]) !=
+			  msr_efer_save_restore_bits(vcpu->guest_msrs[i]));
+}
+
 static inline int is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -265,6 +278,19 @@ static void reload_tss(void)
 #endif
 }
 
+/* Write MSR_EFER: host value with EFER_SAVE_RESTORE_BITS taken from guest. */
+static void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+	int idx = vcpu->msr_offset_efer;
+	u64 guest_bits = msr_efer_save_restore_bits(vcpu->guest_msrs[idx]);
+	u64 efer = vcpu->host_msrs[idx].data & ~EFER_SAVE_RESTORE_BITS;
+
+	efer |= guest_bits;
+	wrmsrl(MSR_EFER, efer);
+	/* Count every reload in the efer_reload statistic. */
+	vcpu->stat.efer_reload++;
+}
+
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vmx_host_state *hs = &vcpu->vmx_host_state;
@@ -308,6 +334,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	}
 #endif
 	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	if (msr_efer_need_save_restore(vcpu))
+		load_transition_efer(vcpu);
 }
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
@@ -336,6 +364,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 	}
 	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
 	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	if (msr_efer_need_save_restore(vcpu))
+		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
 }
 
 /*
@@ -477,11 +507,13 @@ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
  */
 static void setup_msrs(struct kvm_vcpu *vcpu)
 {
-	int index, save_nmsrs;
+	int save_nmsrs;
 
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(vcpu)) {
+		int index;
+
 		index = __find_msr_index(vcpu, MSR_SYSCALL_MASK);
 		if (index >= 0)
 			move_msr_up(vcpu, index, save_nmsrs++);
@@ -509,22 +541,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu)
 	vcpu->msr_offset_kernel_gs_base =
 		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
 #endif
-	index = __find_msr_index(vcpu, MSR_EFER);
-	if (index >= 0)
-		save_nmsrs = 1;
-	else {
-		save_nmsrs = 0;
-		index = 0;
-	}
-	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->host_msrs + index));
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs);
+	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -611,10 +628,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vmx_msr_entry *msr;
+	int ret = 0;
+
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
-		return kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		if (vcpu->vmx_host_state.loaded)
+			load_transition_efer(vcpu);
+		break;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
@@ -639,13 +661,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		if (msr) {
 			msr->data = data;
 			if (vcpu->vmx_host_state.loaded)
-				load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs);
+				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
 			break;
 		}
-		return kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -1326,6 +1348,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
 	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
-- 
1.5.0.6


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

  parent reply	other threads:[~2007-06-17  9:44 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-06-17  9:43 [PATCH 00/58] KVM updates for 2.6.23 Avi Kivity
     [not found] ` <1182073479890-git-send-email-avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-06-17  9:43   ` [PATCH 01/58] KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs Avi Kivity
2007-06-17  9:43   ` [PATCH 02/58] KVM: SVM: Allow direct guest access to PC debug port Avi Kivity
2007-06-17  9:43   ` [PATCH 03/58] KVM: Assume that writes smaller than 4 bytes are to non-pagetable pages Avi Kivity
2007-06-17  9:43   ` [PATCH 04/58] KVM: Avoid saving and restoring some host CPU state on lightweight vmexit Avi Kivity
2007-06-17  9:43   ` [PATCH 05/58] KVM: Unindent some code Avi Kivity
2007-06-17  9:43   ` [PATCH 06/58] KVM: Reduce misfirings of the fork detector Avi Kivity
2007-06-17  9:43   ` [PATCH 07/58] KVM: Be more careful restoring fs on lightweight vmexit Avi Kivity
2007-06-17  9:43   ` [PATCH 08/58] KVM: Unify kvm_mmu_pre_write() and kvm_mmu_post_write() Avi Kivity
2007-06-17  9:43   ` [PATCH 09/58] KVM: MMU: Respect nonpae pagetable quadrant when zapping ptes Avi Kivity
2007-06-17  9:43   ` [PATCH 10/58] KVM: Update shadow pte on write to guest pte Avi Kivity
2007-06-17  9:43   ` [PATCH 11/58] KVM: Increase mmu shadow cache to 1024 pages Avi Kivity
2007-06-17  9:43   ` [PATCH 12/58] KVM: Fix potential guest state leak into host Avi Kivity
2007-06-17  9:43   ` [PATCH 13/58] KVM: Move some more msr mangling into vmx_save_host_state() Avi Kivity
2007-06-17  9:43   ` [PATCH 14/58] KVM: Rationalize exception bitmap usage Avi Kivity
2007-06-17  9:43   ` [PATCH 15/58] KVM: Consolidate guest fpu activation and deactivation Avi Kivity
2007-06-17  9:43   ` [PATCH 16/58] KVM: Set cr0.mp for guests Avi Kivity
2007-06-17  9:43   ` [PATCH 17/58] KVM: Implement IA32_EBL_CR_POWERON msr Avi Kivity
2007-06-17  9:43   ` [PATCH 18/58] KVM: MMU: Simplify kvm_mmu_free_page() a tiny bit Avi Kivity
2007-06-17  9:44   ` [PATCH 19/58] KVM: MMU: Store shadow page tables as kernel virtual addresses, not physical Avi Kivity
2007-06-17  9:44   ` [PATCH 20/58] KVM: VMX: Only reload guest msrs if they are already loaded Avi Kivity
2007-06-17  9:44   ` [PATCH 21/58] KVM: Avoid corrupting tr in real mode Avi Kivity
2007-06-17  9:44   ` [PATCH 22/58] KVM: Fix vmx I/O bitmap initialization on highmem systems Avi Kivity
2007-06-17  9:44   ` [PATCH 23/58] KVM: VMX: Use local labels in inline assembly Avi Kivity
2007-06-17  9:44   ` [PATCH 24/58] KVM: VMX: Handle #SS faults from real mode Avi Kivity
2007-06-17  9:44   ` [PATCH 25/58] KVM: VMX: Avoid saving and restoring msrs on lightweight vmexit Avi Kivity
2007-06-17  9:44   ` [PATCH 26/58] KVM: VMX: Cleanup redundant code in MSR set Avi Kivity
2007-06-17  9:44   ` Avi Kivity [this message]
2007-06-17  9:44   ` [PATCH 28/58] Use menuconfig objects II - KVM/Virt Avi Kivity
2007-06-17  9:44   ` [PATCH 29/58] KVM: x86 emulator: implement wbinvd Avi Kivity
2007-06-17  9:44   ` [PATCH 30/58] KVM: Fix includes Avi Kivity
2007-06-17  9:44   ` [PATCH 31/58] KVM: Use symbolic constants instead of magic numbers Avi Kivity
2007-06-17  9:44   ` [PATCH 32/58] KVM: MMU: Use slab caches for shadow pages and their headers Avi Kivity
2007-06-17  9:44   ` [PATCH 33/58] KVM: MMU: Simplify fetch() a little bit Avi Kivity
2007-06-17  9:44   ` [PATCH 34/58] KVM: MMU: Move set_pte_common() to pte width dependent code Avi Kivity
2007-06-17  9:44   ` [PATCH 35/58] KVM: MMU: Pass the guest pde to set_pte_common Avi Kivity
2007-06-17  9:44   ` [PATCH 36/58] KVM: MMU: Fold fix_read_pf() into set_pte_common() Avi Kivity
2007-06-17  9:44   ` [PATCH 37/58] KVM: MMU: Fold fix_write_pf() " Avi Kivity
2007-06-17  9:44   ` [PATCH 38/58] KVM: Move shadow pte modifications from set_pte/set_pde to set_pde_common() Avi Kivity
2007-06-17  9:44   ` [PATCH 39/58] KVM: Make shadow pte updates atomic Avi Kivity
2007-06-17  9:44   ` [PATCH 40/58] KVM: MMU: Make setting shadow ptes atomic on i386 Avi Kivity
2007-06-17  9:44   ` [PATCH 41/58] KVM: MMU: Remove cr0.wp tricks Avi Kivity
2007-06-17  9:44   ` [PATCH 42/58] KVM: MMU: Simpify accessed/dirty/present/nx bit handling Avi Kivity
2007-06-17  9:44   ` [PATCH 43/58] KVM: MMU: Don't cache guest access bits in the shadow page table Avi Kivity
2007-06-17  9:44   ` [PATCH 44/58] KVM: MMU: Remove unused large page marker Avi Kivity
2007-06-17  9:44   ` [PATCH 45/58] KVM: Lazy guest cr3 switching Avi Kivity
2007-06-17  9:44   ` [PATCH 46/58] KVM: Replace C code with call to ARRAY_SIZE() macro Avi Kivity
2007-06-17  9:44   ` [PATCH 47/58] KVM: Remove unnecessary initialization and checks in mark_page_dirty() Avi Kivity
2007-06-17  9:44   ` [PATCH 48/58] KVM: Fix vcpu freeing for guest smp Avi Kivity
2007-06-17  9:44   ` [PATCH 49/58] KVM: Fix adding an smp virtual machine to the vm list Avi Kivity
2007-06-17  9:44   ` [PATCH 50/58] KVM: Enable guest smp Avi Kivity
2007-06-17  9:44   ` [PATCH 51/58] KVM: Move duplicate halt handling code into kvm_main.c Avi Kivity
2007-06-17  9:44   ` [PATCH 52/58] KVM: Emulate hlt on real mode for Intel Avi Kivity
2007-06-17  9:44   ` [PATCH 53/58] KVM: Keep an upper bound of initialized vcpus Avi Kivity
2007-06-17  9:44   ` [PATCH 54/58] KVM: Flush remote tlbs when reducing shadow pte permissions Avi Kivity
2007-06-17  9:44   ` [PATCH 55/58] KVM: SVM: Replace memset(<addr>, 0, PAGESIZE) with clear_page(<addr>) Avi Kivity
2007-06-17  9:44   ` [PATCH 56/58] KVM: VMX: " Avi Kivity
2007-06-17  9:44   ` [PATCH 57/58] KVM: Initialize the BSP bit in the APIC_BASE msr correctly Avi Kivity
2007-06-17  9:44   ` [PATCH 58/58] KVM: VMX: Ensure vcpu time stamp counter is monotonous Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11820734803416-git-send-email-avi@qumranet.com \
    --to=avi-atkuwr5tajbwk0htik3j/w@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox