public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] MSR_EFER acceleration for lightweight VM Exit
@ 2007-05-18  5:09 Dong, Eddie
       [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Dong, Eddie @ 2007-05-18  5:09 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

[-- Attachment #1: Type: text/plain, Size: 6610 bytes --]

Avi:

	This patch avoids saving and restoring MSR_EFER on
lightweight VM exits.

	With this patch, the kernel build gets a 10% improvement for
	64-bit on 64-bit, and a 5-8% improvement for 32-bit on 64-bit.
	vmexit.flat shows a saving of ~1185 cycles for 64-bit on 64-bit,
	which is 29% of the total VM exit time. It shows a saving of
	~1206 cycles for 32-bit on 64-bit, which is 27.5% of the total.
	BTW, I am using a 4-core Clovertown processor; the total VM exit
	time reported by vmexit.flat is now about 2860 cycles.

	Thx,eddie

    
        The MSR_EFER.LME/LMA bits are automatically saved/restored by VMX
        hardware, so KVM only needs to save the NX/SCE bits at the time of
        a heavyweight VM exit. But clearing the NX bit in the host
        environment may cause a system hang if the host page tables use
        EXB bits, so we leave the NX bit as it is. If host NX=1 and guest
        NX=0, we can check the guest page table EXB bits before inserting
        a shadow pte (though no guest expects to see this kind of gp fault).
        If host NX=0, we do not present the Execute-Disable feature to the
        guest, so the host NX=0, guest NX=1 combination cannot occur.

        Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>


 kvm.h      |    2 ++
 kvm_main.c |   13 +++++++++++++
 vmx.c      |   48 +++++++++++++++++++++++++++++++-----------------
 3 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 5f056d9..e35434b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -254,6 +254,7 @@ struct kvm_stat {
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 light_exits;
+	u32 efer_reload;
 };
 
 struct kvm_vcpu {
@@ -288,6 +289,7 @@ struct kvm_vcpu {
 	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	int save_nmsrs;
+	int msr_offset_efer;
 #ifdef CONFIG_X86_64
 	int msr_offset_kernel_gs_base;
 #endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 1288cff..da31f55 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item {
 	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 	{ "irq_exits", STAT_OFFSET(irq_exits) },
 	{ "light_exits", STAT_OFFSET(light_exits) },
+	{ "efer_reload", STAT_OFFSET(efer_reload) },
 	{ NULL }
 };
 
@@ -2377,6 +2378,17 @@ out:
 	return r;
 }
 
+void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+	__u64 efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if ((vcpu->cpuid_nent >= 1) && !(efer & EFER_NX)) {
+		vcpu->cpuid_entries[1].edx &= ~(1<<20);
+		printk(KERN_INFO ": guest NX capibility removed\n");
+	}
+}
+
 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 				    struct kvm_cpuid *cpuid,
 				    struct kvm_cpuid_entry __user
*entries)
@@ -2391,6 +2403,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct
kvm_vcpu *vcpu,
 			   cpuid->nent * sizeof(struct
kvm_cpuid_entry)))
 		goto out;
 	vcpu->cpuid_nent = cpuid->nent;
+	cpuid_fix_nx_cap(vcpu);
 	return 0;
 
 out:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5386461..bbdeac9 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b;
 #else
 #define HOST_IS_64 0
 #endif
+#define EFER_SR_BITS EFER_SCE
 
 static struct vmcs_descriptor {
 	int size;
@@ -56,6 +57,10 @@ static struct vmcs_descriptor {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
 	}
 
+#define  efer_msr_srbits_changed(vcpu)
\
+	((vcpu->host_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)!=
\
+	(vcpu->guest_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS))
+
 static struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
@@ -264,6 +269,18 @@ static void reload_tss(void)
 #endif
 }
 
+void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+	__u64 msr_efer_trans;
+
+	msr_efer_trans = vcpu->host_msrs[vcpu->msr_offset_efer].data;
+	msr_efer_trans &= ~EFER_SR_BITS;
+	msr_efer_trans |= (EFER_SR_BITS &
+			vcpu->guest_msrs[vcpu->msr_offset_efer].data);
+	wrmsrl(MSR_EFER, msr_efer_trans);
+	vcpu->stat.efer_reload++;
+}
+
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vmx_host_state *hs = &vcpu->vmx_host_state;
@@ -307,6 +324,8 @@ static void vmx_save_host_state(struct kvm_vcpu
*vcpu)
 	}
 #endif
 	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	if (efer_msr_srbits_changed(vcpu))
+		load_transition_efer(vcpu);
 }
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
@@ -335,6 +354,8 @@ static void vmx_load_host_state(struct kvm_vcpu
*vcpu)
 	}
 	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
 	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	if (efer_msr_srbits_changed(vcpu))
+		load_msrs(vcpu->host_msrs+vcpu->msr_offset_efer, 1);
 }
 
 /*
@@ -508,22 +529,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu)
 	vcpu->msr_offset_kernel_gs_base =
 		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
 #endif
-	index = __find_msr_index(vcpu, MSR_EFER);
-	if (index >= 0)
-		save_nmsrs = 1;
-	else {
-		save_nmsrs = 0;
-		index = 0;
-	}
-	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->host_msrs + index));
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs);
+	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -610,10 +616,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32
msr_index, u64 *pdata)
 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vmx_msr_entry *msr;
+	int ret;
+
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
-		return kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		if (vcpu->vmx_host_state.loaded)
+			load_transition_efer(vcpu);
+		return ret;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
@@ -1327,6 +1338,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return));
 	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+ 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);

[-- Attachment #2: efer4.patch --]
[-- Type: application/octet-stream, Size: 5878 bytes --]

commit e06a6176db8ae24171b4fa727660a7e3733ad27b
Author: root <root@vtsmp-xin.(none)>
Date:   Fri May 18 12:50:01 2007 +0800

        KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit
    
        The MSR_EFER.LME/LMA bits are automatically saved/restored by VMX
        hardware, so KVM only needs to save the NX/SCE bits at the time of
        a heavyweight VM exit. But clearing the NX bit in the host
        environment may cause a system hang if the host page tables use
        EXB bits, so we leave the NX bit as it is. If host NX=1 and guest
        NX=0, we can check the guest page table EXB bits before inserting
        a shadow pte (though no guest expects to see this kind of gp fault).
        If host NX=0, we do not present the Execute-Disable feature to the
        guest, so the host NX=0, guest NX=1 combination cannot occur.
    
        Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 5f056d9..e35434b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -254,6 +254,7 @@ struct kvm_stat {
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 light_exits;
+	u32 efer_reload;
 };
 
 struct kvm_vcpu {
@@ -288,6 +289,7 @@ struct kvm_vcpu {
 	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	int save_nmsrs;
+	int msr_offset_efer;
 #ifdef CONFIG_X86_64
 	int msr_offset_kernel_gs_base;
 #endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 1288cff..da31f55 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item {
 	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 	{ "irq_exits", STAT_OFFSET(irq_exits) },
 	{ "light_exits", STAT_OFFSET(light_exits) },
+	{ "efer_reload", STAT_OFFSET(efer_reload) },
 	{ NULL }
 };
 
@@ -2377,6 +2378,17 @@ out:
 	return r;
 }
 
+void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+	__u64 efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if ((vcpu->cpuid_nent >= 1) && !(efer & EFER_NX)) {
+		vcpu->cpuid_entries[1].edx &= ~(1<<20);
+		printk(KERN_INFO ": guest NX capibility removed\n");
+	}
+}
+
 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 				    struct kvm_cpuid *cpuid,
 				    struct kvm_cpuid_entry __user *entries)
@@ -2391,6 +2403,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 		goto out;
 	vcpu->cpuid_nent = cpuid->nent;
+	cpuid_fix_nx_cap(vcpu);
 	return 0;
 
 out:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5386461..bbdeac9 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b;
 #else
 #define HOST_IS_64 0
 #endif
+#define EFER_SR_BITS EFER_SCE
 
 static struct vmcs_descriptor {
 	int size;
@@ -56,6 +57,10 @@ static struct vmcs_descriptor {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
 	}
 
+#define  efer_msr_srbits_changed(vcpu)						\
+	((vcpu->host_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)!= \
+	(vcpu->guest_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS))
+
 static struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
@@ -264,6 +269,18 @@ static void reload_tss(void)
 #endif
 }
 
+void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+	__u64 msr_efer_trans;
+
+	msr_efer_trans = vcpu->host_msrs[vcpu->msr_offset_efer].data;
+	msr_efer_trans &= ~EFER_SR_BITS;
+	msr_efer_trans |= (EFER_SR_BITS &
+			vcpu->guest_msrs[vcpu->msr_offset_efer].data);
+	wrmsrl(MSR_EFER, msr_efer_trans);
+	vcpu->stat.efer_reload++;
+}
+
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vmx_host_state *hs = &vcpu->vmx_host_state;
@@ -307,6 +324,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	}
 #endif
 	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	if (efer_msr_srbits_changed(vcpu))
+		load_transition_efer(vcpu);
 }
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
@@ -335,6 +354,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 	}
 	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
 	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	if (efer_msr_srbits_changed(vcpu))
+		load_msrs(vcpu->host_msrs+vcpu->msr_offset_efer, 1);
 }
 
 /*
@@ -508,22 +529,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu)
 	vcpu->msr_offset_kernel_gs_base =
 		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
 #endif
-	index = __find_msr_index(vcpu, MSR_EFER);
-	if (index >= 0)
-		save_nmsrs = 1;
-	else {
-		save_nmsrs = 0;
-		index = 0;
-	}
-	vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
-		    virt_to_phys(vcpu->guest_msrs + index));
-	vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
-		    virt_to_phys(vcpu->host_msrs + index));
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs);
+	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -610,10 +616,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vmx_msr_entry *msr;
+	int ret;
+
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
-		return kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		if (vcpu->vmx_host_state.loaded)
+			load_transition_efer(vcpu);
+		return ret;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
@@ -1327,6 +1338,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return));
 	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+ 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);

[-- Attachment #3: Type: text/plain, Size: 286 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-05-21  4:29 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-05-18  5:09 [PATCH] MSR_EFER acceleration for lightweight VM Exit Dong, Eddie
     [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-20  7:28   ` Avi Kivity
     [not found]     ` <464FF88C.6090004-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-05-20 13:54       ` Dong, Eddie
     [not found]         ` <10EA09EFD8728347A513008B6B0DA77A014E8ABF-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-20 15:42           ` Avi Kivity
     [not found]             ` <46506C4D.9070102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-05-21  1:10               ` Dong, Eddie
     [not found]                 ` <10EA09EFD8728347A513008B6B0DA77A0174D33F-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-21  2:41                   ` Dong, Eddie
     [not found]                     ` <10EA09EFD8728347A513008B6B0DA77A0174D46A-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-21  4:29                       ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox