* [PATCH] MSR_EFER acceleration for lighweight VM Exit
@ 2007-05-18 5:09 Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 7+ messages in thread
From: Dong, Eddie @ 2007-05-18 5:09 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 6610 bytes --]
Avi:
This patch is to avoid saving and restoring of msr_efer on
lightweight vmexit.
With this patch, the Kernel build get 10% increasement for
64bits on 64 bits,
and 5-8% increasement for 32bits on 64 bits.
Vmexit.flat can see ~1185 cycles save for 64 bits on 64 bits,
which is 29%
save of total VM Exit. It can save ~1206 cycles for 32bits on
64bits, which is
27.5% save of total.
BTW, I use 4 core clovertown processor, the total VM Exit time
reported by
vmexit.flat is about 2860 cycles now.
Thx,eddie
MSR_EFER.LME/LMA bits are automatically save/restored by VMX
hardware, KVM only needs to save NX/SCE bits at time of heavy
weight VM Exit. But clearing NX bits in host envirnment may
cause system hang if the host page table is using EXB bits,
thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we
can do guest page table EXB bits check before inserting a shadow
pte (though no guest is expecting to see this kind of gp fault).
If host NX=0, we present guest no Execute-Disable feature to
guest,
thus no host NX=0, guest NX=1 combination.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
kvm.h | 2 ++
kvm_main.c | 13 +++++++++++++
vmx.c | 48 +++++++++++++++++++++++++++++++-----------------
3 files changed, 46 insertions(+), 17 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 5f056d9..e35434b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -254,6 +254,7 @@ struct kvm_stat {
u32 request_irq_exits;
u32 irq_exits;
u32 light_exits;
+ u32 efer_reload;
};
struct kvm_vcpu {
@@ -288,6 +289,7 @@ struct kvm_vcpu {
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
+ int msr_offset_efer;
#ifdef CONFIG_X86_64
int msr_offset_kernel_gs_base;
#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 1288cff..da31f55 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item {
{ "request_irq", STAT_OFFSET(request_irq_exits) },
{ "irq_exits", STAT_OFFSET(irq_exits) },
{ "light_exits", STAT_OFFSET(light_exits) },
+ { "efer_reload", STAT_OFFSET(efer_reload) },
{ NULL }
};
@@ -2377,6 +2378,17 @@ out:
return r;
}
+void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+ __u64 efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if ((vcpu->cpuid_nent >= 1) && !(efer & EFER_NX)) {
+ vcpu->cpuid_entries[1].edx &= ~(1<<20);
+ printk(KERN_INFO ": guest NX capibility removed\n");
+ }
+}
+
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user
*entries)
@@ -2391,6 +2403,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct
kvm_vcpu *vcpu,
cpuid->nent * sizeof(struct
kvm_cpuid_entry)))
goto out;
vcpu->cpuid_nent = cpuid->nent;
+ cpuid_fix_nx_cap(vcpu);
return 0;
out:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5386461..bbdeac9 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b;
#else
#define HOST_IS_64 0
#endif
+#define EFER_SR_BITS EFER_SCE
static struct vmcs_descriptor {
int size;
@@ -56,6 +57,10 @@ static struct vmcs_descriptor {
.ar_bytes = GUEST_##seg##_AR_BYTES, \
}
+#define efer_msr_srbits_changed(vcpu)
\
+ ((vcpu->host_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)!=
\
+ (vcpu->guest_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS))
+
static struct kvm_vmx_segment_field {
unsigned selector;
unsigned base;
@@ -264,6 +269,18 @@ static void reload_tss(void)
#endif
}
+void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+ __u64 msr_efer_trans;
+
+ msr_efer_trans = vcpu->host_msrs[vcpu->msr_offset_efer].data;
+ msr_efer_trans &= ~EFER_SR_BITS;
+ msr_efer_trans |= (EFER_SR_BITS &
+ vcpu->guest_msrs[vcpu->msr_offset_efer].data);
+ wrmsrl(MSR_EFER, msr_efer_trans);
+ vcpu->stat.efer_reload++;
+}
+
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vmx_host_state *hs = &vcpu->vmx_host_state;
@@ -307,6 +324,8 @@ static void vmx_save_host_state(struct kvm_vcpu
*vcpu)
}
#endif
load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ if (efer_msr_srbits_changed(vcpu))
+ load_transition_efer(vcpu);
}
static void vmx_load_host_state(struct kvm_vcpu *vcpu)
@@ -335,6 +354,8 @@ static void vmx_load_host_state(struct kvm_vcpu
*vcpu)
}
save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+ if (efer_msr_srbits_changed(vcpu))
+ load_msrs(vcpu->host_msrs+vcpu->msr_offset_efer, 1);
}
/*
@@ -508,22 +529,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu)
vcpu->msr_offset_kernel_gs_base =
__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
#endif
- index = __find_msr_index(vcpu, MSR_EFER);
- if (index >= 0)
- save_nmsrs = 1;
- else {
- save_nmsrs = 0;
- index = 0;
- }
- vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
- virt_to_phys(vcpu->guest_msrs + index));
- vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
- virt_to_phys(vcpu->guest_msrs + index));
- vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
- virt_to_phys(vcpu->host_msrs + index));
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs);
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs);
+ vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
}
/*
@@ -610,10 +616,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32
msr_index, u64 *pdata)
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vmx_msr_entry *msr;
+ int ret;
+
switch (msr_index) {
#ifdef CONFIG_X86_64
case MSR_EFER:
- return kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_index, data);
+ if (vcpu->vmx_host_state.loaded)
+ load_transition_efer(vcpu);
+ return ret;
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
@@ -1327,6 +1338,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return));
vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
[-- Attachment #2: efer4.patch --]
[-- Type: application/octet-stream, Size: 5878 bytes --]
commit e06a6176db8ae24171b4fa727660a7e3733ad27b
Author: root <root@vtsmp-xin.(none)>
Date: Fri May 18 12:50:01 2007 +0800
KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit
MSR_EFER.LME/LMA bits are automatically save/restored by VMX
hardware, KVM only needs to save NX/SCE bits at time of heavy
weight VM Exit. But clearing NX bits in host envirnment may
cause system hang if the host page table is using EXB bits,
thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we
can do guest page table EXB bits check before inserting a shadow
pte (though no guest is expecting to see this kind of gp fault).
If host NX=0, we present guest no Execute-Disable feature to guest,
thus no host NX=0, guest NX=1 combination.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 5f056d9..e35434b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -254,6 +254,7 @@ struct kvm_stat {
u32 request_irq_exits;
u32 irq_exits;
u32 light_exits;
+ u32 efer_reload;
};
struct kvm_vcpu {
@@ -288,6 +289,7 @@ struct kvm_vcpu {
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
+ int msr_offset_efer;
#ifdef CONFIG_X86_64
int msr_offset_kernel_gs_base;
#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 1288cff..da31f55 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item {
{ "request_irq", STAT_OFFSET(request_irq_exits) },
{ "irq_exits", STAT_OFFSET(irq_exits) },
{ "light_exits", STAT_OFFSET(light_exits) },
+ { "efer_reload", STAT_OFFSET(efer_reload) },
{ NULL }
};
@@ -2377,6 +2378,17 @@ out:
return r;
}
+void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
+{
+ __u64 efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if ((vcpu->cpuid_nent >= 1) && !(efer & EFER_NX)) {
+ vcpu->cpuid_entries[1].edx &= ~(1<<20);
+ printk(KERN_INFO ": guest NX capibility removed\n");
+ }
+}
+
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user *entries)
@@ -2391,6 +2403,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
cpuid->nent * sizeof(struct kvm_cpuid_entry)))
goto out;
vcpu->cpuid_nent = cpuid->nent;
+ cpuid_fix_nx_cap(vcpu);
return 0;
out:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5386461..bbdeac9 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b;
#else
#define HOST_IS_64 0
#endif
+#define EFER_SR_BITS EFER_SCE
static struct vmcs_descriptor {
int size;
@@ -56,6 +57,10 @@ static struct vmcs_descriptor {
.ar_bytes = GUEST_##seg##_AR_BYTES, \
}
+#define efer_msr_srbits_changed(vcpu) \
+ ((vcpu->host_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)!= \
+ (vcpu->guest_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS))
+
static struct kvm_vmx_segment_field {
unsigned selector;
unsigned base;
@@ -264,6 +269,18 @@ static void reload_tss(void)
#endif
}
+void load_transition_efer(struct kvm_vcpu *vcpu)
+{
+ __u64 msr_efer_trans;
+
+ msr_efer_trans = vcpu->host_msrs[vcpu->msr_offset_efer].data;
+ msr_efer_trans &= ~EFER_SR_BITS;
+ msr_efer_trans |= (EFER_SR_BITS &
+ vcpu->guest_msrs[vcpu->msr_offset_efer].data);
+ wrmsrl(MSR_EFER, msr_efer_trans);
+ vcpu->stat.efer_reload++;
+}
+
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vmx_host_state *hs = &vcpu->vmx_host_state;
@@ -307,6 +324,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
}
#endif
load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ if (efer_msr_srbits_changed(vcpu))
+ load_transition_efer(vcpu);
}
static void vmx_load_host_state(struct kvm_vcpu *vcpu)
@@ -335,6 +354,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
}
save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+ if (efer_msr_srbits_changed(vcpu))
+ load_msrs(vcpu->host_msrs+vcpu->msr_offset_efer, 1);
}
/*
@@ -508,22 +529,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu)
vcpu->msr_offset_kernel_gs_base =
__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
#endif
- index = __find_msr_index(vcpu, MSR_EFER);
- if (index >= 0)
- save_nmsrs = 1;
- else {
- save_nmsrs = 0;
- index = 0;
- }
- vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
- virt_to_phys(vcpu->guest_msrs + index));
- vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
- virt_to_phys(vcpu->guest_msrs + index));
- vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
- virt_to_phys(vcpu->host_msrs + index));
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs);
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs);
+ vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
}
/*
@@ -610,10 +616,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vmx_msr_entry *msr;
+ int ret;
+
switch (msr_index) {
#ifdef CONFIG_X86_64
case MSR_EFER:
- return kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_index, data);
+ if (vcpu->vmx_host_state.loaded)
+ load_transition_efer(vcpu);
+ return ret;
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
@@ -1327,6 +1338,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return));
vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
[-- Attachment #3: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 7+ messages in thread[parent not found: <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-05-20 7:28 ` Avi Kivity [not found] ` <464FF88C.6090004-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Avi Kivity @ 2007-05-20 7:28 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi: > > This patch is to avoid saving and restoring of msr_efer on > lightweight vmexit. > > With this patch, the Kernel build get 10% increasement for > 64bits on 64 bits, > and 5-8% increasement for 32bits on 64 bits. > Vmexit.flat can see ~1185 cycles save for 64 bits on 64 bits, > which is 29% > save of total VM Exit. It can save ~1206 cycles for 32bits on > 64bits, which is > 27.5% save of total. > BTW, I use 4 core clovertown processor, the total VM Exit time > reported by > vmexit.flat is about 2860 cycles now. > > Very impressive speedup! Comments below. > > +void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) > static function, please. > +{ > + __u64 efer; > + > + rdmsrl(MSR_EFER, efer); > + if ((vcpu->cpuid_nent >= 1) && !(efer & EFER_NX)) { > + vcpu->cpuid_entries[1].edx &= ~(1<<20); > Nothing guarantees that function 1 is in entry 1. You need to iterate over the loop. > + printk(KERN_INFO ": guest NX capibility removed\n"); > The warning should be printed only if the capability is actually present in the cpuid_entry. Otherwise non-nx capable processors will always show the warning. > @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; > #else > #define HOST_IS_64 0 > #endif > +#define EFER_SR_BITS EFER_SCE > I'm guessing "SR" is save/restore? Please use the full name. > > static struct vmcs_descriptor { > int size; > @@ -56,6 +57,10 @@ static struct vmcs_descriptor { > .ar_bytes = GUEST_##seg##_AR_BYTES, \ > } > > +#define efer_msr_srbits_changed(vcpu) > \ > + ((vcpu->host_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)!= > \ > + (vcpu->guest_msrs[vcpu->msr_offset_efer].data & EFER_SR_BITS)) > static function instead of macro. > + > static struct kvm_vmx_segment_field { > unsigned selector; > unsigned base; > @@ -264,6 +269,18 @@ static void reload_tss(void) > #endif > } > > +void load_transition_efer(struct kvm_vcpu *vcpu) > static function > +{ > + __u64 msr_efer_trans; > kernel code should use u64, not __u64. __u64 is for kernel code that is shared with userspace. > > static void vmx_load_host_state(struct kvm_vcpu *vcpu) > @@ -335,6 +354,8 @@ static void vmx_load_host_state(struct kvm_vcpu > *vcpu) > } > save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); > load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); > + if (efer_msr_srbits_changed(vcpu)) > + load_msrs(vcpu->host_msrs+vcpu->msr_offset_efer, 1); > space around '+'. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <464FF88C.6090004-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <464FF88C.6090004-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-05-20 13:54 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A014E8ABF-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Dong, Eddie @ 2007-05-20 13:54 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel [-- Attachment #1: Type: text/plain, Size: 8115 bytes --] Avi Kivity wrote: > Dong, Eddie wrote: >> Avi: >> >> This patch is to avoid saving and restoring of msr_efer on >> lightweight vmexit. >> >> With this patch, the Kernel build get 10% increasement for 64bits >> on 64 bits, and 5-8% increasement for 32bits on 64 bits. >> Vmexit.flat can see ~1185 cycles save for 64 bits on 64 bits, which >> is 29% save of total VM Exit. It can save ~1206 cycles for 32bits >> on 64bits, which is >> 27.5% save of total. >> BTW, I use 4 core clovertown processor, the total VM Exit time >> reported by vmexit.flat is about 2860 cycles now. >> >> > > Very impressive speedup! > > Comments below. > Here is the reformatted one, with the patch 64bits guest Kernel Build performance on my platform exceeds Xen by ~7%. BTW, the previous fix for typo is a predendency. kvm.h | 2 ++ kvm_main.c | 23 +++++++++++++++++++++++ vmx.c | 56 ++++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 63 insertions(+), 18 deletions(-) commit 7d3596f123f95d168bbd1721c5d74bf7b13d0099 Author: root <root@vt32-pae.(none)> Date: Sun May 20 21:35:07 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..aad5b63 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS EFER_SCE static struct vmcs_descriptor { int size; @@ -56,6 +57,8 @@ static struct vmcs_descriptor { .ar_bytes = GUEST_##seg##_AR_BYTES, \ } +#define msr_efer_save_restore_bits(x) ((x).data & EFER_SAVE_RESTORE_BITS) + static struct kvm_vmx_segment_field { unsigned selector; unsigned base; @@ -84,6 +87,13 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +274,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +330,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +360,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +535,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +622,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +655,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1342,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #2: efer55.patch --] [-- Type: application/octet-stream, Size: 6807 bytes --] commit 7d3596f123f95d168bbd1721c5d74bf7b13d0099 Author: root <root@vt32-pae.(none)> Date: Sun May 20 21:35:07 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..aad5b63 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS EFER_SCE static struct vmcs_descriptor { int size; @@ -56,6 +57,8 @@ static struct vmcs_descriptor { .ar_bytes = GUEST_##seg##_AR_BYTES, \ } +#define msr_efer_save_restore_bits(x) ((x).data & EFER_SAVE_RESTORE_BITS) + static struct kvm_vmx_segment_field { unsigned selector; unsigned base; @@ -84,6 +87,13 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +274,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +330,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +360,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +535,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +622,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +655,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1342,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #3: Type: text/plain, Size: 286 bytes --] ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ [-- Attachment #4: Type: text/plain, Size: 186 bytes --] _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A014E8ABF-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <10EA09EFD8728347A513008B6B0DA77A014E8ABF-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-05-20 15:42 ` Avi Kivity [not found] ` <46506C4D.9070102-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Avi Kivity @ 2007-05-20 15:42 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Here is the reformatted one, with the patch 64bits guest Kernel Build > performance > on my platform exceeds Xen by ~7%. > Hey, this came earlier than I expected :) > > +#define msr_efer_save_restore_bits(x) ((x).data & > EFER_SAVE_RESTORE_BITS) > + > This needs to be a static function too, please. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <46506C4D.9070102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <46506C4D.9070102-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-05-21 1:10 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D33F-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Dong, Eddie @ 2007-05-21 1:10 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel [-- Attachment #1: Type: text/plain, Size: 7160 bytes --] Try again. Eddie kvm.h | 2 ++ kvm_main.c | 23 +++++++++++++++++++++++ vmx.c | 59 +++++++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 66 insertions(+), 18 deletions(-) commit 7de9b4bff794317ae05c0f3b2fec9b3d710fed2b Author: root <root@vt32-pae.(none)> Date: Mon May 21 09:05:08 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..f8633a8 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -84,6 +85,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry *msr) +{ + return (u64)msr->data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +277,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +333,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +363,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +538,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +625,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +658,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1345,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #2: efer55.patch --] [-- Type: application/octet-stream, Size: 6679 bytes --] commit 7de9b4bff794317ae05c0f3b2fec9b3d710fed2b Author: root <root@vt32-pae.(none)> Date: Mon May 21 09:05:08 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..f8633a8 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -84,6 +85,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry *msr) +{ + return (u64)msr->data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +277,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +333,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +363,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +538,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +625,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +658,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1345,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #3: Type: text/plain, Size: 286 bytes --] ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ [-- Attachment #4: Type: text/plain, Size: 186 bytes --] _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A0174D33F-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D33F-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-05-21 2:41 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D46A-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Dong, Eddie @ 2007-05-21 2:41 UTC (permalink / raw) To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel [-- Attachment #1: Type: text/plain, Size: 7064 bytes --] kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote: > Try again. Wrong attachment. Please use this one. commit 5ad9d2ec2cd1f324aa75f2bb0fd7a2de1cb769c3 Author: root <root@vt32-pae.(none)> Date: Mon May 21 10:37:34 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..8701a79 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -84,6 +85,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +{ + return (u64)msr.data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +277,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +333,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +363,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +538,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +625,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +658,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1345,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #2: efer55.patch --] [-- Type: application/octet-stream, Size: 6677 bytes --] commit 5ad9d2ec2cd1f324aa75f2bb0fd7a2de1cb769c3 Author: root <root@vt32-pae.(none)> Date: Mon May 21 10:37:34 2007 +0800 KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5f056d9..e35434b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -254,6 +254,7 @@ struct kvm_stat { u32 request_irq_exits; u32 irq_exits; u32 light_exits; + u32 efer_reload; }; struct kvm_vcpu { @@ -288,6 +289,7 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; + int msr_offset_efer; #ifdef CONFIG_X86_64 int msr_offset_kernel_gs_base; #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1288cff..a2ac108 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2377,6 +2378,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1<<20); + printk(KERN_INFO ": guest NX capibility removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2391,6 +2413,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9bf8c04..8701a79 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -41,6 +41,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -84,6 +85,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +{ + return (u64)msr.data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -264,6 +277,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -307,6 +333,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -335,6 +363,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -508,22 +538,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -610,10 +625,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + return ret; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -638,7 +658,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1325,6 +1345,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=g"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); [-- Attachment #3: Type: text/plain, Size: 286 bytes --] ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ [-- Attachment #4: Type: text/plain, Size: 186 bytes --] _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A0174D46A-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH] MSR_EFER acceleration for lighweight VM Exit [not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D46A-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-05-21 4:29 ` Avi Kivity 0 siblings, 0 replies; 7+ messages in thread From: Avi Kivity @ 2007-05-21 4:29 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote: > >> Try again. >> > Wrong attachment. Please use this one. > > Applied & thanks. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2007-05-21 4:29 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-18 5:09 [PATCH] MSR_EFER acceleration for lighweight VM Exit Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0174CED7-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-20 7:28 ` Avi Kivity
[not found] ` <464FF88C.6090004-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-05-20 13:54 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A014E8ABF-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-20 15:42 ` Avi Kivity
[not found] ` <46506C4D.9070102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-05-21 1:10 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D33F-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-21 2:41 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0174D46A-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-05-21 4:29 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox