From: Dongxiao Xu <dongxiao.xu@intel.com>
To: xen-devel@lists.xensource.com
Cc: eddie.dong@intel.com, xiantao.zhang@intel.com, jun.nakajima@intel.com
Subject: [PATCH 4/4] nested vmx: enable VMCS shadowing feature
Date: Thu, 17 Jan 2013 13:37:32 +0800 [thread overview]
Message-ID: <1358401052-14036-5-git-send-email-dongxiao.xu@intel.com> (raw)
In-Reply-To: <1358401052-14036-1-git-send-email-dongxiao.xu@intel.com>
The current logic handles non-root VMREAD/VMWRITE by taking a
VM-Exit and emulating the instruction, which may introduce noticeable overhead.
New Intel platforms introduce a feature called VMCS
shadowing, with which non-root VMREAD/VMWRITE will not trigger a VM-Exit;
instead, the hardware reads from/writes to the virtual VMCS directly.
Measurements show that this feature brings a performance improvement.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
xen/arch/x86/hvm/vmx/vmcs.c | 63 +++++++++++++++++++++++++++++++++++-
xen/arch/x86/hvm/vmx/vvmx.c | 23 +++++++++++++
xen/include/asm-x86/hvm/vmx/vmcs.h | 19 ++++++++++-
3 files changed, 103 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 4b0e8e0..190113f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -91,6 +91,7 @@ static void __init vmx_display_features(void)
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
+ P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
#undef P
if ( !printed )
@@ -132,6 +133,7 @@ static int vmx_init_vmcs_config(void)
u32 _vmx_cpu_based_exec_control;
u32 _vmx_secondary_exec_control = 0;
u64 _vmx_ept_vpid_cap = 0;
+ u64 _vmx_misc_cap = 0;
u32 _vmx_vmexit_control;
u32 _vmx_vmentry_control;
bool_t mismatch = 0;
@@ -179,6 +181,9 @@ static int vmx_init_vmcs_config(void)
SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_ENABLE_INVPCID);
+ rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
+ if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
+ opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
if ( opt_vpid_enabled )
opt |= SECONDARY_EXEC_ENABLE_VPID;
if ( opt_unrestricted_guest_enabled )
@@ -382,6 +387,8 @@ static void __vmx_clear_vmcs(void *info)
if ( arch_vmx->active_cpu == smp_processor_id() )
{
__vmpclear(virt_to_maddr(arch_vmx->vmcs));
+ if ( arch_vmx->shadow_vmcs_pa && arch_vmx->shadow_vmcs_pa != ~0ul )
+ __vmpclear(arch_vmx->shadow_vmcs_pa);
arch_vmx->active_cpu = -1;
arch_vmx->launched = 0;
@@ -710,6 +717,8 @@ void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
spin_lock(&vmx->vmcs_lock);
__vmpclear(virt_to_maddr(from));
+ if ( vmx->shadow_vmcs_pa && vmx->shadow_vmcs_pa != ~0ul )
+ __vmpclear(vmx->shadow_vmcs_pa);
__vmptrld(virt_to_maddr(to));
vmx->vmcs = to;
@@ -761,6 +770,7 @@ static int construct_vmcs(struct vcpu *v)
unsigned long sysenter_eip;
u32 vmexit_ctl = vmx_vmexit_control;
u32 vmentry_ctl = vmx_vmentry_control;
+ int ret = 0;
vmx_vmcs_enter(v);
@@ -816,7 +826,10 @@ static int construct_vmcs(struct vcpu *v)
unsigned long *msr_bitmap = alloc_xenheap_page();
if ( msr_bitmap == NULL )
- return -ENOMEM;
+ {
+ ret = -ENOMEM;
+ goto out;
+ }
memset(msr_bitmap, ~0, PAGE_SIZE);
v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
@@ -843,6 +856,45 @@ static int construct_vmcs(struct vcpu *v)
}
}
+ /* non-root VMREAD/VMWRITE bitmap. */
+ if ( cpu_has_vmx_vmcs_shadowing )
+ {
+ unsigned long *vmread_bitmap, *vmwrite_bitmap;
+
+ vmread_bitmap = alloc_xenheap_page();
+ if ( !vmread_bitmap )
+ {
+ gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap failed\n");
+ ret = -ENOMEM;
+ goto out1;
+ }
+ v->arch.hvm_vmx.vmread_bitmap = vmread_bitmap;
+
+ vmwrite_bitmap = alloc_xenheap_page();
+ if ( !vmwrite_bitmap )
+ {
+ gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap failed\n");
+ ret = -ENOMEM;
+ goto out2;
+ }
+ v->arch.hvm_vmx.vmwrite_bitmap = vmwrite_bitmap;
+
+ memset(vmread_bitmap, 0, PAGE_SIZE);
+ memset(vmwrite_bitmap, 0, PAGE_SIZE);
+
+ /*
+ * For the following 4 encodings, we need to handle them in VMM.
+ * Let them vmexit as usual.
+ */
+ set_bit(IO_BITMAP_A, vmwrite_bitmap);
+ set_bit(IO_BITMAP_A_HIGH, vmwrite_bitmap);
+ set_bit(IO_BITMAP_B, vmwrite_bitmap);
+ set_bit(IO_BITMAP_B_HIGH, vmwrite_bitmap);
+
+ __vmwrite(VMREAD_BITMAP, virt_to_maddr(vmread_bitmap));
+ __vmwrite(VMWRITE_BITMAP, virt_to_maddr(vmwrite_bitmap));
+ }
+
/* I/O access bitmap. */
__vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
__vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
@@ -997,6 +1049,13 @@ static int construct_vmcs(struct vcpu *v)
vmx_vlapic_msr_changed(v);
return 0;
+
+out2:
+ free_xenheap_page(v->arch.hvm_vmx.vmread_bitmap);
+out1:
+ free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
+out:
+ return ret;
}
int vmx_read_guest_msr(u32 msr, u64 *val)
@@ -1154,6 +1213,8 @@ void vmx_destroy_vmcs(struct vcpu *v)
free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
free_xenheap_page(v->arch.hvm_vmx.msr_area);
free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
+ free_xenheap_page(v->arch.hvm_vmx.vmread_bitmap);
+ free_xenheap_page(v->arch.hvm_vmx.vmwrite_bitmap);
}
void vm_launch_fail(void)
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 9aba89e..e75e997 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -994,6 +994,24 @@ static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
return 0;
}
+static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+ paddr_t vvmcs_pa = virt_to_maddr(vvmcs);
+
+ __vmpclear(vvmcs_pa);
+ vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+ v->arch.hvm_vmx.shadow_vmcs_pa = vvmcs_pa;
+ __vmwrite(VMCS_LINK_POINTER, vvmcs_pa);
+}
+
+static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+ __vmpclear(virt_to_maddr(vvmcs));
+ vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
+ v->arch.hvm_vmx.shadow_vmcs_pa = ~0ul;
+ __vmwrite(VMCS_LINK_POINTER, ~0ul);
+}
+
static void virtual_vmentry(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -1431,6 +1449,9 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
__map_msr_bitmap(v);
}
+ if ( cpu_has_vmx_vmcs_shadowing )
+ nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
+
vmreturn(regs, VMSUCCEED);
out:
@@ -1481,6 +1502,8 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
if ( gpa == nvcpu->nv_vvmcxaddr )
{
+ if ( cpu_has_vmx_vmcs_shadowing )
+ nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
clear_vvmcs_launched(&nvmx->launched_list, virt_to_maddr(nvcpu->nv_vvmcx));
nvmx_purge_vvmcs(v);
}
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 901652d..61c6655 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -81,6 +81,8 @@ struct vmx_domain {
struct arch_vmx_struct {
/* Virtual address of VMCS. */
struct vmcs_struct *vmcs;
+ /* Physical address of shadow VMCS. */
+ paddr_t shadow_vmcs_pa;
/* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
spinlock_t vmcs_lock;
@@ -125,6 +127,10 @@ struct arch_vmx_struct {
/* Remember EFLAGS while in virtual 8086 mode */
uint32_t vm86_saved_eflags;
int hostenv_migrated;
+
+ /* Bitmap to control vmexit policy for Non-root VMREAD/VMWRITE */
+ unsigned long *vmread_bitmap;
+ unsigned long *vmwrite_bitmap;
};
int vmx_create_vmcs(struct vcpu *v);
@@ -191,6 +197,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMCS_SHADOWING 0x00004000
extern u32 vmx_secondary_exec_control;
extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -205,6 +212,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
#define VMX_EPT_INVEPT_SINGLE_CONTEXT 0x02000000
#define VMX_EPT_INVEPT_ALL_CONTEXT 0x04000000
+#define VMX_MISC_VMWRITE_ALL 0x20000000
+
#define VMX_VPID_INVVPID_INSTRUCTION 0x100000000ULL
#define VMX_VPID_INVVPID_INDIVIDUAL_ADDR 0x10000000000ULL
#define VMX_VPID_INVVPID_SINGLE_CONTEXT 0x20000000000ULL
@@ -244,7 +253,11 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
(vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
#define cpu_has_vmx_virtual_intr_delivery \
(vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
-#define cpu_has_vmx_vmcs_shadowing 0
+#define cpu_has_vmx_vmcs_shadowing \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VMCS_SHADOWING)
+
+#define VMCS_RID_TYPE_MASK 0x80000000
+
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
#define VMX_INTR_SHADOW_MOV_SS 0x00000002
@@ -304,6 +317,10 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
--
1.7.1
prev parent reply other threads:[~2013-01-17 5:37 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-17 5:37 [PATCH 0/4] nested vmx: enable VMCS shadowing feature Dongxiao Xu
2013-01-17 5:37 ` [PATCH 1/4] nested vmx: Use a list to store the launched vvmcs for L1 VMM Dongxiao Xu
2013-01-17 11:38 ` Jan Beulich
2013-01-17 12:39 ` Xu, Dongxiao
2013-01-17 12:58 ` Jan Beulich
2013-01-17 5:37 ` [PATCH 2/4] nested vmx: use VMREAD/VMWRITE to construct vVMCS if enabled VMCS shadowing Dongxiao Xu
2013-01-17 11:40 ` Jan Beulich
2013-01-17 5:37 ` [PATCH 3/4] nested vmx: optimize for bulk access of virtual VMCS Dongxiao Xu
2013-01-17 11:48 ` Jan Beulich
2013-01-17 5:37 ` Dongxiao Xu [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1358401052-14036-5-git-send-email-dongxiao.xu@intel.com \
--to=dongxiao.xu@intel.com \
--cc=eddie.dong@intel.com \
--cc=jun.nakajima@intel.com \
--cc=xen-devel@lists.xensource.com \
--cc=xiantao.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).