From: Dongxiao Xu <dongxiao.xu@intel.com>
To: xen-devel@lists.xensource.com
Cc: JBeulich@suse.com, eddie.dong@intel.com, xiantao.zhang@intel.com,
jun.nakajima@intel.com
Subject: [PATCH v5 rebased 4/4] nested vmx: enable VMCS shadowing feature
Date: Wed, 23 Jan 2013 22:32:12 +0800 [thread overview]
Message-ID: <1358951532-20302-5-git-send-email-dongxiao.xu@intel.com> (raw)
In-Reply-To: <1358951532-20302-1-git-send-email-dongxiao.xu@intel.com>
Currently, non-root VMREAD/VMWRITE instructions are handled by taking a
VM-Exit and emulating them, which incurs a certain overhead.
New Intel platforms introduce a feature called VMCS shadowing, with
which non-root VMREAD/VMWRITE does not trigger a VM-Exit; the hardware
reads/writes the virtual VMCS instead.
Enabling this feature has been shown to improve performance.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
xen/arch/x86/hvm/vmx/vmcs.c | 9 ++++
xen/arch/x86/hvm/vmx/vvmx.c | 82 ++++++++++++++++++++++++++++++++++++
xen/include/asm-x86/hvm/vmx/vmcs.h | 18 +++++++-
3 files changed, 108 insertions(+), 1 deletions(-)
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index f89ea93..69b7dae 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -92,6 +92,7 @@ static void __init vmx_display_features(void)
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
+ P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
#undef P
if ( !printed )
@@ -133,6 +134,7 @@ static int vmx_init_vmcs_config(void)
u32 _vmx_cpu_based_exec_control;
u32 _vmx_secondary_exec_control = 0;
u64 _vmx_ept_vpid_cap = 0;
+ u64 _vmx_misc_cap = 0;
u32 _vmx_vmexit_control;
u32 _vmx_vmentry_control;
bool_t mismatch = 0;
@@ -180,6 +182,9 @@ static int vmx_init_vmcs_config(void)
SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_ENABLE_INVPCID);
+ rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
+ if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
+ opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
if ( opt_vpid_enabled )
opt |= SECONDARY_EXEC_ENABLE_VPID;
if ( opt_unrestricted_guest_enabled )
@@ -383,6 +388,8 @@ static void __vmx_clear_vmcs(void *info)
if ( arch_vmx->active_cpu == smp_processor_id() )
{
__vmpclear(virt_to_maddr(arch_vmx->vmcs));
+ if ( arch_vmx->vmcs_shadow_maddr )
+ __vmpclear(arch_vmx->vmcs_shadow_maddr);
arch_vmx->active_cpu = -1;
arch_vmx->launched = 0;
@@ -720,6 +727,8 @@ void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
spin_lock(&vmx->vmcs_lock);
__vmpclear(virt_to_maddr(from));
+ if ( vmx->vmcs_shadow_maddr )
+ __vmpclear(vmx->vmcs_shadow_maddr);
__vmptrld(virt_to_maddr(to));
vmx->vmcs = to;
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 1e1ad56..3c152c5 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -64,6 +64,48 @@ int nvmx_vcpu_initialise(struct vcpu *v)
gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
goto out;
}
+
+ /* non-root VMREAD/VMWRITE bitmap. */
+ if ( cpu_has_vmx_vmcs_shadowing )
+ {
+ struct page_info *vmread_bitmap, *vmwrite_bitmap;
+ unsigned long *vr, *vw;
+
+ vmread_bitmap = alloc_domheap_page(NULL, 0);
+ if ( !vmread_bitmap )
+ {
+ gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap failed\n");
+ goto out1;
+ }
+ v->arch.hvm_vmx.vmread_bitmap = vmread_bitmap;
+
+ vmwrite_bitmap = alloc_domheap_page(NULL, 0);
+ if ( !vmwrite_bitmap )
+ {
+ gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap failed\n");
+ goto out2;
+ }
+ v->arch.hvm_vmx.vmwrite_bitmap = vmwrite_bitmap;
+
+ vr = __map_domain_page(vmread_bitmap);
+ vw = __map_domain_page(vmwrite_bitmap);
+
+ clear_page(vr);
+ clear_page(vw);
+
+ /*
+ * For the following 4 encodings, we need to handle them in VMM.
+ * Let them vmexit as usual.
+ */
+ set_bit(IO_BITMAP_A, vw);
+ set_bit(IO_BITMAP_A_HIGH, vw);
+ set_bit(IO_BITMAP_B, vw);
+ set_bit(IO_BITMAP_B_HIGH, vw);
+
+ unmap_domain_page(vr);
+ unmap_domain_page(vw);
+ }
+
nvmx->ept.enabled = 0;
nvmx->guest_vpid = 0;
nvmx->vmxon_region_pa = 0;
@@ -76,6 +118,10 @@ int nvmx_vcpu_initialise(struct vcpu *v)
nvmx->msrbitmap = NULL;
INIT_LIST_HEAD(&nvmx->launched_list);
return 0;
+out2:
+ free_domheap_page(v->arch.hvm_vmx.vmread_bitmap);
+out1:
+ free_xenheap_page(nvcpu->nv_n2vmcx);
out:
return -ENOMEM;
}
@@ -106,6 +152,11 @@ void nvmx_vcpu_destroy(struct vcpu *v)
list_del(&item->node);
xfree(item);
}
+
+ if ( v->arch.hvm_vmx.vmread_bitmap )
+ free_domheap_page(v->arch.hvm_vmx.vmread_bitmap);
+ if ( v->arch.hvm_vmx.vmwrite_bitmap )
+ free_domheap_page(v->arch.hvm_vmx.vmwrite_bitmap);
}
void nvmx_domain_relinquish_resources(struct domain *d)
@@ -1035,6 +1086,32 @@ static bool_t nvmx_vpid_enabled(struct nestedvcpu *nvcpu)
return 0;
}
+static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+ unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+ paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+ __vmpclear(vvmcs_maddr);
+ vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
+ v->arch.hvm_vmx.vmcs_shadow_maddr = vvmcs_maddr;
+ __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
+ __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
+ __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
+}
+
+static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
+{
+ unsigned long vvmcs_mfn = domain_page_map_to_mfn(vvmcs);
+ paddr_t vvmcs_maddr = vvmcs_mfn << PAGE_SHIFT;
+
+ __vmpclear(vvmcs_maddr);
+ vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
+ v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
+ __vmwrite(VMCS_LINK_POINTER, ~0ul);
+ __vmwrite(VMREAD_BITMAP, 0);
+ __vmwrite(VMWRITE_BITMAP, 0);
+}
+
static void virtual_vmentry(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -1476,6 +1553,9 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
__map_msr_bitmap(v);
}
+ if ( cpu_has_vmx_vmcs_shadowing )
+ nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
+
vmreturn(regs, VMSUCCEED);
out:
@@ -1526,6 +1606,8 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
if ( gpa == nvcpu->nv_vvmcxaddr )
{
+ if ( cpu_has_vmx_vmcs_shadowing )
+ nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
clear_vvmcs_launched(&nvmx->launched_list,
domain_page_map_to_mfn(nvcpu->nv_vvmcx));
nvmx_purge_vvmcs(v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 652dc21..ba02221 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -81,6 +81,8 @@ struct vmx_domain {
struct arch_vmx_struct {
/* Virtual address of VMCS. */
struct vmcs_struct *vmcs;
+ /* VMCS shadow machine address. */
+ paddr_t vmcs_shadow_maddr;
/* Protects remote usage of VMCS (VMPTRLD/VMCLEAR). */
spinlock_t vmcs_lock;
@@ -125,6 +127,10 @@ struct arch_vmx_struct {
/* Remember EFLAGS while in virtual 8086 mode */
uint32_t vm86_saved_eflags;
int hostenv_migrated;
+
+ /* Bitmap to control vmexit policy for Non-root VMREAD/VMWRITE */
+ struct page_info *vmread_bitmap;
+ struct page_info *vmwrite_bitmap;
};
int vmx_create_vmcs(struct vcpu *v);
@@ -191,6 +197,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMCS_SHADOWING 0x00004000
extern u32 vmx_secondary_exec_control;
extern bool_t cpu_has_vmx_ins_outs_instr_info;
@@ -205,6 +212,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
#define VMX_EPT_INVEPT_SINGLE_CONTEXT 0x02000000
#define VMX_EPT_INVEPT_ALL_CONTEXT 0x04000000
+#define VMX_MISC_VMWRITE_ALL 0x20000000
+
#define VMX_VPID_INVVPID_INSTRUCTION 0x100000000ULL
#define VMX_VPID_INVVPID_INDIVIDUAL_ADDR 0x10000000000ULL
#define VMX_VPID_INVVPID_SINGLE_CONTEXT 0x20000000000ULL
@@ -244,7 +253,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
(vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
#define cpu_has_vmx_virtual_intr_delivery \
(vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
-#define cpu_has_vmx_vmcs_shadowing 0
+#define cpu_has_vmx_vmcs_shadowing \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VMCS_SHADOWING)
+
+#define VMCS_RID_TYPE_MASK 0x80000000
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -305,6 +317,10 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
--
1.7.1
next prev parent reply other threads:[~2013-01-23 14:32 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-23 14:32 [PATCH v5 rebased 0/4] nested vmx: enable VMCS shadowing feature Dongxiao Xu
2013-01-23 14:32 ` [PATCH v5 rebased 1/4] nested vmx: Use a list to store the launched vvmcs for L1 VMM Dongxiao Xu
2013-01-25 2:13 ` Dong, Eddie
2013-01-23 14:32 ` [PATCH v5 rebased 2/4] nested vmx: use VMREAD/VMWRITE to construct vVMCS if enabled VMCS shadowing Dongxiao Xu
2013-01-25 2:26 ` Dong, Eddie
2013-01-23 14:32 ` [PATCH v5 rebased 3/4] nested vmx: optimize for bulk access of virtual VMCS Dongxiao Xu
2013-01-25 2:27 ` Dong, Eddie
2013-01-23 14:32 ` Dongxiao Xu [this message]
2013-01-23 14:55 ` [PATCH v5 rebased 4/4] nested vmx: enable VMCS shadowing feature Nakajima, Jun
2013-01-25 2:28 ` Dong, Eddie
2013-01-29 10:19 ` [PATCH v5 rebased 0/4] " Joerg Roedel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1358951532-20302-5-git-send-email-dongxiao.xu@intel.com \
--to=dongxiao.xu@intel.com \
--cc=JBeulich@suse.com \
--cc=eddie.dong@intel.com \
--cc=jun.nakajima@intel.com \
--cc=xen-devel@lists.xensource.com \
--cc=xiantao.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).