From: Bandan Das <bsd@redhat.com>
To: kvm@vger.kernel.org
Cc: pbonzini@redhat.com, david@redhat.com, rkrcmar@redhat.com,
jmattson@google.com, linux-kernel@vger.kernel.org
Subject: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
Date: Fri, 28 Jul 2017 15:52:45 -0400 [thread overview]
Message-ID: <20170728195245.1018-4-bsd@redhat.com> (raw)
In-Reply-To: <20170728195245.1018-1-bsd@redhat.com>
When L2 uses vmfunc, L0 utilizes the associated vmexit to
emulate a switching of the ept pointer by reloading the
guest MMU.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
arch/x86/include/asm/vmx.h | 6 +++
arch/x86/kvm/vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da5375e..5f63a2e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -115,6 +115,10 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_EPTP_ENTRIES 512
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -200,6 +204,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ EPTP_LIST_ADDRESS = 0x00002024,
+ EPTP_LIST_ADDRESS_HIGH = 0x00002025,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fe8f5fc..f1ab783 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -246,6 +246,7 @@ struct __packed vmcs12 {
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
+ u64 eptp_list_address;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
@@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
}
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ VMX_VMFUNC_EPTP_SWITCHING);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (cpu_has_vmx_vmfunc()) {
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VMFUNC;
- vmx->nested.nested_vmx_vmfunc_controls = 0;
+ /*
+ * Advertise EPTP switching unconditionally
+ * since we emulate it
+ */
+ vmx->nested.nested_vmx_vmfunc_controls =
+ VMX_VMFUNC_EPTP_SWITCHING;
}
/*
@@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 mask = VMX_EPT_RWX_MASK;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+ /* Check for execute_only validity */
+ if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
+ if (!(vmx->nested.nested_vmx_ept_caps &
+ VMX_EPT_EXECUTE_ONLY_BIT))
+ return false;
+ }
+
+ /* Bits 5:3 must be 3 */
+ if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
+ return false;
+
+ /* Reserved bits should not be set */
+ if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+ return false;
+
+ /* AD, if set, should be supported */
+ if ((address & VMX_EPT_AD_ENABLE_BIT)) {
+ if (!enable_ept_ad_bits)
+ return false;
+ mmu->ept_ad = true;
+ } else
+ mmu->ept_ad = false;
+
+ return true;
+}
+
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u64 *l1_eptp_list, address;
+ struct page *page;
+
+ if (!nested_cpu_has_eptp_switching(vmcs12) ||
+ !nested_cpu_has_ept(vmcs12))
+ return 1;
+
+ if (index >= VMFUNC_EPTP_ENTRIES)
+ return 1;
+
+ page = nested_get_page(vcpu, vmcs12->eptp_list_address);
+ if (!page)
+ return 1;
+
+ l1_eptp_list = kmap(page);
+ address = l1_eptp_list[index];
+
+ /*
+ * If the (L2) guest does a vmfunc to the currently
+ * active ept pointer, we don't have to do anything else
+ */
+ if (vmcs12->ept_pointer != address) {
+ if (!check_ept_address_valid(vcpu, address)) {
+ kunmap(page);
+ nested_release_page_clean(page);
+ return 1;
+ }
+ kvm_mmu_unload(vcpu);
+ vmcs12->ept_pointer = address;
+ /*
+ * TODO: Check what's the correct approach in case
+ * mmu reload fails. Currently, we just let the next
+ * reload potentially fail
+ */
+ kvm_mmu_reload(vcpu);
+ }
+
+ kunmap(page);
+ nested_release_page_clean(page);
+ return 0;
+}
+
static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
vmcs12 = get_vmcs12(vcpu);
if ((vmcs12->vm_function_control & (1 << function)) == 0)
goto fail;
- WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+ switch (function) {
+ case 0:
+ if (nested_vmx_eptp_switching(vcpu, vmcs12))
+ goto fail;
+ break;
+ default:
+ goto fail;
+ }
+ return kvm_skip_emulated_instruction(vcpu);
fail:
nested_vmx_vmexit(vcpu, vmx->exit_reason,
@@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
- if (nested_cpu_has_vmfunc(vmcs12) &&
- (vmcs12->vm_function_control &
- ~vmx->nested.nested_vmx_vmfunc_controls))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12)) {
+ if (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_cpu_has_eptp_switching(vmcs12)) {
+ if (!nested_cpu_has_ept(vmcs12) ||
+ (vmcs12->eptp_list_address >>
+ cpuid_maxphyaddr(vcpu)) ||
+ !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
+
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
--
2.9.4
next prev parent reply other threads:[~2017-07-28 19:52 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-28 19:52 [PATCH v5 0/3] Expose VMFUNC to the nested hypervisor Bandan Das
2017-07-28 19:52 ` [PATCH v5 1/3] KVM: vmx: Enable VMFUNCs Bandan Das
2017-07-28 19:52 ` [PATCH v5 2/3] KVM: nVMX: Enable VMFUNC for the L1 hypervisor Bandan Das
2017-07-28 19:52 ` Bandan Das [this message]
2017-07-31 11:59 ` [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching " David Hildenbrand
2017-07-31 19:32 ` Bandan Das
2017-08-01 11:40 ` David Hildenbrand
2017-08-01 14:55 ` Radim Krčmář
2017-08-01 15:17 ` Radim Krčmář
2017-08-01 18:30 ` Bandan Das
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170728195245.1018-4-bsd@redhat.com \
--to=bsd@redhat.com \
--cc=david@redhat.com \
--cc=jmattson@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=rkrcmar@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.