From: Bandan Das <bsd@redhat.com>
To: Eugene Korenevsky <ekorenevsky@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>,
kvm@vger.kernel.org, Wincy Van <fanwenyi0529@gmail.com>,
Jan Kiszka <jan.kiszka@web.de>
Subject: Re: [PATCH] KVM: x86: nVMX: support for MSR loading/storing
Date: Tue, 09 Dec 2014 19:35:04 +0530
Message-ID: <jpgy4qgly0f.fsf@redhat.com>
In-Reply-To: <20141209004139.GA8883@gnote>

Eugene Korenevsky <ekorenevsky@gmail.com> writes:
> Several hypervisors use MSR loading/storing to run guests.
> This patch implements emulation of this feature and allows these
> hypervisors to work in L1.
>
> The following is emulated:
> - Loading MSRs on VM-entries
> - Saving MSRs on VM-exits
> - Loading MSRs on VM-exits
>
> Actions taken on loading MSRs:
> - MSR load area is verified
> - For each MSR entry from load area:
>   - MSR load entry is read and verified
>   - MSR value is safely written
> Actions taken on storing MSRs:
> - MSR store area is verified
> - For each MSR entry from store area:
>   - MSR entry is read and verified
>   - MSR value is safely read using MSR index from MSR entry
>   - MSR value is written to MSR entry
>
> The code performs checks required by Intel Software Developer Manual.
>
Thank you for the commit message.
> This patch is partially based on Wincy Wan's work.
Typo in the name above: s/Wan/Van/
I have added Jan and Wincy to the CC list since they reviewed your earlier proposal.
I think it would be better to split this up, as I mentioned earlier; however,
if the other reviewers and the maintainer don't have objections, I am ok :)
>
> Signed-off-by: Eugene Korenevsky <ekorenevsky@gmail.com>
> ---
> arch/x86/include/asm/vmx.h | 6 +
> arch/x86/include/uapi/asm/msr-index.h | 3 +
> arch/x86/include/uapi/asm/vmx.h | 2 +
> arch/x86/kvm/vmx.c | 210 ++++++++++++++++++++++++++++++++--
> virt/kvm/kvm_main.c | 1 +
> 5 files changed, 215 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 45afaee..8bdb247 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -457,6 +457,12 @@ struct vmx_msr_entry {
> #define ENTRY_FAIL_VMCS_LINK_PTR 4
>
> /*
> + * VMX Abort codes
> + */
> +#define VMX_ABORT_MSR_STORE_FAILURE 1
> +#define VMX_ABORT_MSR_LOAD_FAILURE 4
> +
> +/*
> * VM-instruction error numbers
> */
> enum vm_instruction_error_number {
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index e21331c..3c9c601 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -316,6 +316,9 @@
> #define MSR_IA32_UCODE_WRITE 0x00000079
> #define MSR_IA32_UCODE_REV 0x0000008b
>
> +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
> +#define MSR_IA32_SMBASE 0x0000009e
> +
> #define MSR_IA32_PERF_STATUS 0x00000198
> #define MSR_IA32_PERF_CTL 0x00000199
> #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index b813bf9..52ad8e2 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -56,6 +56,7 @@
> #define EXIT_REASON_MSR_READ 31
> #define EXIT_REASON_MSR_WRITE 32
> #define EXIT_REASON_INVALID_STATE 33
> +#define EXIT_REASON_MSR_LOAD_FAILURE 34
> #define EXIT_REASON_MWAIT_INSTRUCTION 36
> #define EXIT_REASON_MONITOR_INSTRUCTION 39
> #define EXIT_REASON_PAUSE_INSTRUCTION 40
> @@ -116,6 +117,7 @@
> { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
> { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
> { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
> + { EXIT_REASON_MSR_LOAD_FAILURE, "MSR_LOAD_FAILURE" }, \
> { EXIT_REASON_INVD, "INVD" }, \
> { EXIT_REASON_INVVPID, "INVVPID" }, \
> { EXIT_REASON_INVPCID, "INVPCID" }, \
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9bcc871..86dc7db 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8571,6 +8571,168 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
> }
>
> +static bool vmx_msr_switch_area_verify(struct kvm_vcpu *vcpu,
> + unsigned long count_field,
> + unsigned long addr_field,
> + int maxphyaddr)
> +{
> + u64 count, addr;
> +
> + BUG_ON(vmcs12_read_any(vcpu, count_field, &count));
> + BUG_ON(vmcs12_read_any(vcpu, addr_field, &addr));
BUG_ON() is a bit harsh here; please use WARN_ON() and bail out instead.
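Something along these lines (untested, just to illustrate) would keep the
sanity check without bringing down the host:

        if (WARN_ON(vmcs12_read_any(vcpu, count_field, &count)) ||
            WARN_ON(vmcs12_read_any(vcpu, addr_field, &addr)))
                return false;

If vmcs12_read_any() ever fails for these well-known fields it is a KVM bug,
but a WARN plus failing the nested entry seems like a friendlier way to
surface it than killing the host.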
> + if (!IS_ALIGNED(addr, 16))
> + goto fail;
> + if (addr >> maxphyaddr)
> + goto fail;
> + if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
> + goto fail;
> + return true;
> +fail:
> + pr_warn_ratelimited(
> + "nVMX: invalid MSR switch (0x%lx, 0x%lx, %d, %llu, 0x%08llx)",
> + count_field, addr_field, maxphyaddr, count, addr);
> + return false;
> +}
> +
> +static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
> + struct vmcs12 *vmcs12)
> +{
> + int maxphyaddr;
> +
> + if (vmcs12->vm_exit_msr_load_count == 0 &&
> + vmcs12->vm_exit_msr_store_count == 0 &&
> + vmcs12->vm_entry_msr_load_count == 0)
> + return true; /* Fast path */
> + maxphyaddr = cpuid_maxphyaddr(vcpu);
> + return vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_LOAD_COUNT,
> + VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) &&
> + vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_STORE_COUNT,
> + VM_EXIT_MSR_STORE_ADDR,
> + maxphyaddr) &&
> + vmx_msr_switch_area_verify(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
> + VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr);
> +}
> +
> +static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
> + struct vmx_msr_entry *e)
> +{
> + if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
> + return false;
> + /* SMM is not supported */
> + if (e->index == MSR_IA32_SMM_MONITOR_CTL)
> + return false;
> + /* x2APIC MSR accesses are not allowed */
> + if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> + return false;
> + if (e->reserved != 0)
> + return false;
> + return true;
> +}
> +
> +static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
> +{
> + /* Intel SDM: a processor MAY prevent writing to these MSRs when
> + * loading guest states on VM entries or saving guest states on VM
> + * exits.
> + * Assume our emulated processor DOES prevent writing */
Just a nit: please end the comment with the closing */ on its own line, to be consistent with the style elsewhere.
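i.e. something like:

        /* Intel SDM: a processor MAY prevent writing to these MSRs when
         * loading guest states on VM entries or saving guest states on VM
         * exits.
         * Assume our emulated processor DOES prevent writing.
         */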
> + return msr_index == MSR_IA32_UCODE_WRITE ||
> + msr_index == MSR_IA32_UCODE_REV;
> +}
> +
> +static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
> + u32 count, u64 addr)
> +{
> + unsigned int i;
> + struct vmx_msr_entry msr_entry;
> + struct msr_data msr;
> +
> + for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> + if (kvm_read_guest(vcpu->kvm, addr,
> + &msr_entry, sizeof(msr_entry))) {
> + pr_warn_ratelimited(
> + "%s MSR %u: cannot read GPA: 0x%llx\n",
> + __func__, i, addr);
> + return i;
> + }
> + if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
> + pr_warn_ratelimited(
> + "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
> + __func__, i, msr_entry.index,
> + msr_entry.reserved);
> + return i;
> + }
> + msr.host_initiated = false;
> + msr.index = msr_entry.index;
> + msr.data = msr_entry.value;
> + if (vmx_msr_switch_is_protected_msr(msr.index)) {
> + pr_warn_ratelimited(
> + "%s MSR %u: prevent writing to MSR 0x%x\n",
> + __func__, i, msr.index);
> + return i;
> + }
> + if (kvm_set_msr(vcpu, &msr)) {
> + pr_warn_ratelimited(
> + "%s MSR %u: cannot write 0x%llx to MSR 0x%x\n",
> + __func__, i, msr.data, msr.index);
> + return i;
> + }
> + }
> + return 0;
> +}
> +
> +static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
> + struct vmx_msr_entry *e)
> +{
> + /* x2APIC MSR accesses are not allowed */
> + if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> + return false;
> + /* SMM is not supported */
> + if (e->index == MSR_IA32_SMBASE)
> + return false;
> + if (e->reserved != 0)
> + return false;
> + return true;
> +}
> +
> +static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
> + u32 count, u64 addr)
> +{
> + unsigned int i;
> + struct vmx_msr_entry msr_entry;
> +
> + for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> + if (kvm_read_guest(vcpu->kvm, addr,
> + &msr_entry, 2 * sizeof(u32))) {
> + pr_warn_ratelimited(
> + "%s MSR %u: cannot read GPA: 0x%llx\n",
> + __func__, i, addr);
> + return i;
> + }
> + if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
> + pr_warn_ratelimited(
> + "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
> + __func__, i, msr_entry.index,
> + msr_entry.reserved);
> + return i;
> + }
> + if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.value)) {
> + pr_warn_ratelimited(
> + "%s MSR %u: cannot read MSR 0x%x\n",
> + __func__, i, msr_entry.index);
> + return i;
> + }
> + if (kvm_write_guest(vcpu->kvm,
> + addr + offsetof(struct vmx_msr_entry, value),
> + &msr_entry.value, sizeof(msr_entry.value))) {
> + pr_warn_ratelimited(
> + "%s MSR %u: cannot write to GPA: 0x%llx\n",
> + __func__, i, addr);
> + return i;
> + }
> + }
> + return 0;
> +}
> +
> /*
> * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
> * for running an L2 nested guest.
> @@ -8580,6 +8742,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> struct vmcs12 *vmcs12;
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> int cpu;
> + unsigned int msr;
> struct loaded_vmcs *vmcs02;
> bool ia32e;
>
> @@ -8629,11 +8792,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> return 1;
> }
>
> - if (vmcs12->vm_entry_msr_load_count > 0 ||
> - vmcs12->vm_exit_msr_load_count > 0 ||
> - vmcs12->vm_exit_msr_store_count > 0) {
> - pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
> - __func__);
> + if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
> nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
> return 1;
> }
> @@ -8739,10 +8898,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>
> vmx_segment_cache_clear(vmx);
>
> - vmcs12->launch_state = 1;
> -
> prepare_vmcs02(vcpu, vmcs12);
>
> + msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
> + vmcs12->vm_entry_msr_load_addr);
> + if (msr) {
> + leave_guest_mode(vcpu);
> + vmx_load_vmcs01(vcpu);
> + nested_vmx_entry_failure(vcpu, vmcs12,
> + EXIT_REASON_MSR_LOAD_FAILURE, msr);
> + return 1;
> + }
> +
> + vmcs12->launch_state = 1;
> +
> if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
> return kvm_emulate_halt(vcpu);
>
> @@ -9040,6 +9209,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> kvm_clear_interrupt_queue(vcpu);
> }
>
> +static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
> +{
> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> + vmcs12->abort = abort_code;
> + pr_warn("Nested VMX abort %u\n", abort_code);
> + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> +}
> +
> /*
> * A part of what we need to when the nested L2 guest exits and we want to
> * run its L1 parent, is to reset L1's guest state to the host state specified
> @@ -9172,6 +9350,19 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
>
> kvm_set_dr(vcpu, 7, 0x400);
> vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> +
> + if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
> + vmcs12->vm_exit_msr_load_addr))
> + nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD_FAILURE);
> +}
> +
> +static bool vmx_is_entry_failure(u32 exit_reason)
> +{
> + if ((exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) == 0)
> + return false;
> + exit_reason &= ~VMX_EXIT_REASONS_FAILED_VMENTRY;
> + return exit_reason == EXIT_REASON_INVALID_STATE ||
> + exit_reason == EXIT_REASON_MSR_LOAD_FAILURE;
> }
>
> /*
> @@ -9193,6 +9384,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
> prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
> exit_qualification);
>
> + if (nested_vmx_store_msrs(vcpu,
> + vmcs12->vm_exit_msr_store_count,
> + vmcs12->vm_exit_msr_store_addr))
> + nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE_FAILURE);
> +
> vmx_load_vmcs01(vcpu);
>
> if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index c5c186a..77179c5 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1579,6 +1579,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
> }
> return 0;
> }
> +EXPORT_SYMBOL_GPL(kvm_write_guest);
>
> int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
> gpa_t gpa, unsigned long len)