From: "Huang, Kai" <kai.huang@linux.intel.com>
To: Bandan Das <bsd@redhat.com>, kvm@vger.kernel.org
Cc: pbonzini@redhat.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging
Date: Wed, 10 May 2017 22:48:44 +1200 [thread overview]
Message-ID: <85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com> (raw)
In-Reply-To: <20170505192515.27833-3-bsd@redhat.com>
On 5/6/2017 7:25 AM, Bandan Das wrote:
> With EPT A/D enabled, processor access to L2 guest
> paging structures will result in a write violation.
> When this happens, write the GUEST_PHYSICAL_ADDRESS
> to the pml buffer provided by L1 if the access is
> write and the dirty bit is being set.
>
> This patch also adds necessary checks during VMEntry if L1
> has enabled PML. If the PML index overflows, we change the
> exit reason and run L1 to simulate a PML full event.
>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
> arch/x86/kvm/vmx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2211697..8b9e942 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -248,6 +248,7 @@ struct __packed vmcs12 {
> u64 xss_exit_bitmap;
> u64 guest_physical_address;
> u64 vmcs_link_pointer;
> + u64 pml_address;
> u64 guest_ia32_debugctl;
> u64 guest_ia32_pat;
> u64 guest_ia32_efer;
> @@ -369,6 +370,7 @@ struct __packed vmcs12 {
> u16 guest_ldtr_selector;
> u16 guest_tr_selector;
> u16 guest_intr_status;
> + u16 guest_pml_index;
> u16 host_es_selector;
> u16 host_cs_selector;
> u16 host_ss_selector;
> @@ -407,6 +409,7 @@ struct nested_vmx {
> /* Has the level1 guest done vmxon? */
> bool vmxon;
> gpa_t vmxon_ptr;
> + bool pml_full;
>
> /* The guest-physical address of the current VMCS L1 keeps for L2 */
> gpa_t current_vmptr;
> @@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
> FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
> FIELD(GUEST_INTR_STATUS, guest_intr_status),
> + FIELD(GUEST_PML_INDEX, guest_pml_index),
> FIELD(HOST_ES_SELECTOR, host_es_selector),
> FIELD(HOST_CS_SELECTOR, host_cs_selector),
> FIELD(HOST_SS_SELECTOR, host_ss_selector),
> @@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
> FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
> FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> + FIELD64(PML_ADDRESS, pml_address),
> FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
> FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
> FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
> @@ -1349,6 +1354,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
> vmx_xsaves_supported();
> }
>
> +static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
> +}
> +
> static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
> {
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
> @@ -9368,13 +9378,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> struct x86_exception *fault)
> {
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exit_reason;
> + unsigned long exit_qualification = vcpu->arch.exit_qualification;
>
> - if (fault->error_code & PFERR_RSVD_MASK)
> + if (vmx->nested.pml_full) {
> + exit_reason = EXIT_REASON_PML_FULL;
> + vmx->nested.pml_full = false;
> + exit_qualification &= INTR_INFO_UNBLOCK_NMI;
Sorry, I cannot recall the details. It is probably better to add a comment
explaining why exit_qualification is masked with INTR_INFO_UNBLOCK_NMI here.
> + } else if (fault->error_code & PFERR_RSVD_MASK)
> exit_reason = EXIT_REASON_EPT_MISCONFIG;
> else
> exit_reason = EXIT_REASON_EPT_VIOLATION;
> - nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
> +
> + nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
> vmcs12->guest_physical_address = fault->address;
> }
>
> @@ -9717,6 +9734,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> +static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
> + struct vmcs12 *vmcs12)
> +{
> + u64 address = vmcs12->pml_address;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
> + if (!nested_cpu_has_ept(vmcs12) ||
> + !IS_ALIGNED(address, 4096) ||
> + address >> maxphyaddr)
> + return -EINVAL;
> + }
Do we also need to check whether EPT A/D has been enabled in vmcs12 to
make vmentry work? I cannot recall the details, but it is probably not necessary.
> +
> + return 0;
> +}
> +
> static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
> struct vmx_msr_entry *e)
> {
> @@ -10252,6 +10285,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> + if (nested_vmx_check_pml_controls(vcpu, vmcs12))
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
> vmx->nested.nested_vmx_procbased_ctls_low,
> vmx->nested.nested_vmx_procbased_ctls_high) ||
> @@ -11146,6 +11182,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
> kvm_flush_pml_buffers(kvm);
> }
>
> +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
> +{
> + struct vmcs12 *vmcs12;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + gpa_t gpa;
> + struct page *page = NULL;
> + u64 *pml_address;
> +
> + if (is_guest_mode(vcpu)) {
> + WARN_ON_ONCE(vmx->nested.pml_full);
> +
> + /*
> + * Check if PML is enabled for the nested guest.
> + * Whether eptp bit 6 is set is already checked
> + * as part of A/D emulation.
> + */
> + vmcs12 = get_vmcs12(vcpu);
> + if (!nested_cpu_has_pml(vmcs12))
> + return 0;
Since nested_vmx_check_pml_controls above doesn't check the EPT A/D bit in
L1, it seems we need to add that check here.
> +
> + if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
> + vmx->nested.pml_full = true;
> + return 1;
> + }
Is the purpose of returning 1 to make the upper-layer code inject a PML
full VMEXIT to L1 in nested_ept_inject_page_fault?
> +
> + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
> +
> + page = nested_get_page(vcpu, vmcs12->pml_address);
> + if (!page)
> + return 0;
If PML is enabled in L1, I think nested_get_page should never return a
NULL PML page (unless L1 does something wrong). It is probably better to
return 1 rather than 0, and handle the error in nested_ept_inject_page_fault
according to vmcs12->pml_address?
> +
> + pml_address = kmap(page);
> + pml_address[vmcs12->guest_pml_index--] = gpa;
This gpa is the L2 guest's GPA. Do we also need to mark the L1 GPA (the one
that corresponds to the L2 guest's GPA above) in the dirty log? Or has this
already been done?
Thanks,
-Kai
> + kunmap(page);
> + nested_release_page_clean(page);
> + }
> +
> + return 0;
> +}
> +
> static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
> struct kvm_memory_slot *memslot,
> gfn_t offset, unsigned long mask)
> @@ -11505,6 +11581,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
> .flush_log_dirty = vmx_flush_log_dirty,
> .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> + .write_log_dirty = vmx_write_pml_buffer,
>
> .pre_block = vmx_pre_block,
> .post_block = vmx_post_block,
>
next prev parent reply other threads:[~2017-05-10 10:48 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-05 19:25 [PATCH v2 0/3] nVMX: Emulated Page Modification Logging for Nested Virtualization Bandan Das
2017-05-05 19:25 ` [PATCH v2 1/3] kvm: x86: Add a hook for arch specific dirty logging emulation Bandan Das
2017-05-10 10:49 ` Huang, Kai
2017-05-10 15:53 ` Bandan Das
2017-05-10 23:23 ` Huang, Kai
2017-05-11 18:36 ` Bandan Das
2017-05-05 19:25 ` [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging Bandan Das
2017-05-10 10:48 ` Huang, Kai [this message]
2017-05-10 14:46 ` Paolo Bonzini
2017-05-10 16:00 ` Bandan Das
2017-05-10 23:21 ` Huang, Kai
2017-05-05 19:25 ` [PATCH v2 3/3] nVMX: Advertise PML to L1 hypervisor Bandan Das
2017-05-09 15:22 ` [PATCH v2 0/3] nVMX: Emulated Page Modification Logging for Nested Virtualization Paolo Bonzini
2017-05-09 16:03 ` Bandan Das
2017-05-09 16:04 ` Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com \
--to=kai.huang@linux.intel.com \
--cc=bsd@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.