public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: "Huang, Kai" <kai.huang@linux.intel.com>
To: Bandan Das <bsd@redhat.com>, kvm@vger.kernel.org
Cc: pbonzini@redhat.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging
Date: Wed, 10 May 2017 22:48:44 +1200	[thread overview]
Message-ID: <85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com> (raw)
In-Reply-To: <20170505192515.27833-3-bsd@redhat.com>



On 5/6/2017 7:25 AM, Bandan Das wrote:
> With EPT A/D enabled, processor access to L2 guest
> paging structures will result in a write violation.
> When this happens, write the GUEST_PHYSICAL_ADDRESS
> to the pml buffer provided by L1 if the access is
> write and the dirty bit is being set.
>
> This patch also adds necessary checks during VMEntry if L1
> has enabled PML. If the PML index overflows, we change the
> exit reason and run L1 to simulate a PML full event.
>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2211697..8b9e942 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -248,6 +248,7 @@ struct __packed vmcs12 {
>  	u64 xss_exit_bitmap;
>  	u64 guest_physical_address;
>  	u64 vmcs_link_pointer;
> +	u64 pml_address;
>  	u64 guest_ia32_debugctl;
>  	u64 guest_ia32_pat;
>  	u64 guest_ia32_efer;
> @@ -369,6 +370,7 @@ struct __packed vmcs12 {
>  	u16 guest_ldtr_selector;
>  	u16 guest_tr_selector;
>  	u16 guest_intr_status;
> +	u16 guest_pml_index;
>  	u16 host_es_selector;
>  	u16 host_cs_selector;
>  	u16 host_ss_selector;
> @@ -407,6 +409,7 @@ struct nested_vmx {
>  	/* Has the level1 guest done vmxon? */
>  	bool vmxon;
>  	gpa_t vmxon_ptr;
> +	bool pml_full;
>
>  	/* The guest-physical address of the current VMCS L1 keeps for L2 */
>  	gpa_t current_vmptr;
> @@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>  	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
>  	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
>  	FIELD(GUEST_INTR_STATUS, guest_intr_status),
> +	FIELD(GUEST_PML_INDEX, guest_pml_index),
>  	FIELD(HOST_ES_SELECTOR, host_es_selector),
>  	FIELD(HOST_CS_SELECTOR, host_cs_selector),
>  	FIELD(HOST_SS_SELECTOR, host_ss_selector),
> @@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>  	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
>  	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
>  	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> +	FIELD64(PML_ADDRESS, pml_address),
>  	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
>  	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
>  	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
> @@ -1349,6 +1354,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
>  		vmx_xsaves_supported();
>  }
>
> +static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
> +{
> +	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
> +}
> +
>  static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
>  {
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
> @@ -9368,13 +9378,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
>  		struct x86_exception *fault)
>  {
>  	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exit_reason;
> +	unsigned long exit_qualification = vcpu->arch.exit_qualification;
>
> -	if (fault->error_code & PFERR_RSVD_MASK)
> +	if (vmx->nested.pml_full) {
> +		exit_reason = EXIT_REASON_PML_FULL;
> +		vmx->nested.pml_full = false;
> +		exit_qualification &= INTR_INFO_UNBLOCK_NMI;

Sorry, I cannot recall the details. It is probably better to add a
comment to indicate why INTR_INFO_UNBLOCK_NMI is masked out here.


> +	} else if (fault->error_code & PFERR_RSVD_MASK)
>  		exit_reason = EXIT_REASON_EPT_MISCONFIG;
>  	else
>  		exit_reason = EXIT_REASON_EPT_VIOLATION;
> -	nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
> +
> +	nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
>  	vmcs12->guest_physical_address = fault->address;
>  }
>
> @@ -9717,6 +9734,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>
> +static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
> +					 struct vmcs12 *vmcs12)
> +{
> +	u64 address = vmcs12->pml_address;
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
> +		if (!nested_cpu_has_ept(vmcs12) ||
> +		    !IS_ALIGNED(address, 4096)  ||
> +		    address >> maxphyaddr)
> +			return -EINVAL;
> +	}

Do we also need to check whether EPT A/D has been enabled for vmcs12 to 
make vmentry work? I cannot recall the details, but it is probably not necessary.

> +
> +	return 0;
> +}
> +
>  static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
>  				       struct vmx_msr_entry *e)
>  {
> @@ -10252,6 +10285,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
>  		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> +	if (nested_vmx_check_pml_controls(vcpu, vmcs12))
> +		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
>  	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
>  				vmx->nested.nested_vmx_procbased_ctls_low,
>  				vmx->nested.nested_vmx_procbased_ctls_high) ||
> @@ -11146,6 +11182,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
>  	kvm_flush_pml_buffers(kvm);
>  }
>
> +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
> +{
> +	struct vmcs12 *vmcs12;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	gpa_t gpa;
> +	struct page *page = NULL;
> +	u64 *pml_address;
> +
> +	if (is_guest_mode(vcpu)) {
> +		WARN_ON_ONCE(vmx->nested.pml_full);
> +
> +		/*
> +		 * Check if PML is enabled for the nested guest.
> +		 * Whether eptp bit 6 is set is already checked
> +		 * as part of A/D emulation.
> +		 */
> +		vmcs12 = get_vmcs12(vcpu);
> +		if (!nested_cpu_has_pml(vmcs12))
> +			return 0;

Since nested_vmx_check_pml_controls above doesn't check the EPT A/D bit 
for L1, it seems we need to add that check here.

> +
> +		if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
> +			vmx->nested.pml_full = true;
> +			return 1;
> +		}

Is the purpose of returning 1 to make the upper-layer code inject a PML 
full VMEXIT to L1 in nested_ept_inject_page_fault?

> +
> +		gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
> +
> +		page = nested_get_page(vcpu, vmcs12->pml_address);
> +		if (!page)
> +			return 0;

If PML is enabled in L1, I think nested_get_page should never return a 
NULL PML page (unless L1 does something wrong)? It is probably better to 
return 1 rather than 0, and handle the error in nested_ept_inject_page_fault 
according to vmcs12->pml_address?

> +
> +		pml_address = kmap(page);
> +		pml_address[vmcs12->guest_pml_index--] = gpa;

This gpa is the L2 guest's GPA. Do we also need to mark L1's GPA (which 
corresponds to the L2 guest's GPA above) into the dirty log? Or has this 
already been done?

Thanks,
-Kai

> +		kunmap(page);
> +		nested_release_page_clean(page);
> +	}
> +
> +	return 0;
> +}
> +
>  static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
>  					   struct kvm_memory_slot *memslot,
>  					   gfn_t offset, unsigned long mask)
> @@ -11505,6 +11581,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>  	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
>  	.flush_log_dirty = vmx_flush_log_dirty,
>  	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> +	.write_log_dirty = vmx_write_pml_buffer,
>
>  	.pre_block = vmx_pre_block,
>  	.post_block = vmx_post_block,
>

  reply	other threads:[~2017-05-10 10:48 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-05 19:25 [PATCH v2 0/3] nVMX: Emulated Page Modification Logging for Nested Virtualization Bandan Das
2017-05-05 19:25 ` [PATCH v2 1/3] kvm: x86: Add a hook for arch specific dirty logging emulation Bandan Das
2017-05-10 10:49   ` Huang, Kai
2017-05-10 15:53     ` Bandan Das
2017-05-10 23:23       ` Huang, Kai
2017-05-11 18:36         ` Bandan Das
2017-05-05 19:25 ` [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging Bandan Das
2017-05-10 10:48   ` Huang, Kai [this message]
2017-05-10 14:46     ` Paolo Bonzini
2017-05-10 16:00       ` Bandan Das
2017-05-10 23:21         ` Huang, Kai
2017-05-05 19:25 ` [PATCH v2 3/3] nVMX: Advertise PML to L1 hypervisor Bandan Das
2017-05-09 15:22 ` [PATCH v2 0/3] nVMX: Emulated Page Modification Logging for Nested Virtualization Paolo Bonzini
2017-05-09 16:03   ` Bandan Das
2017-05-09 16:04     ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com \
    --to=kai.huang@linux.intel.com \
    --cc=bsd@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox