Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Xiao Guangrong <guangrong.xiao@gmail.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: "Peter Feiner" <pfeiner@google.com>,
	"David Matlack" <dmatlack@google.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Xiao Guangrong" <xiaoguangrong@tencent.com>,
	"Wanpeng Li" <wanpeng.li@hotmail.com>
Subject: Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification
Date: Fri, 12 May 2017 11:59:24 +0800	[thread overview]
Message-ID: <482f1c96-ce6a-8b77-ce18-ec5f19cde800@gmail.com> (raw)
In-Reply-To: <1494501810-11822-2-git-send-email-pbonzini@redhat.com>



On 05/11/2017 07:23 PM, Paolo Bonzini wrote:
> This fixes the new ept_access_test_read_only and ept_access_test_read_write
> testcases from vmx.flat.
> 
> The problem is that gpte_access moves bits around to switch from EPT
> bit order (XWR) to ACC_*_MASK bit order (RWX).  This results in an
> incorrect exit qualification.  To fix this, make pt_access and
> pte_access operate on raw PTE values (only with NX flipped to mean
> "can execute") and call gpte_access at the end of the walk.  This
> lets us use pte_access to compute the exit qualification with XWR
> bit order.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
>   1 file changed, 21 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 56241746abbd..b0454c7e4cff 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	pt_element_t pte;
>   	pt_element_t __user *uninitialized_var(ptep_user);
>   	gfn_t table_gfn;
> -	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
> +	u64 pt_access, pte_access;
> +	unsigned index, accessed_dirty, pte_pkey;
>   	unsigned nested_access;
>   	gpa_t pte_gpa;
>   	bool have_ad;
>   	int offset;
> +	u64 walk_nx_mask = 0;
>   	const int write_fault = access & PFERR_WRITE_MASK;
>   	const int user_fault  = access & PFERR_USER_MASK;
>   	const int fetch_fault = access & PFERR_FETCH_MASK;
> @@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
>   
>   #if PTTYPE == 64
> +	walk_nx_mask = 1ULL << PT64_NX_SHIFT;

We can always set walk_nx_mask = 1ULL << PT64_NX_SHIFT, because:
- for EPT, this bit is useless;
- for 32-bit, bit 63 is always zero, so the final result should be zero too.

>   	if (walker->level == PT32E_ROOT_LEVEL) {
>   		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
>   		trace_kvm_mmu_paging_element(pte, walker->level);
> @@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	walker->max_level = walker->level;
>   	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
>   
> -	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
> -
>   	/*
>   	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
>   	 * by the MOV to CR instruction are treated as reads and do not cause the
> @@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	 */
>   	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
>   
> -	pt_access = pte_access = ACC_ALL;
> +	pte_access = ~0;
>   	++walker->level;
>   
>   	do {
>   		gfn_t real_gfn;
>   		unsigned long host_addr;
>   
> -		pt_access &= pte_access;
> +		pt_access = pte_access;
>   		--walker->level;
>   
>   		index = PT_INDEX(addr, walker->level);
> @@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   
>   		trace_kvm_mmu_paging_element(pte, walker->level);
>   
> +		/*
> +		 * Inverting the NX it lets us AND it like other
> +		 * permission bits.
> +		 */
> +		pte_access = pt_access & (pte ^ walk_nx_mask);
> +
>   		if (unlikely(!FNAME(is_present_gpte)(pte)))
>   			goto error;
>   
> @@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   			goto error;
>   		}
>   
> -		accessed_dirty &= pte;
> -		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
> -
>   		walker->ptes[walker->level - 1] = pte;
>   	} while (!is_last_gpte(mmu, walker->level, pte));
>   
>   	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
> -	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
> +	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
> +
> +	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
> +	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
> +	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
> +	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
>   	if (unlikely(errcode))
>   		goto error;
>   
> @@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	walker->gfn = real_gpa >> PAGE_SHIFT;
>   
>   	if (!write_fault)
> -		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
> +		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
>   	else
>   		/*
>   		 * On a write fault, fold the dirty bit into accessed_dirty.
> @@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   			goto retry_walk;
>   	}
>   
> -	walker->pt_access = pt_access;
> -	walker->pte_access = pte_access;
>   	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
> -		 __func__, (u64)pte, pte_access, pt_access);
> +		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
>   	return 1;
>   
>   error:
> @@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>   	 */
>   	if (!(errcode & PFERR_RSVD_MASK)) {
>   		vcpu->arch.exit_qualification &= 0x187;
> -		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
                                                                 ^ here, the original code
is buggy, as pt_access and pte have different bit orders; fortunately, this patch fixes it
too. :)

Otherwise it looks good to me, thanks for your fix.

Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>

next prev parent reply	other threads:[~2017-05-12  3:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-11 11:23 [PATCH 0/2] KVM: nVMX: nested EPT fixes Paolo Bonzini
2017-05-11 11:23 ` [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification Paolo Bonzini
2017-05-12  3:59   ` Xiao Guangrong [this message]
2017-05-12  5:13     ` Xiao Guangrong
2017-05-11 11:23 ` [PATCH 2/2] KVM: nVMX: fix nEPT handling of guest page table accesses Paolo Bonzini
2017-05-12  7:38   ` Xiao Guangrong
2017-05-12  8:39     ` Paolo Bonzini
2017-05-16 14:03 ` [PATCH 0/2] KVM: nVMX: nested EPT fixes Radim Krčmář

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=482f1c96-ce6a-8b77-ce18-ec5f19cde800@gmail.com \
    --to=guangrong.xiao@gmail.com \
    --cc=dmatlack@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=pfeiner@google.com \
    --cc=rkrcmar@redhat.com \
    --cc=wanpeng.li@hotmail.com \
    --cc=xiaoguangrong@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox