From mboxrd@z Thu Jan 1 00:00:00 1970 From: Xiao Guangrong Subject: Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification Date: Fri, 12 May 2017 11:59:24 +0800 Message-ID: <482f1c96-ce6a-8b77-ce18-ec5f19cde800@gmail.com> References: <1494501810-11822-1-git-send-email-pbonzini@redhat.com> <1494501810-11822-2-git-send-email-pbonzini@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Cc: Peter Feiner , David Matlack , =?UTF-8?B?UmFkaW0gS3LEjW3DocWZ?= , Xiao Guangrong , Wanpeng Li To: Paolo Bonzini , linux-kernel@vger.kernel.org, kvm@vger.kernel.org Return-path: In-Reply-To: <1494501810-11822-2-git-send-email-pbonzini@redhat.com> Content-Language: en-US Sender: linux-kernel-owner@vger.kernel.org List-Id: kvm.vger.kernel.org On 05/11/2017 07:23 PM, Paolo Bonzini wrote: > This fixes the new ept_access_test_read_only and ept_access_test_read_write > testcases from vmx.flat. > > The problem is that gpte_access moves bits around to switch from EPT > bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an > incorrect exit qualification. To fix this, make pt_access and > pte_access operate on raw PTE values (only with NX flipped to mean > "can execute") and call gpte_access at the end of the walk. This > lets us use pte_access to compute the exit qualification with XWR > bit order. > > Signed-off-by: Paolo Bonzini > --- > arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++-------------- > 1 file changed, 21 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 56241746abbd..b0454c7e4cff 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > pt_element_t pte; > pt_element_t __user *uninitialized_var(ptep_user); > gfn_t table_gfn; > - unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; > + u64 pt_access, pte_access; > + unsigned index, accessed_dirty, pte_pkey; > unsigned nested_access; > gpa_t pte_gpa; > bool have_ad; > int offset; > + u64 walk_nx_mask = 0; > const int write_fault = access & PFERR_WRITE_MASK; > const int user_fault = access & PFERR_USER_MASK; > const int fetch_fault = access & PFERR_FETCH_MASK; > @@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); > > #if PTTYPE == 64 > + walk_nx_mask = 1ULL << PT64_NX_SHIFT; We can always make walk_nx_mask = 1ULL << PT64_NX_SHIFT, as: - for EPT, this bit is useless - for 32bit, bit 63 is always ZERO, so that the final result should be ZERO too, > if (walker->level == PT32E_ROOT_LEVEL) { > pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); > trace_kvm_mmu_paging_element(pte, walker->level); > @@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > walker->max_level = walker->level; > ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); > > - accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0; > - > /* > * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging > * by the MOV to CR instruction are treated as reads and do not cause the > @@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > */ > nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; > > - pt_access = pte_access = ACC_ALL; > + pte_access = ~0; > ++walker->level; > > do { > gfn_t real_gfn; > unsigned long host_addr; > > - pt_access &= pte_access; > + pt_access = pte_access; > --walker->level; > > index = PT_INDEX(addr, walker->level); > @@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > > trace_kvm_mmu_paging_element(pte, walker->level); > > + /* > + * Inverting the NX it lets us AND it like other > + * permission bits. > + */ > + pte_access = pt_access & (pte ^ walk_nx_mask); > + > if (unlikely(!FNAME(is_present_gpte)(pte))) > goto error; > > @@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > goto error; > } > > - accessed_dirty &= pte; > - pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); > - > walker->ptes[walker->level - 1] = pte; > } while (!is_last_gpte(mmu, walker->level, pte)); > > pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); > - errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access); > + accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; > + > + /* Convert to ACC_*_MASK flags for struct guest_walker. */ > + walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask); > + walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask); > + errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); > if (unlikely(errcode)) > goto error; > > @@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > walker->gfn = real_gpa >> PAGE_SHIFT; > > if (!write_fault) > - FNAME(protect_clean_gpte)(mmu, &pte_access, pte); > + FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte); > else > /* > * On a write fault, fold the dirty bit into accessed_dirty. > @@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > goto retry_walk; > } > > - walker->pt_access = pt_access; > - walker->pte_access = pte_access; > pgprintk("%s: pte %llx pte_access %x pt_access %x\n", > - __func__, (u64)pte, pte_access, pt_access); > + __func__, (u64)pte, walker->pte_access, walker->pt_access); > return 1; > > error: > @@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, > */ > if (!(errcode & PFERR_RSVD_MASK)) { > vcpu->arch.exit_qualification &= 0x187; > - vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; ^ here, the original code is buggy as pt_access and pte have different bit order, fortunately, this patch fixes it too. :) Otherwise it looks good to me, thanks for your fix. Reviewed-by: Xiao Guangrong