From: Avi Kivity
Subject: Re: [PATCH 02/10] nEPT: MMU context for nested EPT
Date: Sun, 13 Nov 2011 16:32:52 +0200
Message-ID: <4EBFD514.4030501@redhat.com>
In-Reply-To: <4EBFAA47.406@redhat.com>
References: <1320919040-nyh@il.ibm.com>
 <201111100958.pAA9wrIv019614@rice.haifa.ibm.com>
 <4EBBC848.7050400@redhat.com>
 <20111110144027.GB3327@fermat.math.technion.ac.il>
 <4EBBEB65.8050600@redhat.com>
 <20111110200532.GA17475@fermat.math.technion.ac.il>
 <4EBE4CE0.7070708@redhat.com>
 <20111112213747.GA741@fermat.math.technion.ac.il>
 <4EBFAA47.406@redhat.com>
To: Orit Wasserman
Cc: "Nadav Har'El", kvm@vger.kernel.org, "Roedel, Joerg", abelg@il.ibm.com

On 11/13/2011 01:30 PM, Orit Wasserman wrote:
> Maybe this patch can help; it is roughly what Avi wants (I hope), done
> very quickly. I'm sorry, I don't have a setup to run nested VMX at the
> moment, so I can't test it.
>
> Orit
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9335e1b..bbe212f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3180,6 +3180,10 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
>  #include "paging_tmpl.h"
>  #undef PTTYPE
>
> +#define PTTYPE EPT
> +#include "paging_tmpl.h"
> +#undef PTTYPE
> +

Yes, that's the key.

>  int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
>  int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 507e2b8..70d4cfd 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -39,6 +39,21 @@
>  	#define CMPXCHG cmpxchg64
>  	#define PT_MAX_FULL_LEVELS 2
>  	#endif
> +#elif PTTYPE == EPT
> +	#define pt_element_t u64
> +	#define FNAME(name) EPT_##name
> +	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
> +	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
> +	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
> +	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
> +	#define PT_LEVEL_BITS PT64_LEVEL_BITS
> +	#ifdef CONFIG_X86_64
> +	#define PT_MAX_FULL_LEVELS 4
> +	#define CMPXCHG cmpxchg
> +	#else
> +	#define CMPXCHG cmpxchg64
> +	#define PT_MAX_FULL_LEVELS 2
> +	#endif

The various masks should be defined here, to avoid lots of #ifdefs later.

>  #elif PTTYPE == 32
>  	#define pt_element_t u32
>  	#define guest_walker guest_walker32
> @@ -106,14 +121,19 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
>  {
>  	unsigned access;
>
> +#if PTTYPE == EPT
> +	access = (gpte & EPT_WRITABLE_MASK) | EPT_EXEC_MASK;
> +#else
>  	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
>  	if (last && !is_dirty_gpte(gpte))
>  		access &= ~ACC_WRITE_MASK;
> +#endif

Like here, you could make is_dirty_gpte() local to paging_tmpl.h,
returning true for EPT and the dirty bit otherwise.

>
>  #if PTTYPE == 64
>  	if (vcpu->arch.mmu.nx)
>  		access &= ~(gpte >> PT64_NX_SHIFT);

The EPT X bit is lost here. Could do something like

	access &= (gpte >> PT_X_NX_SHIFT) ^ PT_X_NX_SENSE;
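Spelling that out (a sketch only, untested; PT_X_NX_SHIFT and
PT_X_NX_SENSE are invented names, EPT bit 2 as the execute permission is
an assumption, and the shifted bit is masked down to bit 0 so the W/U
bits of access survive):

	/* per-PTTYPE, next to the other #defines; the PTTYPE == 32
	 * case would keep its existing code */
	#if PTTYPE == EPT
	#define PT_X_NX_SHIFT	2		/* EPT: bit 2 is X, positive sense */
	#define PT_X_NX_SENSE	0
	#else
	#define PT_X_NX_SHIFT	PT64_NX_SHIFT	/* ia32e: bit 63 is NX, inverted */
	#define PT_X_NX_SENSE	1
	#endif

	/* in FNAME(gpte_access)(): only ACC_EXEC_MASK (bit 0) can be
	 * cleared by this, for both gpte formats */
	access &= ~ACC_EXEC_MASK |
		  (((gpte >> PT_X_NX_SHIFT) & 1) ^ PT_X_NX_SENSE);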
> +#if PTTYPE == EPT
> +	const int write_fault = access & EPT_WRITABLE_MASK;
> +	const int user_fault = 0;
> +	const int fetch_fault = 0;
> +#else

EPT has fetch permissions (but not user permissions); anyway,
translate_nested_gpa() already does this.

>  	const int write_fault = access & PFERR_WRITE_MASK;
>  	const int user_fault = access & PFERR_USER_MASK;
>  	const int fetch_fault = access & PFERR_FETCH_MASK;
> +#endif
>  	u16 errcode = 0;
>
>  	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
> @@ -174,6 +200,9 @@ retry_walk:
>  	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
>
>  	pt_access = ACC_ALL;
> +#if PTTYPE == EPT
> +	pt_access = PT_PRESENT_MASK | EPT_WRITABLE_MASK | EPT_EXEC_MASK;
> +#endif

pt_access is not in EPT or ia32 format; it's our own format (xwu), so
this doesn't need changing. Updating gpte_access() is sufficient.

>
>  	for (;;) {
>  		gfn_t real_gfn;
>  		unsigned offset;
>
>  		pte_gpa = gfn_to_gpa(table_gfn) + offset;
>  		walker->table_gfn[walker->level - 1] = table_gfn;
>  		walker->pte_gpa[walker->level - 1] = pte_gpa;
> -
> +#if PTTYPE == EPT
> +		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
> +					      EPT_WRITABLE_MASK);
> +#else
>  		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
>  					      PFERR_USER_MASK|PFERR_WRITE_MASK);
> +#endif
> +

Unneeded, I think.

>  		if (unlikely(real_gfn == UNMAPPED_GVA))
>  			goto error;
>  		real_gfn = gpa_to_gfn(real_gfn);
> @@ -221,6 +255,7 @@ retry_walk:
>  			eperm = true;
>  #endif
>
> +#if PTTYPE != EPT
>  		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
>  			int ret;
>  			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
> @@ -235,7 +270,7 @@
>  			mark_page_dirty(vcpu->kvm, table_gfn);
>  			pte |= PT_ACCESSED_MASK;
>  		}
> -
> +#endif

If PT_ACCESSED_MASK is 0 for EPT, this goes away without the #ifdef
(see the sketch below).

> +#if PTTYPE != EPT
>  		/* check if the kernel is fetching from user page */
>  		if (unlikely(pte_access & PT_USER_MASK) &&
>  		    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
>  			if (fetch_fault && !user_fault)
>  				eperm = true;
> -
> +#endif

Same here, with PT_USER_MASK defined as 0 for EPT.
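To make the zero-mask idea concrete (a sketch only, untested;
PT_GUEST_ACCESSED_MASK and PT_GUEST_DIRTY_MASK are invented names for
per-PTTYPE versions of PT_ACCESSED_MASK and PT_DIRTY_MASK):

	/* per-PTTYPE masks, next to the other #defines at the top of
	 * paging_tmpl.h */
	#if PTTYPE == EPT
	#define PT_GUEST_ACCESSED_MASK	0	/* EPT gptes have no A/D bits */
	#define PT_GUEST_DIRTY_MASK	0
	#else
	#define PT_GUEST_ACCESSED_MASK	PT_ACCESSED_MASK
	#define PT_GUEST_DIRTY_MASK	PT_DIRTY_MASK
	#endif

	/* is_dirty_gpte() made template-local: always "dirty" when
	 * there is no dirty bit to track */
	static bool FNAME(is_dirty_gpte)(pt_element_t gpte)
	{
		return !PT_GUEST_DIRTY_MASK || (gpte & PT_GUEST_DIRTY_MASK);
	}

	/* in the walker, the constant-zero mask makes the condition
	 * constant false for EPT, so the compiler drops the whole block */
	if (!eperm && PT_GUEST_ACCESSED_MASK &&
	    unlikely(!(pte & PT_GUEST_ACCESSED_MASK))) {
		/* ... set the accessed bit as before ... */
	}

-- 
error compiling committee.c: too many arguments to function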