From mboxrd@z Thu Jan  1 00:00:00 1970
From: Avi Kivity
Subject: Re: RFC: Add reserved bits check
Date: Fri, 27 Mar 2009 12:34:35 +0300
Message-ID: <49CC9DAB.8090802@redhat.com>
References: <9832F13BD22FB94A829F798DA4A8280501A21068EF@pdsmsx503.ccr.corp.intel.com>
 <9832F13BD22FB94A829F798DA4A8280501A2106E6A@pdsmsx503.ccr.corp.intel.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Cc: "kvm@vger.kernel.org"
To: "Dong, Eddie"
In-Reply-To: <9832F13BD22FB94A829F798DA4A8280501A2106E6A@pdsmsx503.ccr.corp.intel.com>

Dong, Eddie wrote:
> Current KVM doesn't check reserved bits of guest page table entries, which
> may let a guest use reserved bits to bypass a guest #PF in VMX.
>
> This patch adds this check, leaving the shadow pte unconstructed if guest
> RSVD=1.
>
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -261,6 +261,8 @@ struct kvm_mmu {
>  	union kvm_mmu_page_role base_role;
>
>  	u64 *pae_root;
> +	u64 rsvd_bits_mask[4];
> +	u64 large_page_rsvd_mask;
>  };

Make large_page_rsvd_mask an array too, in preparation for 1GB pages?

Perhaps u64 rsvd_bits_mask[2][4].  First index is bit 7 of the pte, second
index is the level.  Makes for faster run time too.

>  #define PT_DIRECTORY_LEVEL 2
> @@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
>  static u64 __read_mostly shadow_accessed_mask;
>  static u64 __read_mostly shadow_dirty_mask;
>  static u64 __read_mostly shadow_mt_mask;
> +extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
> +	struct kvm_vcpu *vcpu, u32 function, u32 index);

This declaration needs to be in a header file, so we don't get random
breakage when the signature changes.

> +static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level)

u64 pte... (and bool for the return type).  Also s/pte/gpte/, to make it
clear we are dealing with a guest pte.

> @@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
>
>  static int paging64_init_context(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_mmu *context = &vcpu->arch.mmu;
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +	context->rsvd_bits_mask[3] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[2] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[1] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
> +	context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
> +	context->large_page_rsvd_mask = /* 2MB PDE */
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
>  	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
>  }

Isn't bit 63 reserved if NX is disabled?  (See the sketch below.)

> @@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
>
>  static int paging32E_init_context(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_mmu *context = &vcpu->arch.mmu;
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +	/* 3 levels */
> +	context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
> +		rsvd_bits(7, 8) | rsvd_bits(1,2);	/* PDPTE */

This will never be used; PDPTEs are loaded by set_cr3(), not walk_addr().
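Putting the two suggestions above together, here is an untested sketch of
what I mean (paging64_init_rsvd_masks() is just a placeholder name; I'm
assuming rsvd_bits(s, e) returns a mask with bits s..e set, and an is_nx()
helper that reads the guest's EFER.NXE):

static inline u64 rsvd_bits(int s, int e)
{
	/* mask with bits s..e (inclusive) set */
	return ((1ULL << (e - s + 1)) - 1) << s;
}

static void paging64_init_rsvd_masks(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->arch.mmu;
	int maxphyaddr = cpuid_maxphyaddr(vcpu);
	/* bit 63 (XD) is reserved when EFER.NXE is clear */
	u64 exb_bit_rsvd = is_nx(vcpu) ? 0 : rsvd_bits(63, 63);

	/* [0][level-1]: pte bit 7 (PS) clear */
	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);	/* PML4E */
	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);	/* PDPTE */
	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51);			/* PDE */
	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51);			/* PTE */

	/* [1][level-1]: pte bit 7 (PS) set -- large pages */
	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
	context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 29);	/* 1GB PDPTE */
	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);	/* 2MB PDE */
	/* bit 7 is PAT in a 4K pte, not PS */
	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
}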
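With that layout, the checker itself collapses to an array lookup (again
just a sketch; PT_PAGE_SIZE_MASK is the PS bit, bit 7):

static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
{
	int ps = !!(gpte & PT_PAGE_SIZE_MASK);	/* bit 7 selects the row */

	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level - 1]) != 0;
}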
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 7314c09..844efe9 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
>  	gfn_t table_gfn;
>  	unsigned index, pt_access, pte_access;
>  	gpa_t pte_gpa;
> +	int rsvd_fault;
>
>  	pgprintk("%s: addr %lx\n", __func__, addr);
> walk:
> @@ -153,10 +154,13 @@ walk:
>  			 walker->level - 1, table_gfn);
>
>  		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
> +		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);

Does a not-present pte set PFERR_RSVD?  The reserved bits are computed here
before the present check, so the error code we report needs to match what
real hardware reports in that case.

>  		if (!is_present_pte(pte))
>  			goto not_present;
>
> +		if (rsvd_fault)
> +			goto access_error;
>  		if (write_fault && !is_writeble_pte(pte))
>  			if (user_fault || is_write_protection(vcpu))
>  				goto access_error;
> @@ -233,6 +237,8 @@ err:
>  		walker->error_code |= PFERR_USER_MASK;
>  	if (fetch_fault)
>  		walker->error_code |= PFERR_FETCH_MASK;
> +	if (rsvd_fault)
> +		walker->error_code |= PFERR_RSVD_MASK;
>  	return 0;
>  }

-- 
I have a truly marvellous patch that fixes the bug which this signature is
too narrow to contain.