From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christoph Egger Subject: Re: [PATCH] support 1gb pages in guest page table walker Date: Wed, 27 Jul 2011 15:15:49 +0200 Message-ID: <4E300F85.6050405@amd.com> References: <4E298E8F.1040306@amd.com> <4E29B6A7020000780004F3B6@nat28.tlf.novell.com> <4E2D3538.1060106@amd.com> <4E2D53F9020000780004F67E@nat28.tlf.novell.com> <4E2D3B81.9000908@amd.com> <20110725102546.GA8970@whitby.uk.xensource.com> <20110725103028.GB8970@whitby.uk.xensource.com> <4E2EAEFD.2090405@amd.com> <20110726131907.GK8970@whitby.uk.xensource.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------010003000906070003000500" Return-path: In-Reply-To: <20110726131907.GK8970@whitby.uk.xensource.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Tim Deegan Cc: Jan, "xen-devel@lists.xensource.com" , Beulich List-Id: xen-devel@lists.xenproject.org --------------010003000906070003000500 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit On 07/26/11 15:19, Tim Deegan wrote: > At 14:11 +0200 on 26 Jul (1311689501), Christoph Egger wrote: >> On 07/25/11 12:30, Tim Deegan wrote: >>> At 11:25 +0100 on 25 Jul (1311593146), Tim Deegan wrote: >>>> Also this function should be called from the CPUID trap handler to make >>>> sure we never advertise PSE1GB when we're not going to support it. >>> >>> Er, not this function exactly, since CPUID should report the feature >>> even when the guest's not in long_mode. I think it needs a >>> hvm_pse1G_supported that can be called from CPUID, and then >>> guest_supports_1G_superpages() boils down to >>> "(GUEST_PAGING_LEVEL>= 4)&& hvm_pse1G_supported(v)" >> >> New version attached. I removed the fake l1e calculation. > > If you just remove it, you need to update hap_p2m_ga_to_gfn to figure > out the GFN some other way! I expect that's what's causing your > problem. You should either provide the fake l1e, and say why in the > comment, or audit all callers of the function to make sure they don't > need it. Ah, I see now. I misunderstood you. I forgot to increase the pfn by the right number of 2m pages for the l1e calculation. Now it works for me: The l2 guest boots and passes my tests. -- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Einsteinring 24, 85689 Dornach b. Muenchen Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632 --------------010003000906070003000500 Content-Type: text/plain; name="xen_pagewalk.diff" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="xen_pagewalk.diff" Content-Description: xen_pagewalk.diff # HG changeset patch # User cegger # Date 1311772079 -7200 support 1gb pages for guests Signed-off-by: Christoph Egger diff -r 4f2c59fb28e6 -r ea91116ba394 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -2389,6 +2389,7 @@ void hvm_cpuid(unsigned int input, unsig unsigned int *ecx, unsigned int *edx) { struct vcpu *v = current; + struct domain *d = v->domain; unsigned int count = *ecx; if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) ) @@ -2397,7 +2398,7 @@ void hvm_cpuid(unsigned int input, unsig if ( cpuid_hypervisor_leaves(input, count, eax, ebx, ecx, edx) ) return; - domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx); + domain_cpuid(d, input, *ecx, eax, ebx, ecx, edx); switch ( input ) { @@ -2433,7 +2434,7 @@ void hvm_cpuid(unsigned int input, unsig { if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) ) continue; - domain_cpuid(v->domain, input, sub_leaf, &_eax, &_ebx, &_ecx, + domain_cpuid(d, input, sub_leaf, &_eax, &_ebx, &_ecx, &_edx); if ( (_eax + _ebx) > *ebx ) *ebx = _eax + _ebx; @@ -2444,9 +2445,13 @@ void hvm_cpuid(unsigned int input, unsig case 0x80000001: /* We expose RDTSCP feature to guest only when tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */ - if ( v->domain->arch.tsc_mode != TSC_MODE_DEFAULT || + if ( d->arch.tsc_mode != TSC_MODE_DEFAULT || !host_tsc_is_safe() ) *edx &= ~cpufeat_mask(X86_FEATURE_RDTSCP); + /* Expose 1gb page feature for HVM HAP guests and hw support is + * available. */ + if (!hvm_pse1gb_supported(d)) + *edx &= ~cpufeat_mask(X86_FEATURE_PAGE1GB); break; } } diff -r 4f2c59fb28e6 -r ea91116ba394 xen/arch/x86/mm/guest_walk.c --- a/xen/arch/x86/mm/guest_walk.c +++ b/xen/arch/x86/mm/guest_walk.c @@ -134,7 +134,8 @@ guest_walk_tables(struct vcpu *v, struct guest_l4e_t *l4p; #endif uint32_t gflags, mflags, iflags, rc = 0; - int pse, smep; + int smep; + bool_t pse1G = 0, pse2M = 0; perfc_incr(guest_walk); memset(gw, 0, sizeof(*gw)); @@ -156,6 +157,9 @@ guest_walk_tables(struct vcpu *v, struct if ( smep ) mflags |= _PAGE_USER; +#define GUEST_L2_GFN_ALIGN (1 << (GUEST_L2_PAGETABLE_SHIFT - \ + GUEST_L1_PAGETABLE_SHIFT)) + #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ @@ -182,6 +186,49 @@ guest_walk_tables(struct vcpu *v, struct if ( rc & _PAGE_PRESENT ) goto out; + pse1G = (guest_supports_1G_superpages(v) && + (guest_l3e_get_flags(gw->l3e) & _PAGE_PSE)); + + if ( pse1G ) + { + /* Shadow paging doesn't support 1gb pages but hap_p2m_ga_to_gfn + * does, so we generate a shadow l1 table entry. */ + gfn_t start = guest_l3e_get_gfn(gw->l3e); + /* Grant full access in the l1e, since all the guest entry's + * access controls are enforced in the shadow l3e. */ + int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| + _PAGE_ACCESSED|_PAGE_DIRTY); + /* Import cache-control bits. Note that _PAGE_PAT is actually + * _PAGE_PSE, and it is always set. We will clear it in case + * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */ + flags |= (guest_l3e_get_flags(gw->l3e) + & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD)); + if ( !(gfn_x(start) & 1) ) + /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ + flags &= ~_PAGE_PAT; + +#define GUEST_L3_GFN_ALIGN (1U << (GUEST_L3_PAGETABLE_SHIFT - \ + GUEST_L1_PAGETABLE_SHIFT)) + if ( gfn_x(start) & (GUEST_L3_GFN_ALIGN - 1) & ~0x1 ) + { + rc |= _PAGE_INVALID_BITS; + } + + /* Increment the pfn by the right number of 2m pages. + * Mask out PAT and invalid bits. */ + start = _gfn((gfn_x(start) & ~(GUEST_L3_GFN_ALIGN - 1)) + + (guest_l2_table_offset(va) << + (GUEST_L2_PAGETABLE_SHIFT - GUEST_L1_PAGETABLE_SHIFT))); + /* Increment the pfn by the right number of 4k pages. + * Mask out PAT and invalid bits. */ + start = _gfn((gfn_x(start) & ~(GUEST_L2_GFN_ALIGN - 1)) + + guest_l1_table_offset(va)); + gw->l1e = guest_l1e_from_gfn(start, flags); + gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN); + goto set_ad; +#undef GUEST_L3_GFN_ALIGN + } + #else /* PAE only... */ /* Get the l3e and check its flag */ @@ -219,10 +266,10 @@ guest_walk_tables(struct vcpu *v, struct if ( rc & _PAGE_PRESENT ) goto out; - pse = (guest_supports_superpages(v) && + pse2M = (guest_supports_superpages(v) && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)); - if ( pse ) + if ( pse2M ) { /* Special case: this guest VA is in a PSE superpage, so there's * no guest l1e. We make one up so that the propagation code @@ -242,8 +289,6 @@ guest_walk_tables(struct vcpu *v, struct /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ flags &= ~_PAGE_PAT; -#define GUEST_L2_GFN_ALIGN (1 << (GUEST_L2_PAGETABLE_SHIFT - \ - GUEST_L1_PAGETABLE_SHIFT)) if ( gfn_x(start) & (GUEST_L2_GFN_ALIGN - 1) & ~0x1 ) { #if GUEST_PAGING_LEVELS == 2 @@ -282,6 +327,9 @@ guest_walk_tables(struct vcpu *v, struct rc |= ((gflags & mflags) ^ mflags); } +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ +set_ad: +#endif /* Now re-invert the user-mode requirement for SMEP. */ if ( smep ) rc ^= _PAGE_USER; @@ -295,17 +343,21 @@ guest_walk_tables(struct vcpu *v, struct #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */ if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) ) paging_mark_dirty(d, mfn_x(gw->l4mfn)); - if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) ) + if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, + (pse1G && (pfec & PFEC_write_access))) ) paging_mark_dirty(d, mfn_x(gw->l3mfn)); #endif - if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e, - (pse && (pfec & PFEC_write_access))) ) - paging_mark_dirty(d, mfn_x(gw->l2mfn)); - if ( !pse ) + if ( !pse1G ) { - if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, - (pfec & PFEC_write_access)) ) - paging_mark_dirty(d, mfn_x(gw->l1mfn)); + if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e, + (pse2M && (pfec & PFEC_write_access))) ) + paging_mark_dirty(d, mfn_x(gw->l2mfn)); + if ( !pse2M ) + { + if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, + (pfec & PFEC_write_access)) ) + paging_mark_dirty(d, mfn_x(gw->l1mfn)); + } } } diff -r 4f2c59fb28e6 -r ea91116ba394 xen/include/asm-x86/guest_pt.h --- a/xen/include/asm-x86/guest_pt.h +++ b/xen/include/asm-x86/guest_pt.h @@ -194,6 +194,12 @@ guest_supports_superpages(struct vcpu *v } static inline int +guest_supports_1G_superpages(struct vcpu *v) +{ + return (GUEST_PAGING_LEVELS >= 4 && guest_supports_superpages(v)); +} + +static inline int guest_supports_nx(struct vcpu *v) { if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx ) diff -r 4f2c59fb28e6 -r ea91116ba394 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -224,6 +224,9 @@ int hvm_girq_dest_2_vcpu_id(struct domai #define hvm_hap_has_2mb(d) \ (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_2MB) +#define hvm_pse1gb_supported(d) \ + (cpu_has_page1gb && hvm_hap_has_1gb(d) && paging_mode_hap(d)) + #ifdef __x86_64__ #define hvm_long_mode_enabled(v) \ ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA) --------------010003000906070003000500 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --------------010003000906070003000500--