From: Wei Wang
Subject: Re: RFC: AMD support for paging
Date: Wed, 15 Feb 2012 13:09:11 +0100
Message-ID: <4F3BA067.4010303@amd.com>
In-Reply-To: <91f45eaceac6f38f9df39ed7d60c47a7.squirrel@webmail.lagarcavilla.org>
References: <91f45eaceac6f38f9df39ed7d60c47a7.squirrel@webmail.lagarcavilla.org>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Content-Transfer-Encoding: 7bit
Sender: xen-devel-bounces@lists.xensource.com
Errors-To: xen-devel-bounces@lists.xensource.com
To: andres@lagarcavilla.org
Cc: olaf@aepfle.de, xen-devel@lists.xensource.com, tim@xen.org,
    keir.xen@gmail.com, JBeulich@suse.com, adin@gridcentric.ca
List-Id: xen-devel@lists.xenproject.org

On 02/14/2012 08:05 PM, Andres Lagar-Cavilla wrote:
> We started hashing out some AMD support for mem_paging and mem_access.
> Right now my VMs boot, page out a bit, and then die on an HVM triple
> fault.
>
> Most importantly, I want to get somebody from AMD to comment/help out on
> this. It feels like we're inches away from enabling support for this very
> nice feature. I'm not sure who exactly on AMD monitors the list for these
> kinds of things. It'd be great to have you on board!
>
> For starters, the changes to the p2m code are relatively mild, but it'd be
> great if somebody spots a red flag.
>
> Another issue: comments indicate that bits 59-62 in NPT entries are used
> for IOMMU flags but effectively bits 61-62 are. Repossessing one bit (59)
> would give us enough space to support mem_access. Right now we only have 7
> bits available for Xen flags and that is not enough for paging and access.
> Is bit 59 effectively reserved?

Hi,

Bit 59 is used by the IOMMU hardware for the ATS response. In most cases,
for p2m_ram_rw pages, the U bit must be 0. But for other page types that
are not potential DMA targets, this bit could perhaps be non-zero. I can
test it on my IOMMU machines if you have some patches that use the U bit.
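To make the U bit concrete, here is a rough standalone sketch of what I
mean (the macro and helper names below are made up for illustration, they
are not taken from the Xen or IOMMU headers):

#include <stdint.h>

/* Bit 59 of a shared NPT/IOMMU entry: the ATS response ("U") bit.
 * Hypothetical names, illustration only. */
#define IOMMU_PTE_U_BIT   59
#define IOMMU_PTE_U_MASK  (1ULL << IOMMU_PTE_U_BIT)

/* p2m_ram_rw pages can be DMA targets, so their U bit must stay 0. */
static inline int u_bit_is_clear(uint64_t pte)
{
    return (pte & IOMMU_PTE_U_MASK) == 0;
}

/* Page types that can never be DMA targets could, in principle, reuse
 * bit 59 as a software flag, provided it is masked out again before
 * the entry is handed to the IOMMU. */
static inline uint64_t strip_u_bit(uint64_t pte)
{
    return pte & ~IOMMU_PTE_U_MASK;
}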
Thanks,
Wei

> Finally, the triple fault. Maybe I'm missing something obvious. Comments
> welcome.
>
> Patch inline below, thanks!
> Andres
>
> Enable AMD support for paging.
>
> Signed-off-by: Andres Lagar-Cavilla
> Signed-off-by: Adin Scannell
>
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -537,10 +537,6 @@ int mem_event_domctl(struct domain *d, x
>              if ( !hap_enabled(d) )
>                  break;
>
> -            /* Currently only EPT is supported */
> -            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> -                break;
> -
>              rc = -EXDEV;
>              /* Disallow paging in a PoD guest */
>              if ( p2m->pod.entry_count )
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -53,6 +53,20 @@
>  #define P2M_BASE_FLAGS \
>          (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
>
> +#ifdef __x86_64__
> +/* l1e_from_pfn is not designed to have INVALID_MFN stored. The 0xff..ff
> + * value tramples over the higher-order bits used for flags (NX, p2mt,
> + * etc.) This happens for paging entries. Thus we do this clip/unclip
> + * juggle for l1 entries only (no paging superpages!) */
> +#define EFF_MFN_WIDTH    (PADDR_BITS-PAGE_SHIFT) /* 40 bits */
> +#define clipped_mfn(mfn) ((mfn) & ((1UL << EFF_MFN_WIDTH) - 1))
> +#define unclip_mfn(mfn)  ((clipped_mfn((mfn)) == INVALID_MFN) ? \
> +                          INVALID_MFN : (mfn))
> +#else
> +#define clipped_mfn(mfn) (mfn)
> +#define unclip_mfn(mfn)  (mfn)
> +#endif /* __x86_64__ */
> +
>  static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
>  {
>      unsigned long flags;
> @@ -77,6 +91,9 @@ static unsigned long p2m_type_to_flags(p
>      case p2m_invalid:
>      case p2m_mmio_dm:
>      case p2m_populate_on_demand:
> +    case p2m_ram_paging_out:
> +    case p2m_ram_paged:
> +    case p2m_ram_paging_in:
>      default:
>          return flags;
>      case p2m_ram_ro:
> @@ -168,7 +185,7 @@ p2m_next_level(struct p2m_domain *p2m, m
>                                                   shift, max)) )
>          return 0;
>
> -    /* PoD: Not present doesn't imply empty. */
> +    /* PoD/paging: Not present doesn't imply empty. */
>      if ( !l1e_get_flags(*p2m_entry) )
>      {
>          struct page_info *pg;
> @@ -384,8 +401,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
>                                     0, L1_PAGETABLE_ENTRIES);
>          ASSERT(p2m_entry);
>
> -        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
> -            entry_content = l1e_from_pfn(mfn_x(mfn),
> +        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ||
> +             (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) )
> +            entry_content = l1e_from_pfn(clipped_mfn(mfn_x(mfn)),
>                                           p2m_type_to_flags(p2mt, mfn));
>          else
>              entry_content = l1e_empty();
> @@ -393,7 +411,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
>          if ( entry_content.l1 != 0 )
>          {
>              p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
> -            old_mfn = l1e_get_pfn(*p2m_entry);
> +            old_mfn = unclip_mfn(l1e_get_pfn(*p2m_entry));
>          }
>          /* level 1 entry */
>          p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1);
> @@ -615,11 +633,12 @@ pod_retry_l1:
>                                sizeof(l1e));
>
>          if ( ret == 0 ) {
> +            unsigned long l1e_mfn = unclip_mfn(l1e_get_pfn(l1e));
>              p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
> -            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
> +            ASSERT( (l1e_mfn != INVALID_MFN || !p2m_is_ram(p2mt)) ||
> +                    (l1e_mfn == INVALID_MFN && p2m_is_paging(p2mt)) );
>
> -            if ( p2m_flags_to_type(l1e_get_flags(l1e))
> -                 == p2m_populate_on_demand )
> +            if ( p2mt == p2m_populate_on_demand )
>              {
>                  /* The read has succeeded, so we know that the mapping
>                   * exits at this point. */
> @@ -641,7 +660,7 @@ pod_retry_l1:
>              }
>
>              if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
> -                mfn = _mfn(l1e_get_pfn(l1e));
> +                mfn = _mfn(l1e_mfn);
>              else
>                  /* XXX see above */
>                  p2mt = p2m_mmio_dm;
> @@ -783,18 +802,26 @@ pod_retry_l2:
>  pod_retry_l1:
>      if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
>      {
> +        p2m_type_t l1t = p2m_flags_to_type(l1e_get_flags(*l1e));
>          /* PoD: Try to populate */
> -        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
> +        if ( l1t == p2m_populate_on_demand )
>          {
>              if ( q != p2m_query ) {
>                  if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
>                      goto pod_retry_l1;
>              } else
>                  *t = p2m_populate_on_demand;
> +        } else {
> +            if ( p2m_is_paging(l1t) )
> +            {
> +                *t = l1t;
> +                /* No need to unclip due to check below */
> +                mfn = _mfn(l1e_get_pfn(*l1e));
> +            }
>          }
>
>          unmap_domain_page(l1e);
> -        return _mfn(INVALID_MFN);
> +        return (l1t == p2m_ram_paging_out) ? mfn : _mfn(INVALID_MFN);
>      }
>      mfn = _mfn(l1e_get_pfn(*l1e));
>      *t = p2m_flags_to_type(l1e_get_flags(*l1e));
> @@ -914,7 +941,7 @@ static void p2m_change_type_global(struc
>                      flags = l1e_get_flags(l1e[i1]);
>                      if ( p2m_flags_to_type(flags) != ot )
>                          continue;
> -                    mfn = l1e_get_pfn(l1e[i1]);
> +                    mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>                      gfn = i1 + (i2 + (i3
>  #if CONFIG_PAGING_LEVELS >= 4
>                                         + (i4 * L3_PAGETABLE_ENTRIES)
> @@ -923,7 +950,7 @@ static void p2m_change_type_global(struc
>                             * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
>                      /* create a new 1le entry with the new type */
>                      flags = p2m_type_to_flags(nt, _mfn(mfn));
> -                    l1e_content = l1e_from_pfn(mfn, flags);
> +                    l1e_content = l1e_from_pfn(clipped_mfn(mfn), flags);
>                      p2m->write_p2m_entry(p2m, gfn, &l1e[i1],
>                                           l1mfn, l1e_content, 1);
>                  }
> @@ -1073,7 +1100,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
>                          entry_count++;
>                          continue;
>                      }
> -                    mfn = l1e_get_pfn(l1e[i1]);
> +                    mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>                      ASSERT(mfn_valid(_mfn(mfn)));
>                      m2pfn = get_gpfn_from_mfn(mfn);
>                      if ( m2pfn != gfn &&
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>
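For reference, the clip/unclip juggle described in the patch comment above
boils down to something like the following standalone sketch. It assumes
PADDR_BITS = 52, PAGE_SHIFT = 12 and INVALID_MFN = ~0UL, as in the 64-bit
tree; it is only an illustration of the arithmetic, not Xen code:

#include <assert.h>
#include <stdio.h>

#define PADDR_BITS     52
#define PAGE_SHIFT     12
#define INVALID_MFN    (~0UL)

#define EFF_MFN_WIDTH  (PADDR_BITS - PAGE_SHIFT)            /* 40 bits */
#define clipped_mfn(m) ((m) & ((1UL << EFF_MFN_WIDTH) - 1))

int main(void)
{
    /* A real MFN fits in 40 bits and survives clipping unchanged. */
    unsigned long mfn = 0x12345UL;
    assert(clipped_mfn(mfn) == mfn);

    /* INVALID_MFN is all ones; shifted left by PAGE_SHIFT it would spill
     * into the PTE bits above bit 51 that hold the software p2m type and
     * NX, which is why the patch clips it to 40 bits before calling
     * l1e_from_pfn(). */
    printf("raw:     %#lx\n", (unsigned long)INVALID_MFN);
    printf("clipped: %#lx\n", clipped_mfn(INVALID_MFN));
    return 0;
}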