From: Wei Wang <wei.wang2@amd.com>
To: andres@lagarcavilla.org
Cc: olaf@aepfle.de, xen-devel@lists.xensource.com, tim@xen.org,
keir.xen@gmail.com, JBeulich@suse.com, adin@gridcentric.ca
Subject: Re: RFC: AMD support for paging
Date: Wed, 15 Feb 2012 13:09:11 +0100 [thread overview]
Message-ID: <4F3BA067.4010303@amd.com> (raw)
In-Reply-To: <91f45eaceac6f38f9df39ed7d60c47a7.squirrel@webmail.lagarcavilla.org>
On 02/14/2012 08:05 PM, Andres Lagar-Cavilla wrote:
> We started hashing out some AMD support for mem_paging and mem_access.
> Right now my VMs boot, page out a bit, and then die on an HVM triple
> fault.
>
> Most importantly, I want to get somebody from AMD to comment/help out on
> this. It feels like we're inches away from enabling support for this very
> nice feature. I'm not sure who exactly on AMD monitors the list for these
> kinds of things. It'd be great to have you on board!
>
> For starters, the changes to the p2m code are relatively mild, but it'd be
> great if somebody spots a red flag.
>
> Another issue: comments indicate that bits 59-62 in NPT entries are used
> for IOMMU flags but effectively bits 61-62 are. Repossessing one bit (59)
> would give us enough space to support mem_access. Right now we only have 7
> bits available for Xen flags and that is not enough for paging and access.
> Is bit 59 effectively reserved?
Hi
bit 59 is used by iommu hardware for ATS response. In most cases, for
p2m_ram_rw pages, U bit must be 0. But maybe for other page types that
are not potentially used by DMA, this bit could be non-zero. I could
test it on my iommu machines if you had some patches that use U bits.
Thanks,
Wei
>
> Finally, the triple fault. Maybe I'm missing something obvious. Comments
> welcome.
>
> Patch inline below, thanks!
> Andres
>
> Enable AMD support for paging.
>
> Signed-off-by: Andres Lagar-Cavilla<andres@lagarcavilla.org>
> Signed-off-by: Adin Scannell<adin@scannell.ca>
>
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -537,10 +537,6 @@ int mem_event_domctl(struct domain *d, x
> if ( !hap_enabled(d) )
> break;
>
> - /* Currently only EPT is supported */
> - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> - break;
> -
> rc = -EXDEV;
> /* Disallow paging in a PoD guest */
> if ( p2m->pod.entry_count )
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -53,6 +53,20 @@
> #define P2M_BASE_FLAGS \
> (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
>
> +#ifdef __x86_64__
> +/* l1e_from_pfn is not designed to have INVALID_MFN stored. The 0xff..ff
> + * value tramples over the higher-order bits used for flags (NX, p2mt,
> + * etc.) This happens for paging entries. Thus we do this clip/unclip
> + * juggle for l1 entries only (no paging superpages!) */
> +#define EFF_MFN_WIDTH (PADDR_BITS-PAGE_SHIFT) /* 40 bits */
> +#define clipped_mfn(mfn) ((mfn)& ((1UL<< EFF_MFN_WIDTH) - 1))
> +#define unclip_mfn(mfn) ((clipped_mfn((mfn)) == INVALID_MFN) ? \
> + INVALID_MFN : (mfn))
> +#else
> +#define clipped_mfn(mfn) (mfn)
> +#define unclip_mfn(mfn) (mfn)
> +#endif /* __x86_64__ */
> +
> static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
> {
> unsigned long flags;
> @@ -77,6 +91,9 @@ static unsigned long p2m_type_to_flags(p
> case p2m_invalid:
> case p2m_mmio_dm:
> case p2m_populate_on_demand:
> + case p2m_ram_paging_out:
> + case p2m_ram_paged:
> + case p2m_ram_paging_in:
> default:
> return flags;
> case p2m_ram_ro:
> @@ -168,7 +185,7 @@ p2m_next_level(struct p2m_domain *p2m, m
> shift, max)) )
> return 0;
>
> - /* PoD: Not present doesn't imply empty. */
> + /* PoD/paging: Not present doesn't imply empty. */
> if ( !l1e_get_flags(*p2m_entry) )
> {
> struct page_info *pg;
> @@ -384,8 +401,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
> 0, L1_PAGETABLE_ENTRIES);
> ASSERT(p2m_entry);
>
> - if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
> - entry_content = l1e_from_pfn(mfn_x(mfn),
> + if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ||
> + (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) )
> + entry_content = l1e_from_pfn(clipped_mfn(mfn_x(mfn)),
> p2m_type_to_flags(p2mt, mfn));
> else
> entry_content = l1e_empty();
> @@ -393,7 +411,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
> if ( entry_content.l1 != 0 )
> {
> p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
> - old_mfn = l1e_get_pfn(*p2m_entry);
> + old_mfn = unclip_mfn(l1e_get_pfn(*p2m_entry));
> }
> /* level 1 entry */
> p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn,
> entry_content, 1);
> @@ -615,11 +633,12 @@ pod_retry_l1:
> sizeof(l1e));
>
> if ( ret == 0 ) {
> + unsigned long l1e_mfn = unclip_mfn(l1e_get_pfn(l1e));
> p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
> - ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
> + ASSERT( (l1e_mfn != INVALID_MFN || !p2m_is_ram(p2mt)) ||
> + (l1e_mfn == INVALID_MFN&& p2m_is_paging(p2mt)) );
>
> - if ( p2m_flags_to_type(l1e_get_flags(l1e))
> - == p2m_populate_on_demand )
> + if ( p2mt == p2m_populate_on_demand )
> {
> /* The read has succeeded, so we know that the mapping
> * exits at this point. */
> @@ -641,7 +660,7 @@ pod_retry_l1:
> }
>
> if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
> - mfn = _mfn(l1e_get_pfn(l1e));
> + mfn = _mfn(l1e_mfn);
> else
> /* XXX see above */
> p2mt = p2m_mmio_dm;
> @@ -783,18 +802,26 @@ pod_retry_l2:
> pod_retry_l1:
> if ( (l1e_get_flags(*l1e)& _PAGE_PRESENT) == 0 )
> {
> + p2m_type_t l1t = p2m_flags_to_type(l1e_get_flags(*l1e));
> /* PoD: Try to populate */
> - if ( p2m_flags_to_type(l1e_get_flags(*l1e)) ==
> p2m_populate_on_demand )
> + if ( l1t == p2m_populate_on_demand )
> {
> if ( q != p2m_query ) {
> if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
> goto pod_retry_l1;
> } else
> *t = p2m_populate_on_demand;
> + } else {
> + if ( p2m_is_paging(l1t) )
> + {
> + *t = l1t;
> + /* No need to unclip due to check below */
> + mfn = _mfn(l1e_get_pfn(*l1e));
> + }
> }
>
> unmap_domain_page(l1e);
> - return _mfn(INVALID_MFN);
> + return (l1t == p2m_ram_paging_out) ? mfn : _mfn(INVALID_MFN);
> }
> mfn = _mfn(l1e_get_pfn(*l1e));
> *t = p2m_flags_to_type(l1e_get_flags(*l1e));
> @@ -914,7 +941,7 @@ static void p2m_change_type_global(struc
> flags = l1e_get_flags(l1e[i1]);
> if ( p2m_flags_to_type(flags) != ot )
> continue;
> - mfn = l1e_get_pfn(l1e[i1]);
> + mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
> gfn = i1 + (i2 + (i3
> #if CONFIG_PAGING_LEVELS>= 4
> + (i4 * L3_PAGETABLE_ENTRIES)
> @@ -923,7 +950,7 @@ static void p2m_change_type_global(struc
> * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
> /* create a new 1le entry with the new type */
> flags = p2m_type_to_flags(nt, _mfn(mfn));
> - l1e_content = l1e_from_pfn(mfn, flags);
> + l1e_content = l1e_from_pfn(clipped_mfn(mfn), flags);
> p2m->write_p2m_entry(p2m, gfn,&l1e[i1],
> l1mfn, l1e_content, 1);
> }
> @@ -1073,7 +1100,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
> entry_count++;
> continue;
> }
> - mfn = l1e_get_pfn(l1e[i1]);
> + mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
> ASSERT(mfn_valid(_mfn(mfn)));
> m2pfn = get_gpfn_from_mfn(mfn);
> if ( m2pfn != gfn&&
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>
next prev parent reply other threads:[~2012-02-15 12:09 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-14 19:05 RFC: AMD support for paging Andres Lagar-Cavilla
2012-02-14 19:22 ` Olaf Hering
2012-02-14 19:35 ` Andres Lagar-Cavilla
2012-02-14 21:26 ` Olaf Hering
2012-02-14 21:33 ` Andres Lagar-Cavilla
2012-02-14 21:43 ` Olaf Hering
2012-02-15 9:18 ` Olaf Hering
2012-02-15 14:59 ` Andres Lagar-Cavilla
2012-02-15 12:09 ` Wei Wang [this message]
2012-02-15 15:14 ` Andres Lagar-Cavilla
2012-02-15 15:33 ` Wei Wang
2012-02-15 16:06 ` Andres Lagar-Cavilla
2012-02-15 17:02 ` Tim Deegan
2012-02-15 17:09 ` Andres Lagar-Cavilla
2012-02-15 17:34 ` Tim Deegan
2012-02-16 15:47 ` Wei Wang
2012-02-16 10:17 ` Wei Wang
2012-02-16 14:36 ` Andres Lagar-Cavilla
2012-02-16 2:31 ` Hongkaixing
2012-02-16 5:19 ` Andres Lagar-Cavilla
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4F3BA067.4010303@amd.com \
--to=wei.wang2@amd.com \
--cc=JBeulich@suse.com \
--cc=adin@gridcentric.ca \
--cc=andres@lagarcavilla.org \
--cc=keir.xen@gmail.com \
--cc=olaf@aepfle.de \
--cc=tim@xen.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).