[PATCH][2/3] 1GB Page Table Support for HVM Guest

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Wei Huang <wei.huang2@amd.com>
To: Keir Fraser <keir.fraser@eu.citrix.com>,
	"Xu, Dongxiao" <dongxiao.xu@intel.com>,
	"'xen-devel@lists.xensource.com'" <xen-devel@lists.xensource.com>,
	Tim Deegan <Tim.Deegan@citrix.com>
Subject: [PATCH][2/3] 1GB Page Table Support for HVM Guest
Date: Fri, 2 Apr 2010 10:34:59 -0500	[thread overview]
Message-ID: <4BB60EA3.2000601@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 195 bytes --]

This patch changes P2M code to works with 1GB page now.

Signed-off-by: Wei Huang <wei.huang2@amd.com>
Acked-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Tim Deegan <tim.deegan@citrix.com>


[-- Attachment #2: 2-Xen-hap-1gb-host-page-p2m.patch --]
[-- Type: text/x-patch, Size: 12501 bytes --]

# HG changeset patch
# User root@weisles1164.amd.com
# Date 1270220646 18000
# Node ID c2375d0545b72f1ba83072c502b6c437fbdc9770
# Parent  c94398a70118dbb68141019956ce1221d3d2ce81
change p2m.c

diff -r c94398a70118 -r c2375d0545b7 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Fri Apr 02 10:03:41 2010 -0500
+++ b/xen/arch/x86/mm/p2m.c	Fri Apr 02 10:04:06 2010 -0500
@@ -187,7 +187,36 @@
 
     ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
 
-    /* split single large page into 4KB page in P2M table */
+    /* split 1GB pages into 2MB pages */
+    if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+    {
+        unsigned long flags, pfn;
+        struct page_info *pg = d->arch.p2m->alloc_page(d);
+        if ( pg == NULL )
+            return 0;
+        page_list_add_tail(pg, &d->arch.p2m->pages);
+        pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
+        pg->count_info = 1;
+        
+        flags = l1e_get_flags(*p2m_entry);
+        pfn = l1e_get_pfn(*p2m_entry);
+        
+        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+        {
+            new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
+            paging_write_p2m_entry(d, gfn, l1_entry+i, *table_mfn, new_entry,
+                                   2);
+        }
+        unmap_domain_page(l1_entry);
+        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+                                 __PAGE_HYPERVISOR|_PAGE_USER); //disable PSE
+        paging_write_p2m_entry(d, gfn,
+                               p2m_entry, *table_mfn, new_entry, 3);
+    }
+
+
+    /* split single 2MB large page into 4KB page in P2M table */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -1064,6 +1093,23 @@
     if ( unlikely(d->is_dying) )
         goto out_fail;
 
+    /* Because PoD does not have cache list for 1GB pages, it has to remap
+     * 1GB region to 2MB chunks for a retry. */
+    if ( order == 18 )
+    {
+        gfn_aligned = (gfn >> order) << order;
+        /* Note that we are supposed to call set_p2m_entry() 512 times to 
+         * split 1GB into 512 2MB pages here. But We only do once here because
+         * set_p2m_entry() should automatically shatter the 1GB page into 
+         * 512 2MB pages. The rest of 511 calls are unnecessary.
+         */
+        set_p2m_entry(d, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
+                      p2m_populate_on_demand);
+        audit_p2m(d);
+        p2m_unlock(p2md);
+        return 0;
+    }
+
     /* If we're low, start a sweep */
     if ( order == 9 && page_list_empty(&p2md->pod.super) )
         p2m_pod_emergency_sweep_super(d);
@@ -1196,6 +1242,7 @@
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t entry_content;
     l2_pgentry_t l2e_content;
+    l3_pgentry_t l3e_content;
     int rv=0;
 
     if ( tb_init_done )
@@ -1222,18 +1269,45 @@
         goto out;
 #endif
     /*
+     * Try to allocate 1GB page table if this feature is supported.
+     */
+    if ( page_order == 18 )
+    {
+        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                                   L3_PAGETABLE_ENTRIES);
+        ASSERT(p2m_entry);
+        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        {
+            P2M_ERROR("configure P2M table L3 entry with large page\n");
+            domain_crash(d);
+            goto out;
+        }
+        
+        if ( mfn_valid(mfn) )
+            l3e_content = l3e_from_pfn(mfn_x(mfn),
+                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
+        else
+            l3e_content = l3e_empty();
+        
+        entry_content.l1 = l3e_content.l3;
+        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 3);
+
+    }
+    /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
      * address in translated guests (i.e. 8 GBytes).  This restriction
      * comes from wanting to map the P2M table into the 16MB RO_MPT hole
      * in Xen's address space for translated PV guests.
      * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
      */
-    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
-                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
-                         ((CONFIG_PAGING_LEVELS == 3)
-                          ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
-                          : L3_PAGETABLE_ENTRIES),
-                         PGT_l2_page_table) )
+    else if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                              ((CONFIG_PAGING_LEVELS == 3)
+                               ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
+                               : L3_PAGETABLE_ENTRIES),
+                              PGT_l2_page_table) )
         goto out;
 
     if ( page_order == 0 )
@@ -1255,7 +1329,7 @@
         /* level 1 entry */
         paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
     }
-    else 
+    else if ( page_order == 9 )
     {
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
@@ -1352,11 +1426,34 @@
 #else
         l3e += l3_table_offset(addr);
 #endif
+pod_retry_l3:
         if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
         {
+            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
+            {
+                if ( q != p2m_query )
+                {
+                    if ( !p2m_pod_demand_populate(d, gfn, 18, q) )
+                        goto pod_retry_l3;
+                }
+                else
+                    *t = p2m_populate_on_demand;
+            }
             unmap_domain_page(l3e);
             return _mfn(INVALID_MFN);
         }
+        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
+        {
+            mfn = _mfn(l3e_get_pfn(*l3e) +
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                       l1_table_offset(addr));
+            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
+            unmap_domain_page(l3e);
+
+            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+        }
+
         mfn = _mfn(l3e_get_pfn(*l3e));
         unmap_domain_page(l3e);
     }
@@ -1437,10 +1534,57 @@
     {
         l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
         l2_pgentry_t l2e = l2e_empty();
+        l3_pgentry_t l3e = l3e_empty();
         int ret;
 
         ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
                / sizeof(l1_pgentry_t));
+
+        /*
+         * Read & process L3
+         */
+        p2m_entry = (l1_pgentry_t *)
+            &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
+                               + l3_linear_offset(addr)];
+    pod_retry_l3:
+        ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
+
+        if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        {
+            if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
+                 (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
+            {
+                /* The read has succeeded, so we know that mapping exists */
+                if ( q != p2m_query )
+                {
+                    if ( !p2m_pod_demand_populate(current->domain, gfn, 18, q) )
+                        goto pod_retry_l3;
+                    p2mt = p2m_invalid;
+                    printk("%s: Allocate 1GB failed!\n", __func__);
+                    goto out;
+                }
+                else
+                {
+                    p2mt = p2m_populate_on_demand;
+                    goto out;
+                }
+            }
+            goto pod_retry_l2;
+        }
+
+        if ( l3e_get_flags(l3e) & _PAGE_PSE )
+        {
+            p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
+            ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if (p2m_is_valid(p2mt) )
+                mfn = _mfn(l3e_get_pfn(l3e) + 
+                           l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
+                           l1_table_offset(addr));
+            else
+                p2mt = p2m_mmio_dm;
+            
+            goto out;
+        }
 
         /*
          * Read & process L2
@@ -1596,10 +1740,19 @@
     while ( todo )
     {
         if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
-            order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
-                9 : 0;
+            order = ( (((gfn | mfn_x(mfn) | todo) & ((1ul << 18) - 1)) == 0) ) ?
+                    18 :
+                    (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
+
         else
             order = 0;
+        
+        /* Note that we only enable hap_1gb_pgtb when CONFIG_PAGING_LEVELS==4. 
+         * So 1GB should never be enabled under 32bit or PAE modes. But for
+         * safety's reason, we double-check the page order again..
+         */
+        BUG_ON(order == 18 && CONFIG_PAGING_LEVELS < 4);
+
         if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
             rc = 0;
         gfn += 1ul << order;
@@ -1867,6 +2020,31 @@
                     gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                     continue;
                 }
+
+                /* check for 1GB super page */
+                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
+                {
+                    mfn = l3e_get_pfn(l3e[i3]);
+                    ASSERT(mfn_valid(_mfn(mfn)));
+                    /* we have to cover 512x512 4K pages */
+                    for ( i2 = 0; 
+                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
+                          i2++)
+                    {
+                        m2pfn = get_gpfn_from_mfn(mfn+i2);
+                        if ( m2pfn != (gfn + i2) )
+                        {
+                            pmbad++;
+                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
+                                       m2pfn);
+                            BUG();
+                        }
+                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                }
+
                 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                 {
@@ -2224,7 +2402,7 @@
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
-    mfn_t l1mfn, l2mfn;
+    mfn_t l1mfn, l2mfn, l3mfn;
     unsigned long i1, i2, i3;
     l3_pgentry_t *l3e;
 #if CONFIG_PAGING_LEVELS == 4
@@ -2245,6 +2423,7 @@
 #if CONFIG_PAGING_LEVELS == 4
     l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #else /* CONFIG_PAGING_LEVELS == 3 */
+    l3mfn = _mfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
     l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #endif
 
@@ -2255,6 +2434,7 @@
         {
             continue;
         }
+        l3mfn = _mfn(l4e_get_pfn(l4e[i4]));
         l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
 #endif
         for ( i3 = 0;
@@ -2265,6 +2445,20 @@
             {
                 continue;
             }
+            if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
+            {
+                flags = l3e_get_flags(l3e[i3]);
+                if ( p2m_flags_to_type(flags) != ot )
+                    continue;
+                mfn = l3e_get_pfn(l3e[i3]);
+                gfn = get_gpfn_from_mfn(mfn);
+                flags = p2m_type_to_flags(nt);
+                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+                paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l3e[i3],
+                                       l3mfn, l1e_content, 3);
+                continue;
+            }
+
             l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
             l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
             for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

next             reply	other threads:[~2010-04-02 15:34 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-02 15:34 Wei Huang [this message]
  -- strict thread matches above, loose matches on Subject: below --
2010-03-12 18:40 [PATCH][2/3] 1GB Page Table Support for HVM Guest Wei Huang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BB60EA3.2000601@amd.com \
    --to=wei.huang2@amd.com \
    --cc=Tim.Deegan@citrix.com \
    --cc=dongxiao.xu@intel.com \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.