[PATCH][3/4] Enable 1GB for Xen HVM host page

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Wei Huang <wei.huang2@amd.com>
To: "'xen-devel@lists.xensource.com'" <xen-devel@lists.xensource.com>,
	Keir Fraser <keir.fraser@eu.citrix.com>,
	"Xu, Dongxiao" <dongxiao.xu@intel.com>
Subject: [PATCH][3/4] Enable 1GB for Xen HVM host page
Date: Mon, 22 Feb 2010 11:18:48 -0600	[thread overview]
Message-ID: <4B82BC78.7080907@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 151 bytes --]

This patch changes P2M code to works with 1GB page now.

Signed-off-by: Wei Huang <wei.huang2@amd.com>
Acked-by: Dongxiao Xu <dongxiao.xu@intel.com>



[-- Attachment #2: 3-xen-hap-fix-p2m.patch --]
[-- Type: text/x-patch, Size: 11909 bytes --]

# HG changeset patch
# User huangwei@huangwei.amd.com
# Date 1266853453 21600
# Node ID 72075d4fc39e8cd11a06bff4eb66521ab8fe952b
# Parent  c9b45664b423e11003358944bb8e6e976e735301
fix p2m code to support 1GB pages

diff -r c9b45664b423 -r 72075d4fc39e xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Mon Feb 22 09:44:09 2010 -0600
+++ b/xen/arch/x86/mm/p2m.c	Mon Feb 22 09:44:13 2010 -0600
@@ -187,7 +187,36 @@
 
     ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
 
-    /* split single large page into 4KB page in P2M table */
+    /* split 1GB pages into 2MB pages */
+    if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+    {
+        unsigned long flags, pfn;
+        struct page_info *pg = d->arch.p2m->alloc_page(d);
+        if ( pg == NULL )
+            return 0;
+        page_list_add_tail(pg, &d->arch.p2m->pages);
+        pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
+        pg->count_info = 1;
+        
+        flags = l1e_get_flags(*p2m_entry);
+        pfn = l1e_get_pfn(*p2m_entry);
+        
+        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+        {
+            new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
+            paging_write_p2m_entry(d, gfn, l1_entry+i, *table_mfn, new_entry,
+                                   2);
+        }
+        unmap_domain_page(l1_entry);
+        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+                                 __PAGE_HYPERVISOR|_PAGE_USER); //disable PSE
+        paging_write_p2m_entry(d, gfn,
+                               p2m_entry, *table_mfn, new_entry, 3);
+    }
+
+
+    /* split single 2MB large page into 4KB page in P2M table */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -1064,6 +1093,19 @@
     if ( unlikely(d->is_dying) )
         goto out_fail;
 
+    /* Because PoD does not have cache list for 1GB pages, it has to remap
+     * 1GB region to 2MB chunks for a retry. */
+    if ( order == 18 )
+    {
+        gfn_aligned = (gfn >> order) << order;
+        for( i = 0; i < (1 << order); i += (1 << 9) )
+            set_p2m_entry(d, gfn_aligned + i, _mfn(POPULATE_ON_DEMAND_MFN), 9,
+                          p2m_populate_on_demand);
+        audit_p2m(d);
+        p2m_unlock(p2md);
+        return 0;
+    }
+
     /* If we're low, start a sweep */
     if ( order == 9 && page_list_empty(&p2md->pod.super) )
         p2m_pod_emergency_sweep_super(d);
@@ -1196,6 +1238,7 @@
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t entry_content;
     l2_pgentry_t l2e_content;
+    l3_pgentry_t l3e_content;
     int rv=0;
 
     if ( tb_init_done )
@@ -1222,18 +1265,44 @@
         goto out;
 #endif
     /*
+     * Try to allocate 1GB page table if this feature is supported.
+     *
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
      * address in translated guests (i.e. 8 GBytes).  This restriction
      * comes from wanting to map the P2M table into the 16MB RO_MPT hole
      * in Xen's address space for translated PV guests.
      * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
      */
-    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
-                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
-                         ((CONFIG_PAGING_LEVELS == 3)
-                          ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
-                          : L3_PAGETABLE_ENTRIES),
-                         PGT_l2_page_table) )
+    if ( page_order == 18 )
+    {
+        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                                   L3_PAGETABLE_ENTRIES);
+        ASSERT(p2m_entry);
+        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        {
+            P2M_ERROR("configure P2M table L3 entry with large page\n");
+            domain_crash(d);
+            goto out;
+        }
+        
+        if ( mfn_valid(mfn) )
+            l3e_content = l3e_from_pfn(mfn_x(mfn),
+                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
+        else
+            l3e_content = l3e_empty();
+        
+        entry_content.l1 = l3e_content.l3;
+        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 3);
+
+    }
+    else if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                              ((CONFIG_PAGING_LEVELS == 3)
+                               ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
+                               : L3_PAGETABLE_ENTRIES),
+                              PGT_l2_page_table) )
         goto out;
 
     if ( page_order == 0 )
@@ -1255,7 +1324,7 @@
         /* level 1 entry */
         paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
     }
-    else 
+    else if ( page_order == 9 )
     {
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
@@ -1352,11 +1421,34 @@
 #else
         l3e += l3_table_offset(addr);
 #endif
+pod_retry_l3:
         if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
         {
+            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
+            {
+                if ( q != p2m_query )
+                {
+                    if ( !p2m_pod_demand_populate(d, gfn, 18, q) )
+                        goto pod_retry_l3;
+                }
+                else
+                    *t = p2m_populate_on_demand;
+            }
             unmap_domain_page(l3e);
             return _mfn(INVALID_MFN);
         }
+        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
+        {
+            mfn = _mfn(l3e_get_pfn(*l3e) +
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                       l1_table_offset(addr));
+            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
+            unmap_domain_page(l3e);
+
+            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+        }
+
         mfn = _mfn(l3e_get_pfn(*l3e));
         unmap_domain_page(l3e);
     }
@@ -1437,10 +1529,57 @@
     {
         l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
         l2_pgentry_t l2e = l2e_empty();
+        l3_pgentry_t l3e = l3e_empty();
         int ret;
 
         ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
                / sizeof(l1_pgentry_t));
+
+        /*
+         * Read & process L3
+         */
+        p2m_entry = (l1_pgentry_t *)
+            &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
+                               + l3_linear_offset(addr)];
+    pod_retry_l3:
+        ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
+
+        if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        {
+            if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
+                 (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
+            {
+                /* The read has succeeded, so we know that mapping exists */
+                if ( q != p2m_query )
+                {
+                    if ( !p2m_pod_demand_populate(current->domain, gfn, 18, q) )
+                        goto pod_retry_l3;
+                    p2mt = p2m_invalid;
+                    printk("%s: Allocate 1GB failed!\n", __func__);
+                    goto out;
+                }
+                else
+                {
+                    p2mt = p2m_populate_on_demand;
+                    goto out;
+                }
+            }
+            goto pod_retry_l2;
+        }
+
+        if ( l3e_get_flags(l3e) & _PAGE_PSE )
+        {
+            p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
+            ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if (p2m_is_valid(p2mt) )
+                mfn = _mfn(l3e_get_pfn(l3e) + 
+                           l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
+                           l1_table_offset(addr));
+            else
+                p2mt = p2m_mmio_dm;
+            
+            goto out;
+        }
 
         /*
          * Read & process L2
@@ -1596,8 +1735,10 @@
     while ( todo )
     {
         if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
-            order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
-                9 : 0;
+            order = ( (((gfn | mfn_x(mfn) | todo) & ((1ul << 18) - 1)) == 0) ) ?
+                    18 :
+                    (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
+
         else
             order = 0;
         if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
@@ -1867,6 +2008,31 @@
                     gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                     continue;
                 }
+
+                /* check for 1GB super page */
+                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
+                {
+                    mfn = l3e_get_pfn(l3e[i3]);
+                    ASSERT(mfn_valid(_mfn(mfn)));
+                    /* we have to cover 512x512 4K pages */
+                    for ( i2 = 0; 
+                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
+                          i2++)
+                    {
+                        m2pfn = get_gpfn_from_mfn(mfn+i2);
+                        if ( m2pfn != (gfn + i2) )
+                        {
+                            pmbad++;
+                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
+                                       m2pfn);
+                            BUG();
+                        }
+                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                }
+
                 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                 {
@@ -2224,7 +2390,7 @@
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
-    mfn_t l1mfn, l2mfn;
+    mfn_t l1mfn, l2mfn, l3mfn;
     unsigned long i1, i2, i3;
     l3_pgentry_t *l3e;
 #if CONFIG_PAGING_LEVELS == 4
@@ -2245,6 +2411,7 @@
 #if CONFIG_PAGING_LEVELS == 4
     l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #else /* CONFIG_PAGING_LEVELS == 3 */
+    l3mfn = _mfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
     l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
 #endif
 
@@ -2255,6 +2422,7 @@
         {
             continue;
         }
+        l3mfn = _mfn(l4e_get_pfn(l4e[i4]));
         l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
 #endif
         for ( i3 = 0;
@@ -2265,6 +2433,20 @@
             {
                 continue;
             }
+            if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
+            {
+                flags = l3e_get_flags(l3e[i3]);
+                if ( p2m_flags_to_type(flags) != ot )
+                    continue;
+                mfn = l3e_get_pfn(l3e[i3]);
+                gfn = get_gpfn_from_mfn(mfn);
+                flags = p2m_type_to_flags(nt);
+                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+                paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l3e[i3],
+                                       l3mfn, l1e_content, 3);
+                continue;
+            }
+
             l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
             l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
             for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

next             reply	other threads:[~2010-02-22 17:18 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-22 17:18 Wei Huang [this message]
2010-02-23 10:07 ` [PATCH][3/4] Enable 1GB for Xen HVM host page Tim Deegan
2010-02-23 16:37   ` Huang2, Wei
2010-02-24  9:13     ` Tim Deegan
2010-02-25 10:30       ` George Dunlap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B82BC78.7080907@amd.com \
    --to=wei.huang2@amd.com \
    --cc=dongxiao.xu@intel.com \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.