All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Xen PV support for hugepages
@ 2008-10-10 13:29 dcm
  2008-10-10 13:30 ` [PATCH 2/2] Linux support for hugepages as a Xen PV guest dcm
  2008-10-10 15:28 ` [PATCH 1/2] Xen PV support for hugepages Jan Beulich
  0 siblings, 2 replies; 11+ messages in thread
From: dcm @ 2008-10-10 13:29 UTC (permalink / raw)
  To: Keir Fraser, xen-devel

This patch adds support to Xen for hugepages in a PV environment.  The patch
is against the latest xen-unstable tree on xenbits.xensource.com.

Hugepage support is disabled by default and must be enabled with the Xen
boot command-line option "allowhugepage".

Xen assumes that the guest guarantees each hugepage is physically contiguous
and properly aligned.

There is no support yet for save/restore/migrate.

Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>

----


--- xen-unstable//./xen/include/asm-x86/x86_32/page.h	2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
  * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
  * Permit the NX bit if the hardware supports it.
  */
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
  * Permit the NX bit if the hardware supports it.
  * Note that range [62:52] is available for software use on x86/64.
  */
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c	2008-10-09 09:07:47.000000000 -0500
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+static int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,26 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +679,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +722,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) && 
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !( unlikely(paging_mode_external(d) && (d != curr->domain)) );
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +782,43 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        if ( !opt_allow_hugepage )
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if ( unlikely(!rc) )
+            return rc;
+
+        for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+        {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                for ( --m; m > mfn; --m )
+                    put_data_page(mfn_to_page(m), writeable);
+                put_data_page(page, writeable);
+                return 0;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1010,18 @@ static int put_page_from_l2e(l2_pgentry_
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+                put_data_page(mfn_to_page(m), writeable);
+            put_data_page(page, writeable);
+        } else
+            put_page_and_type(l2e_get_page(l2e));
         return 0;
     }
     return 1;

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-10-15 22:31 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-10 13:29 [PATCH 1/2] Xen PV support for hugepages dcm
2008-10-10 13:30 ` [PATCH 2/2] Linux support for hugepages as a Xen PV guest dcm
2008-10-10 13:38   ` Keir Fraser
2008-10-10 15:06     ` Dave McCracken
2008-10-10 15:35       ` [PATCH 2/2] Linux support for hugepages as a Xen PVguest Jan Beulich
2008-10-15 17:01       ` Re: [PATCH 2/2] Linux support for hugepages as a Xen PV guest Dave McCracken
2008-10-15 21:48         ` Jeremy Fitzhardinge
2008-10-15 22:12           ` Dave McCracken
2008-10-15 22:31             ` Jeremy Fitzhardinge
2008-10-10 15:32   ` [PATCH 2/2] Linux support for hugepages as a Xen PVguest Jan Beulich
2008-10-10 15:28 ` [PATCH 1/2] Xen PV support for hugepages Jan Beulich

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.