xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add hypercall to mark superpages to improve performance
@ 2010-04-28 14:33 Dave McCracken
  2010-04-28  6:58 ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Dave McCracken @ 2010-04-28 14:33 UTC (permalink / raw)
  To: Keir Fraser, Jeremy Fitzhardinge; +Cc: Xen Developers List

[-- Attachment #1: Type: Text/Plain, Size: 645 bytes --]


The current method of mapping hugepages/superpages in the hypervisor involves 
updating the reference counts of every page in the superpage.  This has proved 
to be a significant performance bottleneck.

This patch adds a pair of MMUEXT hypercalls, MMUEXT_MARK_SUPER and 
MMUEXT_UNMARK_SUPER, to mark and unmark a superpage.  Once a superpage is 
marked, the type of its pages is locked to writable page until a companion 
unmark is done.  When that superpage is subsequently mapped, only the first 
page needs to be reference counted.

There are checks when the superpage is marked and unmarked to make sure no 
individual page mappings have skewed the reference counts: the type count of 
every page in the superpage must match that of the head page.

Dave McCracken
Oracle Corp

[-- Attachment #2: xen-unstable-fhpage-1.patch --]
[-- Type: text/x-patch, Size: 8138 bytes --]

--- xen-unstable//xen/include/public/xen.h	2009-12-18 08:35:12.000000000 -0600
+++ xen-fhpage//xen/include/public/xen.h	2010-04-28 09:32:08.000000000 -0500
@@ -250,6 +250,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  * cmd: MMUEXT_COPY_PAGE
  * mfn: Machine frame number of the destination page.
  * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be cleared.
  */
 #define MMUEXT_PIN_L1_TABLE      0
 #define MMUEXT_PIN_L2_TABLE      1
@@ -268,13 +274,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #define MMUEXT_NEW_USER_BASEPTR 15
 #define MMUEXT_CLEAR_PAGE       16
 #define MMUEXT_COPY_PAGE        17
+#define MMUEXT_MARK_SUPER       18
+#define MMUEXT_UNMARK_SUPER     19
 
 #ifndef __ASSEMBLY__
 struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
-         * CLEAR_PAGE, COPY_PAGE */
+         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
         xen_pfn_t     mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
         unsigned long linear_addr;
--- xen-unstable//xen/include/asm-x86/mm.h	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/include/asm-x86/mm.h	2010-04-28 09:32:08.000000000 -0500
@@ -182,9 +182,12 @@ struct page_info
  /* Page is locked? */
 #define _PGT_locked       PG_shift(9)
 #define PGT_locked        PG_mask(1, 9)
+ /* Page is part of a superpage? */
+#define _PGT_super        PG_shift(10)
+#define PGT_super         PG_mask(1, 10)
 
  /* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(9)
+#define PGT_count_width   PG_shift(10)
 #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
--- xen-unstable//xen/arch/x86/domain.c	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/domain.c	2010-04-28 09:32:08.000000000 -0500
@@ -1748,6 +1748,17 @@ static int relinquish_memory(
             BUG();
         }
 
+        if ( test_and_clear_bit(_PGT_super, &page->u.inuse.type_info) )
+        {
+            unsigned long mfn = page_to_mfn(page);
+            int i;
+
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                put_page_and_type(mfn_to_page(mfn));
+            }
+        }
+
         if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
             put_page(page);
 
--- xen-unstable//xen/arch/x86/mm.c	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/mm.c	2010-04-28 09:32:08.000000000 -0500
@@ -894,20 +894,30 @@ get_page_from_l2e(
     }
     else
     {
-        unsigned long m = mfn;
+        struct page_info *page = mfn_to_page(mfn);
         int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
   
-        do {
-            if ( !mfn_valid(m) ||
-                 !get_data_page(mfn_to_page(m), d, writeable) )
-            {
-                while ( m-- > mfn )
-                    put_data_page(mfn_to_page(m), writeable);
-                return -EINVAL;
-            }
-        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
-
-        rc = 1;
+        if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+        {
+            rc = get_data_page(page, d, writeable);
+            if ( unlikely(!rc) )
+                rc = -EINVAL;
+        }
+        else
+        {
+            unsigned long m = mfn;
+  
+            do {
+                if ( !mfn_valid(m) ||
+                     !get_data_page(mfn_to_page(m), d, writeable) )
+                {
+                    while ( m-- > mfn )
+                        put_data_page(mfn_to_page(m), writeable);
+                    return -EINVAL;
+                }
+            } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+            rc = 1;
+        }
     }
 
     return rc;
@@ -1101,13 +1111,24 @@ static int put_page_from_l2e(l2_pgentry_
 
     if ( l2e_get_flags(l2e) & _PAGE_PSE )
     {
-        unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+        unsigned long mfn = l2e_get_pfn(l2e);
+        struct page_info *page = mfn_to_page(mfn);
         int writeable = l2e_get_flags(l2e) & _PAGE_RW;
 
         ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
-        do {
-            put_data_page(mfn_to_page(m), writeable);
-        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+        if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+        {
+            put_data_page(page, writeable);
+        }
+        else
+        {
+            unsigned long m = mfn;
+
+            do {
+                put_data_page(mfn_to_page(m), writeable);
+            } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+        }
     }
     else
     {
@@ -2981,6 +3002,93 @@ int do_mmuext_op(
             break;
         }
 
+        case MMUEXT_MARK_SUPER:
+        {
+            unsigned long mfn;
+            struct page_info *page, *p;
+            unsigned long count;
+            int i;
+
+            mfn = op.arg1.mfn;
+            if (mfn & (L1_PAGETABLE_ENTRIES-1))
+            {
+                MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            page = mfn_to_page(mfn);
+            if (unlikely(test_and_set_bit(_PGT_super, &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("Super flag already set on mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            count = page->u.inuse.type_info & PGT_count_mask;
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                p = mfn_to_page(mfn);
+                if ((p->u.inuse.type_info & PGT_count_mask) != count)
+                {
+                    MEM_LOG("Mismatched page count, index %d, expected count %d, found %d",
+                            i, count, p->u.inuse.type_info & PGT_count_mask);
+                    okay = 0;
+                } else
+                {
+                    okay = !get_page_and_type(p, d, PGT_writable_page);
+                }
+                if (!okay)
+                {
+                    MEM_LOG("Mismatched type setting super flag");
+                    while (--i >= 0)
+                        put_page_and_type(mfn_to_page(--mfn));
+                    test_and_clear_bit(_PGT_super, &page->u.inuse.type_info);
+                    break;
+                }
+            }
+            break;
+        }
+
+        case MMUEXT_UNMARK_SUPER:
+        {
+            unsigned long mfn;
+            struct page_info *page, *p;
+            unsigned long count;
+            int i;
+
+            mfn = op.arg1.mfn;
+            if (mfn & (L1_PAGETABLE_ENTRIES-1))
+            {
+                MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            page = mfn_to_page(mfn);
+            if (unlikely(!test_and_clear_bit(_PGT_super, &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("Super flag already clear on mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            count = page->u.inuse.type_info & PGT_count_mask;
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                p = mfn_to_page(mfn);
+                if ((p->u.inuse.type_info & PGT_count_mask) != count)
+                {
+                    MEM_LOG("Superpage still in use.  Can not clear flag");
+                    okay = 0;
+                    while (--i >= 0)
+                    {
+                        get_page_and_type(mfn_to_page(--mfn), d, PGT_writable_page);
+                    }
+                    test_and_set_bit(_PGT_super, &page->u.inuse.type_info);
+                    break;
+                }
+                put_page_and_type(p);
+            }
+            break;
+        }
+
         default:
             MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
             rc = -ENOSYS;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-05-03 16:29 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-28 14:33 [PATCH] Add hypercall to mark superpages to improve performance Dave McCracken
2010-04-28  6:58 ` Keir Fraser
2010-04-30 19:43   ` Dave McCracken
2010-04-30 21:30     ` Keir Fraser
2010-04-30 22:10       ` Keir Fraser
2010-04-30 21:34     ` Keir Fraser
2010-04-30 21:43       ` Dave McCracken
2010-04-30 22:03         ` Keir Fraser
2010-05-02 21:34           ` Dave McCracken
2010-05-02 23:54             ` Keir Fraser
2010-05-03  0:03               ` Keir Fraser
2010-05-03  1:55                 ` Dave McCracken
2010-05-03 16:09                   ` Keir Fraser
2010-05-03 16:29                     ` Keir Fraser

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).