xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Dave McCracken <dcm@mccr.org>
To: Keir Fraser <Keir.Fraser@eu.citrix.com>,
	Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Xen Developers List <xen-devel@lists.xensource.com>
Subject: [PATCH] Add hypercall to mark superpages to improve performance
Date: Wed, 28 Apr 2010 09:33:20 -0500	[thread overview]
Message-ID: <201004280933.20527.dcm@mccr.org> (raw)

[-- Attachment #1: Type: Text/Plain, Size: 645 bytes --]


The current method of mapping hugepages/superpages in the hypervisor involves 
updating the reference counts of every page in the superpage.  This has proved 
to be a significant performance bottleneck.

This patch adds a pair of MMUEXT hypercalls to mark and unmark a superpage.  
Once the superpage is marked, the type of every page in it is locked to 
writable until a companion unmark is done.  When that superpage is 
subsequently mapped, only the first page needs to be reference counted.

There are checks when the superpage is marked and unmarked to make sure no 
individual page mappings have skewed the reference counts.

Dave McCracken
Oracle Corp

[-- Attachment #2: xen-unstable-fhpage-1.patch --]
[-- Type: text/x-patch, Size: 8138 bytes --]

--- xen-unstable//xen/include/public/xen.h	2009-12-18 08:35:12.000000000 -0600
+++ xen-fhpage//xen/include/public/xen.h	2010-04-28 09:32:08.000000000 -0500
@@ -250,6 +250,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  * cmd: MMUEXT_COPY_PAGE
  * mfn: Machine frame number of the destination page.
  * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be cleared.
  */
 #define MMUEXT_PIN_L1_TABLE      0
 #define MMUEXT_PIN_L2_TABLE      1
@@ -268,13 +274,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #define MMUEXT_NEW_USER_BASEPTR 15
 #define MMUEXT_CLEAR_PAGE       16
 #define MMUEXT_COPY_PAGE        17
+#define MMUEXT_MARK_SUPER       18
+#define MMUEXT_UNMARK_SUPER     19
 
 #ifndef __ASSEMBLY__
 struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
-         * CLEAR_PAGE, COPY_PAGE */
+         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
         xen_pfn_t     mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
         unsigned long linear_addr;
--- xen-unstable//xen/include/asm-x86/mm.h	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/include/asm-x86/mm.h	2010-04-28 09:32:08.000000000 -0500
@@ -182,9 +182,12 @@ struct page_info
  /* Page is locked? */
 #define _PGT_locked       PG_shift(9)
 #define PGT_locked        PG_mask(1, 9)
+ /* Page is part of a superpage? */
+#define _PGT_super        PG_shift(10)
+#define PGT_super         PG_mask(1, 10)
 
  /* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(9)
+#define PGT_count_width   PG_shift(10)
 #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
--- xen-unstable//xen/arch/x86/domain.c	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/domain.c	2010-04-28 09:32:08.000000000 -0500
@@ -1748,6 +1748,17 @@ static int relinquish_memory(
             BUG();
         }
 
+        if ( test_and_clear_bit(_PGT_super, &page->u.inuse.type_info) )
+        {
+            unsigned long mfn = page_to_mfn(page);
+            int i;
+
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                put_page_and_type(mfn_to_page(mfn));
+            }
+        }
+
         if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
             put_page(page);
 
--- xen-unstable//xen/arch/x86/mm.c	2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/mm.c	2010-04-28 09:32:08.000000000 -0500
@@ -894,20 +894,30 @@ get_page_from_l2e(
     }
     else
     {
-        unsigned long m = mfn;
+        struct page_info *page = mfn_to_page(mfn);
         int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
   
-        do {
-            if ( !mfn_valid(m) ||
-                 !get_data_page(mfn_to_page(m), d, writeable) )
-            {
-                while ( m-- > mfn )
-                    put_data_page(mfn_to_page(m), writeable);
-                return -EINVAL;
-            }
-        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
-
-        rc = 1;
+        if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+        {
+            rc = get_data_page(page, d, writeable);
+            if ( unlikely(!rc) )
+                rc = -EINVAL;
+        }
+        else
+        {
+            unsigned long m = mfn;
+  
+            do {
+                if ( !mfn_valid(m) ||
+                     !get_data_page(mfn_to_page(m), d, writeable) )
+                {
+                    while ( m-- > mfn )
+                        put_data_page(mfn_to_page(m), writeable);
+                    return -EINVAL;
+                }
+            } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+            rc = 1;
+        }
     }
 
     return rc;
@@ -1101,13 +1111,24 @@ static int put_page_from_l2e(l2_pgentry_
 
     if ( l2e_get_flags(l2e) & _PAGE_PSE )
     {
-        unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+        unsigned long mfn = l2e_get_pfn(l2e);
+        struct page_info *page = mfn_to_page(mfn);
         int writeable = l2e_get_flags(l2e) & _PAGE_RW;
 
         ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
-        do {
-            put_data_page(mfn_to_page(m), writeable);
-        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+        if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+        {
+            put_data_page(page, writeable);
+        }
+        else
+        {
+            unsigned long m = mfn;
+
+            do {
+                put_data_page(mfn_to_page(m), writeable);
+            } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+        }
     }
     else
     {
@@ -2981,6 +3002,93 @@ int do_mmuext_op(
             break;
         }
 
+        case MMUEXT_MARK_SUPER:
+        {
+            unsigned long mfn;
+            struct page_info *page, *p;
+            unsigned long count;
+            int i;
+
+            mfn = op.arg1.mfn;
+            if (mfn & (L1_PAGETABLE_ENTRIES-1))
+            {
+                MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            page = mfn_to_page(mfn);
+            if (unlikely(test_and_set_bit(_PGT_super, &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("Super flag already set on mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            count = page->u.inuse.type_info & PGT_count_mask;
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                p = mfn_to_page(mfn);
+                if ((p->u.inuse.type_info & PGT_count_mask) != count)
+                {
+                    MEM_LOG("Mismatched page count, index %d, expected count %d, found %d",
+                            i, count, p->u.inuse.type_info & PGT_count_mask);
+                    okay = 0;
+                } else
+                {
+                    okay = !get_page_and_type(p, d, PGT_writable_page);
+                }
+                if (!okay)
+                {
+                    MEM_LOG("Mismatched type setting super flag");
+                    while (--i >= 0)
+                        put_page_and_type(mfn_to_page(--mfn));
+                    test_and_clear_bit(_PGT_super, &page->u.inuse.type_info);
+                    break;
+                }
+            }
+            break;
+        }
+
+        case MMUEXT_UNMARK_SUPER:
+        {
+            unsigned long mfn;
+            struct page_info *page, *p;
+            unsigned long count;
+            int i;
+
+            mfn = op.arg1.mfn;
+            if (mfn & (L1_PAGETABLE_ENTRIES-1))
+            {
+                MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            page = mfn_to_page(mfn);
+            if (unlikely(!test_and_clear_bit(_PGT_super, &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("Super flag already clear on mfn %lx", mfn);
+                okay = 0;
+                break;
+            }
+            count = page->u.inuse.type_info & PGT_count_mask;
+            for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+            {
+                p = mfn_to_page(mfn);
+                if ((p->u.inuse.type_info & PGT_count_mask) != count)
+                {
+                    MEM_LOG("Superpage still in use.  Can not clear flag");
+                    okay = 0;
+                    while (--i >= 0)
+                    {
+                        get_page_and_type(mfn_to_page(--mfn), d, PGT_writable_page);
+                    }
+                    test_and_set_bit(_PGT_super, &page->u.inuse.type_info);
+                    break;
+                }
+                put_page_and_type(p);
+            }
+            break;
+        }
+
         default:
             MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
             rc = -ENOSYS;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2010-04-28 14:33 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-28 14:33 Dave McCracken [this message]
2010-04-28  6:58 ` [PATCH] Add hypercall to mark superpages to improve performance Keir Fraser
2010-04-30 19:43   ` Dave McCracken
2010-04-30 21:30     ` Keir Fraser
2010-04-30 22:10       ` Keir Fraser
2010-04-30 21:34     ` Keir Fraser
2010-04-30 21:43       ` Dave McCracken
2010-04-30 22:03         ` Keir Fraser
2010-05-02 21:34           ` Dave McCracken
2010-05-02 23:54             ` Keir Fraser
2010-05-03  0:03               ` Keir Fraser
2010-05-03  1:55                 ` Dave McCracken
2010-05-03 16:09                   ` Keir Fraser
2010-05-03 16:29                     ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201004280933.20527.dcm@mccr.org \
    --to=dcm@mccr.org \
    --cc=Keir.Fraser@eu.citrix.com \
    --cc=jeremy@goop.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).