From: Dave McCracken <dcm@mccr.org>
To: Keir Fraser <Keir.Fraser@eu.citrix.com>,
Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Xen Developers List <xen-devel@lists.xensource.com>
Subject: [PATCH] Add hypercall to mark superpages to improve performance
Date: Wed, 28 Apr 2010 09:33:20 -0500 [thread overview]
Message-ID: <201004280933.20527.dcm@mccr.org> (raw)
[-- Attachment #1: Type: Text/Plain, Size: 645 bytes --]
The current method of mapping hugepages/superpages in the hypervisor involves
updating the reference counts of every page in the superpage. This has proved
to be a significant performance bottleneck.
This patch adds a pair of MMUEXT hypercalls to mark and unmark a superpage.
Once the superpage is marked, the type of each of its pages is locked to
writable page until a companion unmark is done. When that superpage is
subsequently mapped, only the first (head) page needs to be reference counted.
There are checks when the superpage is marked and unmarked to make sure no
individual page mappings have skewed the reference counts.
Dave McCracken
Oracle Corp
[-- Attachment #2: xen-unstable-fhpage-1.patch --]
[-- Type: text/x-patch, Size: 8138 bytes --]
--- xen-unstable//xen/include/public/xen.h 2009-12-18 08:35:12.000000000 -0600
+++ xen-fhpage//xen/include/public/xen.h 2010-04-28 09:32:08.000000000 -0500
@@ -250,6 +250,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* cmd: MMUEXT_COPY_PAGE
* mfn: Machine frame number of the destination page.
* src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be unmarked.
*/
#define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1
@@ -268,13 +274,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#define MMUEXT_NEW_USER_BASEPTR 15
#define MMUEXT_CLEAR_PAGE 16
#define MMUEXT_COPY_PAGE 17
+#define MMUEXT_MARK_SUPER 18
+#define MMUEXT_UNMARK_SUPER 19
#ifndef __ASSEMBLY__
struct mmuext_op {
unsigned int cmd;
union {
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
- * CLEAR_PAGE, COPY_PAGE */
+ * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
--- xen-unstable//xen/include/asm-x86/mm.h 2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/include/asm-x86/mm.h 2010-04-28 09:32:08.000000000 -0500
@@ -182,9 +182,12 @@ struct page_info
/* Page is locked? */
#define _PGT_locked PG_shift(9)
#define PGT_locked PG_mask(1, 9)
+ /* Page is the head of a marked superpage? */
+#define _PGT_super PG_shift(10)
+#define PGT_super PG_mask(1, 10)
/* Count of uses of this frame as its current type. */
-#define PGT_count_width PG_shift(9)
+#define PGT_count_width PG_shift(10)
#define PGT_count_mask ((1UL<<PGT_count_width)-1)
/* Cleared when the owning guest 'frees' this page. */
--- xen-unstable//xen/arch/x86/domain.c 2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/domain.c 2010-04-28 09:32:08.000000000 -0500
@@ -1748,6 +1748,17 @@ static int relinquish_memory(
BUG();
}
+ if ( test_and_clear_bit(_PGT_super, &page->u.inuse.type_info) )
+ {
+ unsigned long mfn = page_to_mfn(page);
+ int i;
+
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+ {
+ put_page_and_type(mfn_to_page(mfn));
+ }
+ }
+
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
--- xen-unstable//xen/arch/x86/mm.c 2010-04-28 09:31:26.000000000 -0500
+++ xen-fhpage//xen/arch/x86/mm.c 2010-04-28 09:32:08.000000000 -0500
@@ -894,20 +894,30 @@ get_page_from_l2e(
}
else
{
- unsigned long m = mfn;
+ struct page_info *page = mfn_to_page(mfn);
int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
- do {
- if ( !mfn_valid(m) ||
- !get_data_page(mfn_to_page(m), d, writeable) )
- {
- while ( m-- > mfn )
- put_data_page(mfn_to_page(m), writeable);
- return -EINVAL;
- }
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
-
- rc = 1;
+ if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+ {
+ rc = get_data_page(page, d, writeable);
+ if ( unlikely(!rc) )
+ rc = -EINVAL;
+ }
+ else
+ {
+ unsigned long m = mfn;
+
+ do {
+ if ( !mfn_valid(m) ||
+ !get_data_page(mfn_to_page(m), d, writeable) )
+ {
+ while ( m-- > mfn )
+ put_data_page(mfn_to_page(m), writeable);
+ return -EINVAL;
+ }
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ rc = 1;
+ }
}
return rc;
@@ -1101,13 +1111,24 @@ static int put_page_from_l2e(l2_pgentry_
if ( l2e_get_flags(l2e) & _PAGE_PSE )
{
- unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+ unsigned long mfn = l2e_get_pfn(l2e);
+ struct page_info *page = mfn_to_page(mfn);
int writeable = l2e_get_flags(l2e) & _PAGE_RW;
ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
- do {
- put_data_page(mfn_to_page(m), writeable);
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+ if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) )
+ {
+ put_data_page(page, writeable);
+ }
+ else
+ {
+ unsigned long m = mfn;
+
+ do {
+ put_data_page(mfn_to_page(m), writeable);
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ }
}
else
{
@@ -2981,6 +3002,93 @@ int do_mmuext_op(
break;
}
+ case MMUEXT_MARK_SUPER:
+ {
+ unsigned long mfn;
+ struct page_info *page, *p;
+ unsigned long count;
+ int i;
+
+ mfn = op.arg1.mfn;
+ if (mfn & (L1_PAGETABLE_ENTRIES-1))
+ {
+ MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+ okay = 0;
+ break;
+ }
+ page = mfn_to_page(mfn);
+ if (unlikely(test_and_set_bit(_PGT_super, &page->u.inuse.type_info)) )
+ {
+ MEM_LOG("Super flag already set on mfn %lx", mfn);
+ okay = 0;
+ break;
+ }
+ count = page->u.inuse.type_info & PGT_count_mask;
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+ {
+ p = mfn_to_page(mfn);
+ if ((p->u.inuse.type_info & PGT_count_mask) != count)
+ {
+ MEM_LOG("Mismatched page count, index %d, expected count %d, found %d",
+ i, count, p->u.inuse.type_info & PGT_count_mask);
+ okay = 0;
+ } else
+ {
+ okay = !get_page_and_type(p, d, PGT_writable_page);
+ }
+ if (!okay)
+ {
+ MEM_LOG("Mismatched type setting super flag");
+ while (--i >= 0)
+ put_page_and_type(mfn_to_page(--mfn));
+ test_and_clear_bit(_PGT_super, &page->u.inuse.type_info);
+ break;
+ }
+ }
+ break;
+ }
+
+ case MMUEXT_UNMARK_SUPER:
+ {
+ unsigned long mfn;
+ struct page_info *page, *p;
+ unsigned long count;
+ int i;
+
+ mfn = op.arg1.mfn;
+ if (mfn & (L1_PAGETABLE_ENTRIES-1))
+ {
+ MEM_LOG("Unaligned superpage reference mfn %lx", mfn);
+ okay = 0;
+ break;
+ }
+ page = mfn_to_page(mfn);
+ if (unlikely(!test_and_clear_bit(_PGT_super, &page->u.inuse.type_info)) )
+ {
+ MEM_LOG("Super flag already clear on mfn %lx", mfn);
+ okay = 0;
+ break;
+ }
+ count = page->u.inuse.type_info & PGT_count_mask;
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++)
+ {
+ p = mfn_to_page(mfn);
+ if ((p->u.inuse.type_info & PGT_count_mask) != count)
+ {
+ MEM_LOG("Superpage still in use. Can not clear flag");
+ okay = 0;
+ while (--i >= 0)
+ {
+ get_page_and_type(mfn_to_page(--mfn), d, PGT_writable_page);
+ }
+ test_and_set_bit(_PGT_super, &page->u.inuse.type_info);
+ break;
+ }
+ put_page_and_type(p);
+ }
+ break;
+ }
+
default:
MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
rc = -ENOSYS;
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2010-04-28 14:33 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-28 14:33 Dave McCracken [this message]
2010-04-28 6:58 ` [PATCH] Add hypercall to mark superpages to improve performance Keir Fraser
2010-04-30 19:43 ` Dave McCracken
2010-04-30 21:30 ` Keir Fraser
2010-04-30 22:10 ` Keir Fraser
2010-04-30 21:34 ` Keir Fraser
2010-04-30 21:43 ` Dave McCracken
2010-04-30 22:03 ` Keir Fraser
2010-05-02 21:34 ` Dave McCracken
2010-05-02 23:54 ` Keir Fraser
2010-05-03 0:03 ` Keir Fraser
2010-05-03 1:55 ` Dave McCracken
2010-05-03 16:09 ` Keir Fraser
2010-05-03 16:29 ` Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201004280933.20527.dcm@mccr.org \
--to=dcm@mccr.org \
--cc=Keir.Fraser@eu.citrix.com \
--cc=jeremy@goop.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).