From: Dave McCracken <dcm@mccr.org>
To: Keir Fraser <Keir.Fraser@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>,
Xen Developers List <xen-devel@lists.xensource.com>
Subject: [PATCH] Implement faster superpage mapping
Date: Thu, 13 May 2010 14:40:22 -0500 [thread overview]
Message-ID: <201005131440.22479.dcm@mccr.org> (raw)
[-- Attachment #1: Type: Text/Plain, Size: 407 bytes --]
Here's my first cut of a faster superpage mapping scheme. It uses a separate
superpage table to track mappings of superpages and mappings that conflict with
using a superpage.
One new feature of this code is that it requires every superpage to be
allocated to a domain in a single call. This ensures that every page in the
superpage is allocated to the same domain.
Dave McCracken
Oracle Corp.
[-- Attachment #2: xen-unstable-newspage-1.patch --]
[-- Type: text/x-patch, Size: 17949 bytes --]
--- xen-unstable//xen/common/page_alloc.c 2010-04-19 09:23:24.000000000 -0500
+++ xen-sdev//xen/common/page_alloc.c 2010-05-13 13:02:49.000000000 -0500
@@ -1083,6 +1083,50 @@ void init_domheap_pages(paddr_t ps, padd
init_heap_pages(mfn_to_page(smfn), emfn - smfn);
}
+static void enable_superpage(
+ struct page_info *pg,
+ unsigned int order)
+{
+ struct spage_info *spage;
+ int i;
+
+ spage = page_to_spage(pg);
+ if (order < SUPERPAGE_ORDER)
+ {
+ test_and_clear_bit(_SGT_enabled, &spage->type_info);
+ return;
+ }
+ if (order == SUPERPAGE_ORDER)
+ {
+ test_and_set_bit(_SGT_enabled, &spage->type_info);
+ return;
+ }
+ order -= SUPERPAGE_ORDER;
+ for(i = 0; i < (1 << order); i++)
+ test_and_set_bit(_SGT_enabled, &spage[i].type_info);
+}
+
+static void disable_superpage(
+ struct page_info *pg,
+ unsigned int order)
+{
+ struct spage_info *spage;
+ int i;
+
+ spage = page_to_spage(pg);
+ test_and_clear_bit(_SGT_enabled, &spage->type_info);
+
+ if (order > SUPERPAGE_ORDER)
+ {
+ order -= SUPERPAGE_ORDER;
+ for(i = 1; i < (1 << order); i++)
+ {
+ BUG_ON((spage[i].type_info & SGT_count_mask) != 0);
+ test_and_clear_bit(_SGT_enabled, &spage[i].type_info);
+ }
+ }
+
+}
int assign_pages(
struct domain *d,
@@ -1128,6 +1172,9 @@ int assign_pages(
page_list_add_tail(&pg[i], &d->page_list);
}
+ if (opt_allow_superpage)
+ enable_superpage(pg, order);
+
spin_unlock(&d->page_alloc_lock);
return 0;
@@ -1201,6 +1248,9 @@ void free_domheap_pages(struct page_info
page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
}
+ if (opt_allow_superpage)
+ disable_superpage(pg, order);
+
d->tot_pages -= 1 << order;
drop_dom_ref = (d->tot_pages == 0);
--- xen-unstable//xen/include/asm-x86/mm.h 2010-04-28 09:31:26.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/mm.h 2010-05-05 09:33:49.000000000 -0500
@@ -214,6 +214,33 @@ struct page_info
#define PGC_count_width PG_shift(9)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+struct spage_info
+{
+ unsigned long count_info;
+ unsigned long type_info;
+};
+
+ /* The following page types are MUTUALLY EXCLUSIVE. */
+#define SGT_none PG_mask(0, 2) /* superpage not in use */
+#define SGT_superpage PG_mask(1, 2) /* mapped as a superpage */
+#define SGT_nosuper PG_mask(2, 2) /* has mappings that conflict with superpage */
+#define SGT_type_mask PG_mask(3, 2) /* Bits 30-31 or 62-63. */
+
+/* Enabled is set when the entire superpage is allocated as a block to a domain */
+#define _SGT_enabled PG_shift(3)
+#define SGT_enabled PG_mask(1, 3)
+
+ /* Count of uses of this superpage as its current type. */
+#define SGT_count_width PG_shift(3)
+#define SGT_count_mask ((1UL<<SGT_count_width)-1)
+
+static inline int spage_conflicts(unsigned long type)
+{
+ if (type && (type != PGT_writable_page))
+ return 1;
+ return 0;
+}
+
#if defined(__i386__)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({ \
@@ -262,12 +289,15 @@ extern void share_xen_page_with_privileg
struct page_info *page, int readonly);
#define frame_table ((struct page_info *)FRAMETABLE_VIRT_START)
+#define spage_table ((struct spage_info *)SPAGETABLE_VIRT_START)
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);
#define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
(sizeof(*frame_table) & -sizeof(*frame_table)))
+#define SDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
+ (sizeof(*spage_table) & -sizeof(*spage_table)))
extern unsigned long pdx_group_valid[];
/* Convert between Xen-heap virtual addresses and page-info structures. */
@@ -370,7 +400,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry
int check_descriptor(const struct domain *, struct desc_struct *d);
-extern int opt_allow_hugepage;
+extern int opt_allow_superpage;
extern int mem_hotplug;
/******************************************************************************
--- xen-unstable//xen/include/asm-x86/guest_pt.h 2010-04-19 09:23:24.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/guest_pt.h 2010-05-13 13:05:12.000000000 -0500
@@ -186,10 +186,11 @@ guest_supports_superpages(struct vcpu *v
/* The _PAGE_PSE bit must be honoured in HVM guests, whenever
* CR4.PSE is set or the guest is in PAE or long mode.
* It's also used in the dummy PT for vcpus with CR4.PG cleared. */
- return (is_hvm_vcpu(v) &&
+ return (opt_allow_superpage ||
+ (is_hvm_vcpu(v) &&
(GUEST_PAGING_LEVELS != 2
|| !hvm_paging_enabled(v)
- || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
+ || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))));
}
static inline int
--- xen-unstable//xen/include/asm-x86/x86_32/page.h 2009-10-07 15:43:52.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/x86_32/page.h 2010-05-04 08:14:07.000000000 -0500
@@ -6,6 +6,7 @@
#define L2_PAGETABLE_SHIFT 21
#define L3_PAGETABLE_SHIFT 30
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT
#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT
#define PAGETABLE_ORDER 9
@@ -13,6 +14,7 @@
#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define L3_PAGETABLE_ENTRIES 4
#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES
+#define SUPERPAGE_ORDER PAGETABLE_ORDER
/*
* Architecturally, physical addresses may be up to 52 bits. However, the
@@ -53,6 +55,9 @@
#define virt_to_pdx(va) virt_to_mfn(va)
#define pdx_to_virt(pdx) mfn_to_virt(pdx)
+#define pfn_to_sdx(pfn) ((pfn)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))
+#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))
+
static inline unsigned long __virt_to_maddr(unsigned long va)
{
ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END);
--- xen-unstable//xen/include/asm-x86/config.h 2010-04-06 07:44:56.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/config.h 2010-05-03 09:57:00.000000000 -0500
@@ -225,6 +225,11 @@ extern unsigned int video_mode, video_fl
/* Slot 261: xen text, static data and bss (1GB). */
#define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
#define XEN_VIRT_END (XEN_VIRT_START + GB(1))
+/* Slot 261: superpage information array (40MB). */
+#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START
+#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \
+ sizeof(struct spage_info))
+#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE)
/* Slot 261: page-frame information array (40GB). */
#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \
--- xen-unstable//xen/include/asm-x86/page.h 2009-12-18 08:35:12.000000000 -0600
+++ xen-sdev//xen/include/asm-x86/page.h 2010-05-10 11:14:21.000000000 -0500
@@ -240,6 +240,13 @@ void copy_page_sse2(void *, const void *
#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+/* Convert between machine frame numbers and spage-info structures. */
+#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn))
+#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table))
+
+/* Convert between page-info structures and spage-info structures. */
+#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT)))
+
/*
* We define non-underscored wrappers for above conversion functions. These are
* overridden in various source files while underscored versions remain intact.
@@ -251,6 +258,8 @@ void copy_page_sse2(void *, const void *
#define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma))
#define mfn_to_page(mfn) __mfn_to_page(mfn)
#define page_to_mfn(pg) __page_to_mfn(pg)
+#define mfn_to_spage(mfn) __mfn_to_spage(mfn)
+#define spage_to_mfn(pg) __spage_to_mfn(pg)
#define maddr_to_page(ma) __maddr_to_page(ma)
#define page_to_maddr(pg) __page_to_maddr(pg)
#define virt_to_page(va) __virt_to_page(va)
--- xen-unstable//xen/include/asm-x86/x86_64/page.h 2009-10-07 15:43:52.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/x86_64/page.h 2010-05-04 10:44:38.000000000 -0500
@@ -7,6 +7,7 @@
#define L3_PAGETABLE_SHIFT 30
#define L4_PAGETABLE_SHIFT 39
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT
#define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT
#define PAGETABLE_ORDER 9
@@ -15,6 +16,7 @@
#define L3_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES
+#define SUPERPAGE_ORDER PAGETABLE_ORDER
#define __PAGE_OFFSET DIRECTMAP_VIRT_START
#define __XEN_VIRT_START XEN_VIRT_START
@@ -41,6 +43,8 @@ extern void pfn_pdx_hole_setup(unsigned
#define page_to_pdx(pg) ((pg) - frame_table)
#define pdx_to_page(pdx) (frame_table + (pdx))
+#define spage_to_pdx(spg) ((spg>>(SUPERPAGE_SHIFT-PAGE_SHIFT)) - spage_table)
+#define pdx_to_spage(pdx) (spage_table + ((pdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)))
/*
* Note: These are solely for the use by page_{get,set}_owner(), and
* therefore don't need to handle the XEN_VIRT_{START,END} range.
@@ -64,6 +68,16 @@ static inline unsigned long pdx_to_pfn(u
((pdx << pfn_pdx_hole_shift) & pfn_top_mask);
}
+static inline unsigned long pfn_to_sdx(unsigned long pfn)
+{
+ return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT);
+}
+
+static inline unsigned long sdx_to_pfn(unsigned long sdx)
+{
+ return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT));
+}
+
static inline unsigned long __virt_to_maddr(unsigned long va)
{
ASSERT(va >= XEN_VIRT_START);
--- xen-unstable//xen/arch/x86/mm.c 2010-04-28 09:31:26.000000000 -0500
+++ xen-sdev//xen/arch/x86/mm.c 2010-05-13 13:07:57.000000000 -0500
@@ -151,8 +151,15 @@ unsigned long __read_mostly pdx_group_va
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
-int opt_allow_hugepage;
+int opt_allow_superpage;
+static int opt_allow_hugepage;
boolean_param("allowhugepage", opt_allow_hugepage);
+boolean_param("allowsuperpage", opt_allow_superpage);
+
+static void get_spage(struct spage_info *spage);
+static void put_spage(struct spage_info *spage);
+static int get_spage_type(struct spage_info *spage, unsigned long type);
+static void put_spage_type(struct spage_info *spage);
#define l1_disallow_mask(d) \
((d != dom_io) && \
@@ -202,6 +209,28 @@ static void __init init_frametable_chunk
memset(end, -1, s - (unsigned long)end);
}
+static void __init init_spagetable(void)
+{
+ unsigned long s, start = SPAGETABLE_VIRT_START;
+ unsigned long end = SPAGETABLE_VIRT_END;
+ unsigned long step, mfn;
+ unsigned int max_entries;
+
+ step = 1UL << PAGETABLE_ORDER;
+ max_entries = (max_pdx + ((1UL<<SUPERPAGE_ORDER)-1)) >> SUPERPAGE_ORDER;
+ end = start + (((max_entries * sizeof(*spage_table)) +
+ ((1UL<<SUPERPAGE_SHIFT)-1)) & (~((1UL<<SUPERPAGE_SHIFT)-1)));
+
+ for (s = start; s < end; s += step << PAGE_SHIFT)
+ {
+ mfn = alloc_boot_pages(step, step);
+ if ( !mfn )
+ panic("Not enough memory for spage table");
+ map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
+ }
+ memset((void *)start, 0, end - start);
+}
+
void __init init_frametable(void)
{
unsigned int sidx, eidx, nidx;
@@ -212,6 +241,9 @@ void __init init_frametable(void)
#endif
BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
+ if (opt_allow_hugepage)
+ opt_allow_superpage = 1;
+
for ( sidx = 0; ; sidx = nidx )
{
eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
@@ -232,6 +264,8 @@ void __init init_frametable(void)
(unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) -
(unsigned long)pdx_to_page(max_pdx));
}
+ if (opt_allow_superpage)
+ init_spagetable();
}
void __init arch_init_memory(void)
@@ -652,19 +686,6 @@ static int get_page_and_type_from_pagenr
return rc;
}
-static int get_data_page(
- struct page_info *page, struct domain *d, int writeable)
-{
- int rc;
-
- if ( writeable )
- rc = get_page_and_type(page, d, PGT_writable_page);
- else
- rc = get_page(page, d);
-
- return rc;
-}
-
static void put_data_page(
struct page_info *page, int writeable)
{
@@ -870,6 +891,7 @@ static int
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
{
+ struct spage_info *spage;
unsigned long mfn = l2e_get_pfn(l2e);
int rc;
@@ -886,31 +908,41 @@ get_page_from_l2e(
{
rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
- rc = 0;
+ return 0;
+
+ return rc;
}
- else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+ if ( !opt_allow_superpage )
{
- rc = -EINVAL;
+ MEM_LOG("Attempt to map superpage without allowsuperpage flag in hypervisor");
+ return -EINVAL;
}
- else
+ if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
{
- unsigned long m = mfn;
- int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
-
- do {
- if ( !mfn_valid(m) ||
- !get_data_page(mfn_to_page(m), d, writeable) )
- {
- while ( m-- > mfn )
- put_data_page(mfn_to_page(m), writeable);
- return -EINVAL;
- }
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn);
+ return -EINVAL;
+ }
+ spage = mfn_to_spage(mfn);
- rc = 1;
+ if (!(spage->type_info & SGT_enabled))
+ {
+ MEM_LOG("Map attempt on non-contiguous superpage, mfn %lx", mfn);
+ return -EINVAL;
}
- return rc;
+ get_spage(spage);
+
+ if (l2e_get_flags(l2e) & _PAGE_RW)
+ {
+ if (!get_spage_type(spage, SGT_superpage))
+ {
+ put_spage(spage);
+ MEM_LOG("Superpage in use as page table mfn %lx, type %lx",
+ mfn, spage->type_info);
+ return -EINVAL;
+ }
+ }
+ return 0;
}
@@ -1101,13 +1133,11 @@ static int put_page_from_l2e(l2_pgentry_
if ( l2e_get_flags(l2e) & _PAGE_PSE )
{
- unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
- int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+ struct spage_info *spage = mfn_to_spage(l2e_get_pfn(l2e));
- ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
- do {
- put_data_page(mfn_to_page(m), writeable);
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ put_spage(spage);
+ if (l2e_get_flags(l2e) & _PAGE_RW)
+ put_spage_type(spage);
}
else
{
@@ -2038,6 +2068,75 @@ int get_page(struct page_info *page, str
return 0;
}
+static void get_spage(struct spage_info *spage)
+{
+ unsigned long x, y = spage->count_info;
+
+ do {
+ x = y;
+ }
+ while ( (y = cmpxchg(&spage->count_info, x, x + 1)) != x );
+
+ return;
+}
+
+static void put_spage(struct spage_info *spage)
+{
+ unsigned long x, y = spage->count_info;
+
+ do {
+ ASSERT(y != 0);
+ x = y;
+ }
+ while ( (y = cmpxchg(&spage->count_info, x, x - 1)) != x );
+
+ return;
+}
+
+static int get_spage_type(struct spage_info *spage, unsigned long type)
+{
+ unsigned long x, nx, y = spage->type_info;
+
+ do {
+ x = y;
+ nx = x + 1;
+ if ( unlikely((nx & SGT_count_mask) == 0) )
+ {
+ MEM_LOG("Superpage type count overflow on pfn %lx", spage_to_mfn(spage));
+ return 0;
+ }
+ if ( unlikely((x & SGT_count_mask) == 0) )
+ {
+ nx = (nx & ~SGT_type_mask) | type;
+ }
+ else
+ {
+ if ( unlikely((x & SGT_type_mask) != type) )
+ return 0;
+ }
+ }
+ while ( (y = cmpxchg(&spage->type_info, x, nx)) != x );
+ return 1;
+}
+
+static void put_spage_type(struct spage_info *spage)
+{
+ unsigned long x, nx, y = spage->type_info;
+
+ do {
+ x = y;
+ nx = x - 1;
+ if ((x & SGT_count_mask) == 0)
+ {
+ return;
+ }
+ if ((nx & SGT_count_mask) == 0)
+ nx = (nx & ~SGT_type_mask) | SGT_none;
+ }
+ while ( (y = cmpxchg(&spage->type_info, x, nx)) != x );
+ return;
+}
+
/*
* Special version of get_page() to be used exclusively when
* - a page is known to already have a non-zero reference count
@@ -2279,6 +2378,10 @@ static int __put_page_type(struct page_i
return -EINTR;
}
+ if (opt_allow_superpage && spage_conflicts(x & PGT_type_mask))
+ {
+ put_spage_type(page_to_spage(page));
+ }
return rc;
}
@@ -2413,6 +2516,15 @@ static int __get_page_type(struct page_i
rc = alloc_page_type(page, type, preemptible);
}
+ if (opt_allow_superpage && spage_conflicts(type))
+ {
+ if (!get_spage_type(page_to_spage(page), SGT_nosuper))
+ {
+ __put_page_type(page, 0);
+ return -EINVAL;
+ }
+ }
+
if ( (x & PGT_partial) && !(nx & PGT_partial) )
put_page(page);
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2010-05-13 19:40 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-13 19:40 Dave McCracken [this message]
2010-05-14 7:25 ` [PATCH] Implement faster superpage mapping Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201005131440.22479.dcm@mccr.org \
--to=dcm@mccr.org \
--cc=Keir.Fraser@eu.citrix.com \
--cc=jeremy@goop.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).