From: Dave McCracken <dcm@mccr.org>
To: Keir Fraser <Keir.Fraser@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>,
Xen Developers List <xen-devel@lists.xensource.com>
Subject: [PATCH] Implement faster superpage mapping
Date: Thu, 13 May 2010 14:40:22 -0500 [thread overview]
Message-ID: <201005131440.22479.dcm@mccr.org> (raw)
[-- Attachment #1: Type: Text/Plain, Size: 407 bytes --]
Here's my first cut of a faster superpage mapping scheme. It uses a separate
superpage table to track mappings of superpages and mappings that conflict with
using a superpage.
One new feature of this code is that it requires that every superpage be
allocated to a domain with a single call. This ensures that every page in the
superpage is allocated to the same domain.
Dave McCracken
Oracle Corp.
[-- Attachment #2: xen-unstable-newspage-1.patch --]
[-- Type: text/x-patch, Size: 17949 bytes --]
--- xen-unstable//xen/common/page_alloc.c 2010-04-19 09:23:24.000000000 -0500
+++ xen-sdev//xen/common/page_alloc.c 2010-05-13 13:02:49.000000000 -0500
@@ -1083,6 +1083,50 @@ void init_domheap_pages(paddr_t ps, padd
init_heap_pages(mfn_to_page(smfn), emfn - smfn);
}
+static void enable_superpage(
+ struct page_info *pg,
+ unsigned int order)
+{
+ struct spage_info *spage;
+ int i;
+
+ spage = page_to_spage(pg);
+ if (order < SUPERPAGE_ORDER)
+ {
+ test_and_clear_bit(_SGT_enabled, &spage->type_info);
+ return;
+ }
+ if (order == SUPERPAGE_ORDER)
+ {
+ test_and_set_bit(_SGT_enabled, &spage->type_info);
+ return;
+ }
+ order -= SUPERPAGE_ORDER;
+ for(i = 0; i < (1 << order); i++)
+ test_and_set_bit(_SGT_enabled, &spage[i].type_info);
+}
+
+static void disable_superpage(
+ struct page_info *pg,
+ unsigned int order)
+{
+ struct spage_info *spage;
+ int i;
+
+ spage = page_to_spage(pg);
+ test_and_clear_bit(_SGT_enabled, &spage->type_info);
+
+ if (order > SUPERPAGE_ORDER)
+ {
+ order -= SUPERPAGE_ORDER;
+ for(i = 1; i < (1 << order); i++)
+ {
+ BUG_ON((spage[i].type_info & SGT_count_mask) != 0);
+ test_and_clear_bit(_SGT_enabled, &spage[i].type_info);
+ }
+ }
+
+}
int assign_pages(
struct domain *d,
@@ -1128,6 +1172,9 @@ int assign_pages(
page_list_add_tail(&pg[i], &d->page_list);
}
+ if (opt_allow_superpage)
+ enable_superpage(pg, order);
+
spin_unlock(&d->page_alloc_lock);
return 0;
@@ -1201,6 +1248,9 @@ void free_domheap_pages(struct page_info
page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
}
+ if (opt_allow_superpage)
+ disable_superpage(pg, order);
+
d->tot_pages -= 1 << order;
drop_dom_ref = (d->tot_pages == 0);
--- xen-unstable//xen/include/asm-x86/mm.h 2010-04-28 09:31:26.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/mm.h 2010-05-05 09:33:49.000000000 -0500
@@ -214,6 +214,33 @@ struct page_info
#define PGC_count_width PG_shift(9)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+struct spage_info
+{
+ unsigned long count_info;
+ unsigned long type_info;
+};
+
+ /* The following page types are MUTUALLY EXCLUSIVE. */
+#define SGT_none PG_mask(0, 2) /* superpage not in use */
+#define SGT_superpage PG_mask(1, 2) /* mapped as a superpage */
+#define SGT_nosuper PG_mask(2, 2) /* has mappings that conflict with superpage */
+#define SGT_type_mask PG_mask(3, 2) /* Bits 30-31 or 62-63. */
+
+/* Enabled is set when the entire superpage is allocated as a block to a domain */
+#define _SGT_enabled PG_shift(3)
+#define SGT_enabled PG_mask(1, 3)
+
+ /* Count of uses of this superpage as its current type. */
+#define SGT_count_width PG_shift(3)
+#define SGT_count_mask ((1UL<<SGT_count_width)-1)
+
+static inline int spage_conflicts(unsigned long type)
+{
+ if (type && (type != PGT_writable_page))
+ return 1;
+ return 0;
+}
+
#if defined(__i386__)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({ \
@@ -262,12 +289,15 @@ extern void share_xen_page_with_privileg
struct page_info *page, int readonly);
#define frame_table ((struct page_info *)FRAMETABLE_VIRT_START)
+#define spage_table ((struct spage_info *)SPAGETABLE_VIRT_START)
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);
#define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
(sizeof(*frame_table) & -sizeof(*frame_table)))
+#define SDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
+ (sizeof(*spage_table) & -sizeof(*spage_table)))
extern unsigned long pdx_group_valid[];
/* Convert between Xen-heap virtual addresses and page-info structures. */
@@ -370,7 +400,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry
int check_descriptor(const struct domain *, struct desc_struct *d);
-extern int opt_allow_hugepage;
+extern int opt_allow_superpage;
extern int mem_hotplug;
/******************************************************************************
--- xen-unstable//xen/include/asm-x86/guest_pt.h 2010-04-19 09:23:24.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/guest_pt.h 2010-05-13 13:05:12.000000000 -0500
@@ -186,10 +186,11 @@ guest_supports_superpages(struct vcpu *v
/* The _PAGE_PSE bit must be honoured in HVM guests, whenever
* CR4.PSE is set or the guest is in PAE or long mode.
* It's also used in the dummy PT for vcpus with CR4.PG cleared. */
- return (is_hvm_vcpu(v) &&
+ return (opt_allow_superpage ||
+ (is_hvm_vcpu(v) &&
(GUEST_PAGING_LEVELS != 2
|| !hvm_paging_enabled(v)
- || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
+ || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))));
}
static inline int
--- xen-unstable//xen/include/asm-x86/x86_32/page.h 2009-10-07 15:43:52.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/x86_32/page.h 2010-05-04 08:14:07.000000000 -0500
@@ -6,6 +6,7 @@
#define L2_PAGETABLE_SHIFT 21
#define L3_PAGETABLE_SHIFT 30
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT
#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT
#define PAGETABLE_ORDER 9
@@ -13,6 +14,7 @@
#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define L3_PAGETABLE_ENTRIES 4
#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES
+#define SUPERPAGE_ORDER PAGETABLE_ORDER
/*
* Architecturally, physical addresses may be up to 52 bits. However, the
@@ -53,6 +55,9 @@
#define virt_to_pdx(va) virt_to_mfn(va)
#define pdx_to_virt(pdx) mfn_to_virt(pdx)
+#define pfn_to_sdx(pfn) ((pfn)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))
+#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))
+
static inline unsigned long __virt_to_maddr(unsigned long va)
{
ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END);
--- xen-unstable//xen/include/asm-x86/config.h 2010-04-06 07:44:56.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/config.h 2010-05-03 09:57:00.000000000 -0500
@@ -225,6 +225,11 @@ extern unsigned int video_mode, video_fl
/* Slot 261: xen text, static data and bss (1GB). */
#define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END)
#define XEN_VIRT_END (XEN_VIRT_START + GB(1))
+/* Slot 261: superpage information array (40MB). */
+#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START
+#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \
+ sizeof(struct spage_info))
+#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE)
/* Slot 261: page-frame information array (40GB). */
#define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START
#define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \
--- xen-unstable//xen/include/asm-x86/page.h 2009-12-18 08:35:12.000000000 -0600
+++ xen-sdev//xen/include/asm-x86/page.h 2010-05-10 11:14:21.000000000 -0500
@@ -240,6 +240,13 @@ void copy_page_sse2(void *, const void *
#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+/* Convert between machine frame numbers and spage-info structures. */
+#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn))
+#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table))
+
+/* Convert between page-info structures and spage-info structures. */
+#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT)))
+
/*
* We define non-underscored wrappers for above conversion functions. These are
* overridden in various source files while underscored versions remain intact.
@@ -251,6 +258,8 @@ void copy_page_sse2(void *, const void *
#define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma))
#define mfn_to_page(mfn) __mfn_to_page(mfn)
#define page_to_mfn(pg) __page_to_mfn(pg)
+#define mfn_to_spage(mfn) __mfn_to_spage(mfn)
+#define spage_to_mfn(pg) __spage_to_mfn(pg)
#define maddr_to_page(ma) __maddr_to_page(ma)
#define page_to_maddr(pg) __page_to_maddr(pg)
#define virt_to_page(va) __virt_to_page(va)
--- xen-unstable//xen/include/asm-x86/x86_64/page.h 2009-10-07 15:43:52.000000000 -0500
+++ xen-sdev//xen/include/asm-x86/x86_64/page.h 2010-05-04 10:44:38.000000000 -0500
@@ -7,6 +7,7 @@
#define L3_PAGETABLE_SHIFT 30
#define L4_PAGETABLE_SHIFT 39
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT
#define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT
#define PAGETABLE_ORDER 9
@@ -15,6 +16,7 @@
#define L3_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
#define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES
+#define SUPERPAGE_ORDER PAGETABLE_ORDER
#define __PAGE_OFFSET DIRECTMAP_VIRT_START
#define __XEN_VIRT_START XEN_VIRT_START
@@ -41,6 +43,8 @@ extern void pfn_pdx_hole_setup(unsigned
#define page_to_pdx(pg) ((pg) - frame_table)
#define pdx_to_page(pdx) (frame_table + (pdx))
+#define spage_to_pdx(spg) ((spg>>(SUPERPAGE_SHIFT-PAGE_SHIFT)) - spage_table)
+#define pdx_to_spage(pdx) (spage_table + ((pdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)))
/*
* Note: These are solely for the use by page_{get,set}_owner(), and
* therefore don't need to handle the XEN_VIRT_{START,END} range.
@@ -64,6 +68,16 @@ static inline unsigned long pdx_to_pfn(u
((pdx << pfn_pdx_hole_shift) & pfn_top_mask);
}
+static inline unsigned long pfn_to_sdx(unsigned long pfn)
+{
+ return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT);
+}
+
+static inline unsigned long sdx_to_pfn(unsigned long sdx)
+{
+ return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT));
+}
+
static inline unsigned long __virt_to_maddr(unsigned long va)
{
ASSERT(va >= XEN_VIRT_START);
--- xen-unstable//xen/arch/x86/mm.c 2010-04-28 09:31:26.000000000 -0500
+++ xen-sdev//xen/arch/x86/mm.c 2010-05-13 13:07:57.000000000 -0500
@@ -151,8 +151,15 @@ unsigned long __read_mostly pdx_group_va
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
-int opt_allow_hugepage;
+int opt_allow_superpage;
+static int opt_allow_hugepage;
boolean_param("allowhugepage", opt_allow_hugepage);
+boolean_param("allowsuperpage", opt_allow_superpage);
+
+static void get_spage(struct spage_info *spage);
+static void put_spage(struct spage_info *spage);
+static int get_spage_type(struct spage_info *spage, unsigned long type);
+static void put_spage_type(struct spage_info *spage);
#define l1_disallow_mask(d) \
((d != dom_io) && \
@@ -202,6 +209,28 @@ static void __init init_frametable_chunk
memset(end, -1, s - (unsigned long)end);
}
+static void __init init_spagetable(void)
+{
+ unsigned long s, start = SPAGETABLE_VIRT_START;
+ unsigned long end = SPAGETABLE_VIRT_END;
+ unsigned long step, mfn;
+ unsigned int max_entries;
+
+ step = 1UL << PAGETABLE_ORDER;
+ max_entries = (max_pdx + ((1UL<<SUPERPAGE_ORDER)-1)) >> SUPERPAGE_ORDER;
+ end = start + (((max_entries * sizeof(*spage_table)) +
+ ((1UL<<SUPERPAGE_SHIFT)-1)) & (~((1UL<<SUPERPAGE_SHIFT)-1)));
+
+ for (s = start; s < end; s += step << PAGE_SHIFT)
+ {
+ mfn = alloc_boot_pages(step, step);
+ if ( !mfn )
+ panic("Not enough memory for spage table");
+ map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
+ }
+ memset((void *)start, 0, end - start);
+}
+
void __init init_frametable(void)
{
unsigned int sidx, eidx, nidx;
@@ -212,6 +241,9 @@ void __init init_frametable(void)
#endif
BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
+ if (opt_allow_hugepage)
+ opt_allow_superpage = 1;
+
for ( sidx = 0; ; sidx = nidx )
{
eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
@@ -232,6 +264,8 @@ void __init init_frametable(void)
(unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) -
(unsigned long)pdx_to_page(max_pdx));
}
+ if (opt_allow_superpage)
+ init_spagetable();
}
void __init arch_init_memory(void)
@@ -652,19 +686,6 @@ static int get_page_and_type_from_pagenr
return rc;
}
-static int get_data_page(
- struct page_info *page, struct domain *d, int writeable)
-{
- int rc;
-
- if ( writeable )
- rc = get_page_and_type(page, d, PGT_writable_page);
- else
- rc = get_page(page, d);
-
- return rc;
-}
-
static void put_data_page(
struct page_info *page, int writeable)
{
@@ -870,6 +891,7 @@ static int
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
{
+ struct spage_info *spage;
unsigned long mfn = l2e_get_pfn(l2e);
int rc;
@@ -886,31 +908,41 @@ get_page_from_l2e(
{
rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
- rc = 0;
+ return 0;
+
+ return rc;
}
- else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+ if ( !opt_allow_superpage )
{
- rc = -EINVAL;
+ MEM_LOG("Attempt to map superpage without allowsuperpage flag in hypervisor");
+ return -EINVAL;
}
- else
+ if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
{
- unsigned long m = mfn;
- int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
-
- do {
- if ( !mfn_valid(m) ||
- !get_data_page(mfn_to_page(m), d, writeable) )
- {
- while ( m-- > mfn )
- put_data_page(mfn_to_page(m), writeable);
- return -EINVAL;
- }
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn);
+ return -EINVAL;
+ }
+ spage = mfn_to_spage(mfn);
- rc = 1;
+ if (!(spage->type_info & SGT_enabled))
+ {
+ MEM_LOG("Map attempt on non-contiguous superpage, mfn %lx", mfn);
+ return -EINVAL;
}
- return rc;
+ get_spage(spage);
+
+ if (l2e_get_flags(l2e) & _PAGE_RW)
+ {
+ if (!get_spage_type(spage, SGT_superpage))
+ {
+ put_spage(spage);
+ MEM_LOG("Superpage in use as page table mfn %lx, type %lx",
+ mfn, spage->type_info);
+ return -EINVAL;
+ }
+ }
+ return 0;
}
@@ -1101,13 +1133,11 @@ static int put_page_from_l2e(l2_pgentry_
if ( l2e_get_flags(l2e) & _PAGE_PSE )
{
- unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
- int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+ struct spage_info *spage = mfn_to_spage(l2e_get_pfn(l2e));
- ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
- do {
- put_data_page(mfn_to_page(m), writeable);
- } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ put_spage(spage);
+ if (l2e_get_flags(l2e) & _PAGE_RW)
+ put_spage_type(spage);
}
else
{
@@ -2038,6 +2068,75 @@ int get_page(struct page_info *page, str
return 0;
}
+static void get_spage(struct spage_info *spage)
+{
+ unsigned long x, y = spage->count_info;
+
+ do {
+ x = y;
+ }
+ while ( (y = cmpxchg(&spage->count_info, x, x + 1)) != x );
+
+ return;
+}
+
+static void put_spage(struct spage_info *spage)
+{
+ unsigned long x, y = spage->count_info;
+
+ do {
+ ASSERT(y != 0);
+ x = y;
+ }
+ while ( (y = cmpxchg(&spage->count_info, x, x - 1)) != x );
+
+ return;
+}
+
+static int get_spage_type(struct spage_info *spage, unsigned long type)
+{
+ unsigned long x, nx, y = spage->type_info;
+
+ do {
+ x = y;
+ nx = x + 1;
+ if ( unlikely((nx & SGT_count_mask) == 0) )
+ {
+ MEM_LOG("Superpage type count overflow on pfn %lx", spage_to_mfn(spage));
+ return 0;
+ }
+ if ( unlikely((x & SGT_count_mask) == 0) )
+ {
+ nx = (nx & ~SGT_type_mask) | type;
+ }
+ else
+ {
+ if ( unlikely((x & SGT_type_mask) != type) )
+ return 0;
+ }
+ }
+ while ( (y = cmpxchg(&spage->type_info, x, nx)) != x );
+ return 1;
+}
+
+static void put_spage_type(struct spage_info *spage)
+{
+ unsigned long x, nx, y = spage->type_info;
+
+ do {
+ x = y;
+ nx = x - 1;
+ if ((x & SGT_count_mask) == 0)
+ {
+ return;
+ }
+ if ((nx & SGT_count_mask) == 0)
+ nx = (nx & ~SGT_type_mask) | SGT_none;
+ }
+ while ( (y = cmpxchg(&spage->type_info, x, nx)) != x );
+ return;
+}
+
/*
* Special version of get_page() to be used exclusively when
* - a page is known to already have a non-zero reference count
@@ -2279,6 +2378,10 @@ static int __put_page_type(struct page_i
return -EINTR;
}
+ if (opt_allow_superpage && spage_conflicts(x & PGT_type_mask))
+ {
+ put_spage_type(page_to_spage(page));
+ }
return rc;
}
@@ -2413,6 +2516,15 @@ static int __get_page_type(struct page_i
rc = alloc_page_type(page, type, preemptible);
}
+ if (opt_allow_superpage && spage_conflicts(type))
+ {
+ if (!get_spage_type(page_to_spage(page), SGT_nosuper))
+ {
+ __put_page_type(page, 0);
+ return -EINVAL;
+ }
+ }
+
if ( (x & PGT_partial) && !(nx & PGT_partial) )
put_page(page);
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2010-05-13 19:40 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-13 19:40 Dave McCracken [this message]
2010-05-14 7:25 ` [PATCH] Implement faster superpage mapping Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201005131440.22479.dcm@mccr.org \
--to=dcm@mccr.org \
--cc=Keir.Fraser@eu.citrix.com \
--cc=jeremy@goop.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.