From mboxrd@z Thu Jan 1 00:00:00 1970 From: Dave McCracken Subject: [PATCH] Add hypercall to mark superpages to improve performance Date: Wed, 28 Apr 2010 09:33:20 -0500 Message-ID: <201004280933.20527.dcm@mccr.org> Mime-Version: 1.0 Content-Type: Multipart/Mixed; boundary="Boundary-00=_wcE2LEj7D1oQbVU" Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Keir Fraser , Jeremy Fitzhardinge Cc: Xen Developers List List-Id: xen-devel@lists.xenproject.org --Boundary-00=_wcE2LEj7D1oQbVU Content-Type: Text/Plain; charset="us-ascii" Content-Transfer-Encoding: 7bit The current method of mapping hugepages/superpages in the hypervisor involves updating the reference counts of every page in the superpage. This has proved to be a significant performance bottleneck. This patch adds a pair of MMUEXT hypercalls to mark and unmark a superpage. Once the superpage is marked, the type is locked to writable page until a companion unmark is done. When that superpage is subsequently mapped, only the first page needs to be reference counted. There are checks when the superpage is marked and unmarked to make sure no individual page mappings have skewed the reference counts. Dave McCracken Oracle Corp --Boundary-00=_wcE2LEj7D1oQbVU Content-Type: text/x-patch; charset="ISO-8859-1"; name="xen-unstable-fhpage-1.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="xen-unstable-fhpage-1.patch" --- xen-unstable//xen/include/public/xen.h 2009-12-18 08:35:12.000000000 -0600 +++ xen-fhpage//xen/include/public/xen.h 2010-04-28 09:32:08.000000000 -0500 @@ -250,6 +250,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); * cmd: MMUEXT_COPY_PAGE * mfn: Machine frame number of the destination page. * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_MARK_SUPER + * mfn: Machine frame number of head of superpage to be marked. + * + * cmd: MMUEXT_UNMARK_SUPER + * mfn: Machine frame number of head of superpage to be cleared. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -268,13 +274,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #define MMUEXT_NEW_USER_BASEPTR 15 #define MMUEXT_CLEAR_PAGE 16 #define MMUEXT_COPY_PAGE 17 +#define MMUEXT_MARK_SUPER 18 +#define MMUEXT_UNMARK_SUPER 19 #ifndef __ASSEMBLY__ struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR - * CLEAR_PAGE, COPY_PAGE */ + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; --- xen-unstable//xen/include/asm-x86/mm.h 2010-04-28 09:31:26.000000000 -0500 +++ xen-fhpage//xen/include/asm-x86/mm.h 2010-04-28 09:32:08.000000000 -0500 @@ -182,9 +182,12 @@ struct page_info /* Page is locked? */ #define _PGT_locked PG_shift(9) #define PGT_locked PG_mask(1, 9) + /* Page is part of a superpage? */ +#define _PGT_super PG_shift(10) +#define PGT_super PG_mask(1, 10) /* Count of uses of this frame as its current type. */ -#define PGT_count_width PG_shift(9) +#define PGT_count_width PG_shift(10) #define PGT_count_mask ((1UL<u.inuse.type_info) ) + { + unsigned long mfn = page_to_mfn(page); + int i; + + for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++) + { + put_page_and_type(mfn_to_page(mfn)); + } + } + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); --- xen-unstable//xen/arch/x86/mm.c 2010-04-28 09:31:26.000000000 -0500 +++ xen-fhpage//xen/arch/x86/mm.c 2010-04-28 09:32:08.000000000 -0500 @@ -894,20 +894,30 @@ get_page_from_l2e( } else { - unsigned long m = mfn; + struct page_info *page = mfn_to_page(mfn); int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW); - do { - if ( !mfn_valid(m) || - !get_data_page(mfn_to_page(m), d, writeable) ) - { - while ( m-- > mfn ) - put_data_page(mfn_to_page(m), writeable); - return -EINVAL; - } - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - - rc = 1; + if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) ) + { + rc = get_data_page(page, d, writeable); + if ( unlikely(!rc) ) + rc = -EINVAL; + } + else + { + unsigned long m = mfn; + + do { + if ( !mfn_valid(m) || + !get_data_page(mfn_to_page(m), d, writeable) ) + { + while ( m-- > mfn ) + put_data_page(mfn_to_page(m), writeable); + return -EINVAL; + } + } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + rc = 1; + } } return rc; @@ -1101,13 +1111,24 @@ static int put_page_from_l2e(l2_pgentry_ if ( l2e_get_flags(l2e) & _PAGE_PSE ) { - unsigned long mfn = l2e_get_pfn(l2e), m = mfn; + unsigned long mfn = l2e_get_pfn(l2e); + struct page_info *page = mfn_to_page(mfn); int writeable = l2e_get_flags(l2e) & _PAGE_RW; ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); - do { - put_data_page(mfn_to_page(m), writeable); - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + + if ( likely(test_bit(_PGT_super, &page->u.inuse.type_info)) ) + { + put_data_page(page, writeable); + } + else + { + unsigned long m = mfn; + + do { + put_data_page(mfn_to_page(m), writeable); + } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + } } else { @@ -2981,6 +3002,93 @@ int do_mmuext_op( break; } + case MMUEXT_MARK_SUPER: + { + unsigned long mfn; + struct page_info *page, *p; + unsigned long count; + int i; + + mfn = op.arg1.mfn; + if (mfn & (L1_PAGETABLE_ENTRIES-1)) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + page = mfn_to_page(mfn); + if (unlikely(test_and_set_bit(_PGT_super, &page->u.inuse.type_info)) ) + { + MEM_LOG("Super flag already set on mfn %lx", mfn); + okay = 0; + break; + } + count = page->u.inuse.type_info & PGT_count_mask; + for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++) + { + p = mfn_to_page(mfn); + if ((p->u.inuse.type_info & PGT_count_mask) != count) + { + MEM_LOG("Mismatched page count, index %d, expected count %d, found %d", + i, count, p->u.inuse.type_info & PGT_count_mask); + okay = 0; + } else + { + okay = !get_page_and_type(p, d, PGT_writable_page); + } + if (!okay) + { + MEM_LOG("Mismatched type setting super flag"); + while (--i >= 0) + put_page_and_type(mfn_to_page(--mfn)); + test_and_clear_bit(_PGT_super, &page->u.inuse.type_info); + break; + } + } + break; + } + + case MMUEXT_UNMARK_SUPER: + { + unsigned long mfn; + struct page_info *page, *p; + unsigned long count; + int i; + + mfn = op.arg1.mfn; + if (mfn & (L1_PAGETABLE_ENTRIES-1)) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + page = mfn_to_page(mfn); + if (unlikely(!test_and_clear_bit(_PGT_super, &page->u.inuse.type_info)) ) + { + MEM_LOG("Super flag already clear on mfn %lx", mfn); + okay = 0; + break; + } + count = page->u.inuse.type_info & PGT_count_mask; + for (i = 0; i < L1_PAGETABLE_ENTRIES; i++, mfn++) + { + p = mfn_to_page(mfn); + if ((p->u.inuse.type_info & PGT_count_mask) != count) + { + MEM_LOG("Superpage still in use. Can not clear flag"); + okay = 0; + while (--i >= 0) + { + get_page_and_type(mfn_to_page(--mfn), d, PGT_writable_page); + } + test_and_set_bit(_PGT_super, &page->u.inuse.type_info); + break; + } + put_page_and_type(p); + } + break; + } + default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); rc = -ENOSYS; --Boundary-00=_wcE2LEj7D1oQbVU Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --Boundary-00=_wcE2LEj7D1oQbVU--