From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH, RFC] i386: highmem access assistance hypercalls Date: Fri, 17 Oct 2008 16:28:13 +0100 Message-ID: <48F8CB2D.76E4.0078.0@novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Return-path: Content-Disposition: inline List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org While looking at the origin of very frequently executed hypercalls, I realized that the high page accessor functions in Linux would be good candidates for handling in the hypervisor - clearing or copying to/from a high page is a pretty frequent operation (provided there's enough memory in the domain). While prior to the first submission I had only measured kernel builds (where the results did not hint at a meaningful improvement), I have now found time to do a more specific analysis: page clearing is improved by about 20%, while page copying doesn't seem to benefit significantly (though that may be an effect of the simplistic copy_page() implementation Xen currently uses) - nevertheless, I would think that if one function is supported by the hypervisor, then the other should be as well. The hypervisor patch is below (using temporary numbers for the newly added sub-hypercalls); the Linux patch hasn't changed since the first submission (if the patch here is acceptable, I'll create a version that applies to the 2.6.18 tree). Opinions? 
Signed-off-by: Jan Beulich Index: 2008-09-19/xen/arch/x86/mm.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/mm.c 2008-09-17 09:26:41.000000000 = +0200 +++ 2008-09-19/xen/arch/x86/mm.c 2008-09-19 14:00:01.000000000 = +0200 @@ -2432,6 +2432,29 @@ static inline cpumask_t vcpumask_to_pcpu return pmask; } =20 +#ifdef __i386__ +static inline void *fixmap_domain_page(unsigned long mfn) +{ + unsigned int cpu =3D smp_processor_id(); + void *ptr =3D (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); + + l1e_write(fix_pae_highmem_pl1e - cpu, + l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); + flush_tlb_one_local(ptr); + return ptr; +} +static inline void fixunmap_domain_page(const void *ptr) +{ + unsigned int cpu =3D virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM= _0; + + l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty()); + this_cpu(make_cr3_timestamp) =3D this_cpu(tlbflush_time); +} +#else +#define fixmap_domain_page(mfn) mfn_to_virt(mfn) +#define fixunmap_domain_page(ptr) ((void)(ptr)) +#endif + int do_mmuext_op( XEN_GUEST_HANDLE(mmuext_op_t) uops, unsigned int count, @@ -2701,6 +2724,66 @@ int do_mmuext_op( break; } =20 + case MMUEXT_CLEAR_PAGE: + { + unsigned char *ptr; + + okay =3D !get_page_and_type_from_pagenr(mfn, PGT_writable_page= , + FOREIGNDOM, 0); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while clearing mfn %lx", mfn); + break; + } + + /* A page is dirtied when it's being cleared. 
*/ + paging_mark_dirty(d, mfn); + + ptr =3D fixmap_domain_page(mfn); + clear_page(ptr); + fixunmap_domain_page(ptr); + + put_page_and_type(page); + break; + } + + case MMUEXT_COPY_PAGE: + { + const unsigned char *src; + unsigned char *dst; + unsigned long src_mfn; + + src_mfn =3D gmfn_to_mfn(FOREIGNDOM, op.arg2.src_mfn); + okay =3D get_page_from_pagenr(src_mfn, FOREIGNDOM); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while copying from mfn %lx", src_mfn); + break; + } + + okay =3D !get_page_and_type_from_pagenr(mfn, PGT_writable_page= , + FOREIGNDOM, 0); + if ( unlikely(!okay) ) + { + put_page(mfn_to_page(src_mfn)); + MEM_LOG("Error while copying to mfn %lx", mfn); + break; + } + + /* A page is dirtied when it's being copied to. */ + paging_mark_dirty(d, mfn); + + src =3D map_domain_page(src_mfn); + dst =3D fixmap_domain_page(mfn); + copy_page(dst, src); + fixunmap_domain_page(dst); + unmap_domain_page(src); + + put_page_and_type(page); + put_page(mfn_to_page(src_mfn)); + break; + } + default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); rc =3D -ENOSYS; Index: 2008-09-19/xen/arch/x86/x86_32/domain_page.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/x86_32/domain_page.c 2008-09-19 = 13:59:19.000000000 +0200 +++ 2008-09-19/xen/arch/x86/x86_32/domain_page.c 2008-09-19 = 14:00:01.000000000 +0200 @@ -114,7 +114,7 @@ void *map_domain_page(unsigned long mfn) return (void *)va; } =20 -void unmap_domain_page(void *va) +void unmap_domain_page(const void *va) { unsigned int idx; struct vcpu *v; @@ -241,7 +241,7 @@ void *map_domain_page_global(unsigned lo return (void *)va; } =20 -void unmap_domain_page_global(void *va) +void unmap_domain_page_global(const void *va) { unsigned long __va =3D (unsigned long)va; l2_pgentry_t *pl2e; Index: 
2008-09-19/xen/arch/x86/x86_64/compat/mm.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/x86_64/compat/mm.c 2008-09-15 = 11:25:43.000000000 +0200 +++ 2008-09-19/xen/arch/x86/x86_64/compat/mm.c 2008-09-19 14:00:01.0000000= 00 +0200 @@ -231,6 +231,8 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm case MMUEXT_PIN_L4_TABLE: case MMUEXT_UNPIN_TABLE: case MMUEXT_NEW_BASEPTR: + case MMUEXT_CLEAR_PAGE: + case MMUEXT_COPY_PAGE: arg1 =3D XLAT_mmuext_op_arg1_mfn; break; default: @@ -258,6 +260,9 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm case MMUEXT_INVLPG_MULTI: arg2 =3D XLAT_mmuext_op_arg2_vcpumask; break; + case MMUEXT_COPY_PAGE: + arg2 =3D XLAT_mmuext_op_arg2_src_mfn; + break; default: arg2 =3D -1; break; Index: 2008-09-19/xen/include/asm-x86/fixmap.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/include/asm-x86/fixmap.h 2008-09-15 = 11:25:43.000000000 +0200 +++ 2008-09-19/xen/include/asm-x86/fixmap.h 2008-09-19 14:00:01.0000000= 00 +0200 @@ -29,6 +29,7 @@ * from the end of virtual memory backwards. 
*/ enum fixed_addresses { + FIX_HOLE, #ifdef __i386__ FIX_PAE_HIGHMEM_0, FIX_PAE_HIGHMEM_END =3D FIX_PAE_HIGHMEM_0 + NR_CPUS-1, Index: 2008-09-19/xen/include/public/xen.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/include/public/xen.h 2008-09-15 11:25:43.0000000= 00 +0200 +++ 2008-09-19/xen/include/public/xen.h 2008-09-19 14:00:01.000000000 = +0200 @@ -231,6 +231,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -247,12 +254,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 0x1000 +#define MMUEXT_COPY_PAGE 0x1001 =20 #ifndef __ASSEMBLY__ struct mmuext_op { unsigned int cmd; union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE */ xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; @@ -266,6 +276,8 @@ struct mmuext_op { #else void *vcpumask; #endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; } arg2; }; typedef struct mmuext_op mmuext_op_t; Index: 2008-09-19/xen/include/xen/domain_page.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/include/xen/domain_page.h 2008-09-15 = 11:25:43.000000000 +0200 
+++ 2008-09-19/xen/include/xen/domain_page.h 2008-09-19 14:00:01.0000000= 00 +0200 @@ -24,7 +24,7 @@ void *map_domain_page(unsigned long mfn) * Pass a VA within a page previously mapped in the context of the * currently-executing VCPU via a call to map_domain_page(). */ -void unmap_domain_page(void *va); +void unmap_domain_page(const void *va); =20 /* * Similar to the above calls, except the mapping is accessible in all @@ -32,7 +32,7 @@ void unmap_domain_page(void *va); * mappings can also be unmapped from any context. */ void *map_domain_page_global(unsigned long mfn); -void unmap_domain_page_global(void *va); +void unmap_domain_page_global(const void *va); =20 #define DMCACHE_ENTRY_VALID 1U #define DMCACHE_ENTRY_HELD 2U @@ -75,7 +75,7 @@ map_domain_page_with_cache(unsigned long } =20 static inline void -unmap_domain_page_with_cache(void *va, struct domain_mmap_cache *cache) +unmap_domain_page_with_cache(const void *va, struct domain_mmap_cache = *cache) { ASSERT(cache !=3D NULL); cache->flags &=3D ~DMCACHE_ENTRY_HELD;