From mboxrd@z Thu Jan 1 00:00:00 1970 From: Dave McCracken Subject: Re: [PATCH 1/2] PV hugepages - Xen patch Date: Wed, 8 Oct 2008 12:05:16 -0500 Message-ID: <200810081205.16978.dcm@mccr.org> References: Mime-Version: 1.0 Content-Type: Multipart/Mixed; boundary="Boundary-00=_MhO7IQxU1BgjDNp" Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: xen-devel@lists.xensource.com Cc: Keir Fraser List-Id: xen-devel@lists.xenproject.org --Boundary-00=_MhO7IQxU1BgjDNp Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Content-Disposition: inline On Friday 03 October 2008, Keir Fraser wrote: > Some issues: >  * You need to check return value of get_page_from_pagenr() on every page > of the superpage. Any one of them can fail, causing you to undo your work > so far and then fail. >  * You need to get_page_type(PGT_writable) on every page if the superpage > mapping asserts _PAGE_RW. Otherwise the guest is getting write access > without that being asserted in the reference counts. >  * Look at get_page_from_l1e() for an example of how this is done for a > single page. You need to do similar work for every page of the super-page. Ok, here's a version of the patch with all these issues addressed. >  * This surely breaks save/restore, since the restore code is not > superpage-aware. I don't have this one solved yet. I'm working on it. 
Dave McCracken --Boundary-00=_MhO7IQxU1BgjDNp Content-Type: text/x-diff; charset="iso 8859-15"; name="xen-hpage-05.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="xen-hpage-05.patch" --- xen-unstable//./xen/include/asm-x86/x86_32/page.h 2008-07-17 09:49:27.000000000 -0500 +++ xen-hpage/./xen/include/asm-x86/x86_32/page.h 2008-10-02 15:07:34.000000000 -0500 @@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL. * Permit the NX bit if the hardware supports it. */ -#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX) +#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX) #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) --- xen-unstable//./xen/include/asm-x86/x86_64/page.h 2008-10-02 14:23:17.000000000 -0500 +++ xen-hpage/./xen/include/asm-x86/x86_64/page.h 2008-10-02 15:07:34.000000000 -0500 @@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t; * Permit the NX bit if the hardware supports it. * Note that range [62:52] is available for software use on x86/64. */ -#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX) +#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX) #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) --- xen-unstable//./xen/arch/x86/mm.c 2008-10-02 14:23:17.000000000 -0500 +++ xen-hpage/./xen/arch/x86/mm.c 2008-10-08 11:35:44.000000000 -0500 @@ -584,6 +584,28 @@ static int get_page_and_type_from_pagenr return rc; } +static int +get_data_page(struct page_info *page, struct domain *d, int writeable) +{ + int rc; + + if (writeable) + rc = get_page_and_type(page, d, PGT_writable_page); + else + rc = get_page(page, d); + + return rc; +} + +static void +put_data_page(struct page_info *page, int writeable) +{ + if (writeable) + put_page_and_type(page); + else + put_page(page); +} + /* * We allow root tables to map each other (a.k.a. 
linear page tables). It * needs some special care with reference counts and access permissions: @@ -656,6 +678,7 @@ get_page_from_l1e( struct vcpu *curr = current; struct domain *owner; int okay; + int writeable; if ( !(l1f & _PAGE_PRESENT) ) return 1; @@ -698,10 +721,9 @@ get_page_from_l1e( * contribute to writeable mapping refcounts. (This allows the * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ - okay = (((l1f & _PAGE_RW) && - !(unlikely(paging_mode_external(d) && (d != curr->domain)))) - ? get_page_and_type(page, d, PGT_writable_page) - : get_page(page, d)); + writeable = (l1f & _PAGE_RW) && + !(unlikely(paging_mode_external(d) && (d != curr->domain))); + okay = get_data_page(page, d, writeable); if ( !okay ) { MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte @@ -759,11 +781,39 @@ get_page_from_l2e( MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); return -EINVAL; } + if ( l2e_get_flags(l2e) & _PAGE_PSE ) { + unsigned long mfn = l2e_get_pfn(l2e); + unsigned long m, me; + struct page_info *page = mfn_to_page(mfn); + int writeable; - rc = get_page_and_type_from_pagenr( - l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); - if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) - rc = 0; + writeable = l2e_get_flags(l2e) & _PAGE_RW; + + rc = get_data_page(page, d, writeable); + if (unlikely(!rc)) { + return rc; + } + + for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) { + rc = get_data_page(mfn_to_page(m), d, writeable); + if (unlikely(!rc)) { + for (--m; m > mfn; --m) { + put_data_page(mfn_to_page(m), writeable); + } + put_data_page(page, writeable); + return 0; + } + } +#ifdef __x86_64__ + map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES, + PAGE_HYPERVISOR | l2e_get_flags(l2e)); +#endif + } else { + rc = get_page_and_type_from_pagenr( + l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); + if ( unlikely(rc == 
-EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) + rc = 0; + } return rc; } @@ -955,7 +1005,19 @@ static int put_page_from_l2e(l2_pgentry_ if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && (l2e_get_pfn(l2e) != pfn) ) { - put_page_and_type(l2e_get_page(l2e)); + if (l2e_get_flags(l2e) & _PAGE_PSE) { + unsigned long mfn = l2e_get_pfn(l2e); + unsigned long m, me; + struct page_info *page = mfn_to_page(mfn); + int writeable = l2e_get_flags(l2e) & _PAGE_RW; + + for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) { + put_data_page(mfn_to_page(m), writeable); + } + put_data_page(page, writeable); + } else { + put_page_and_type(l2e_get_page(l2e)); + } return 0; } return 1; --Boundary-00=_MhO7IQxU1BgjDNp Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --Boundary-00=_MhO7IQxU1BgjDNp--