All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave McCracken <dcm@mccr.org>
To: xen-devel@lists.xensource.com
Cc: Keir Fraser <keir.fraser@eu.citrix.com>
Subject: Re: [PATCH 1/2] PV hugepages - Xen patch
Date: Wed, 8 Oct 2008 12:05:16 -0500	[thread overview]
Message-ID: <200810081205.16978.dcm@mccr.org> (raw)
In-Reply-To: <C50B9D48.1DCAB%keir.fraser@eu.citrix.com>

[-- Attachment #1: Type: text/plain, Size: 839 bytes --]

On Friday 03 October 2008, Keir Fraser wrote:
> Some issues:
>  * You need to check return value of get_page_from_pagenr() on every page
> of the superpage. Any one of them can fail, causing you to undo your work
> so far and then fail.
>  * You need to get_page_type(PGT_writable) on every page if the superpage
> mapping asserts _PAGE_RW. Otherwise the guest is getting write access
> without that being asserted in the reference counts.
>  * Look at get_page_from_l1e() for an example of how this is done for a
> single page. You need to do similar work for every page of the super-page.

Ok, here's a version of the patch with all these issues addressed.

>  * This surely breaks save/restore, since the restore code is not
> superpage-aware.

I don't have this one solved yet.  I'm working on it.

Dave McCracken


[-- Attachment #2: xen-hpage-05.patch --]
[-- Type: text/x-diff, Size: 5147 bytes --]

--- xen-unstable//./xen/include/asm-x86/x86_32/page.h	2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
  * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
  * Permit the NX bit if the hardware supports it.
  */
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
  * Permit the NX bit if the hardware supports it.
  * Note that range [62:52] is available for software use on x86/64.
  */
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c	2008-10-08 11:35:44.000000000 -0500
@@ -584,6 +584,28 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int
+get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if (writeable)
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void
+put_data_page(struct page_info *page, int writeable)
+{
+    if (writeable)
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +678,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +721,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) && 
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !(unlikely(paging_mode_external(d) && (d != curr->domain)));
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +781,39 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if (unlikely(!rc)) {
+            return rc;
+        }
+
+        for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if (unlikely(!rc)) {
+                for (--m; m > mfn; --m) {
+                    put_data_page(mfn_to_page(m), writeable);
+                }
+                put_data_page(page, writeable);
+                return 0;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1005,19 @@ static int put_page_from_l2e(l2_pgentry_
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if (l2e_get_flags(l2e) & _PAGE_PSE) {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) {
+                put_data_page(mfn_to_page(m), writeable);
+            }
+            put_data_page(page, writeable);
+        } else {
+            put_page_and_type(l2e_get_page(l2e));
+        }
         return 0;
     }
     return 1;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  reply	other threads:[~2008-10-08 17:05 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-02 23:26 [PATCH 1/2] PV hugepages - Xen patch Dave McCracken
2008-10-03  8:58 ` Keir Fraser
2008-10-08 17:05   ` Dave McCracken [this message]
2008-10-08 18:11     ` Keir Fraser
2008-10-08 18:28       ` Dave McCracken
2008-10-08 18:50         ` Keir Fraser
2008-10-08 22:07           ` Dave McCracken
2008-10-09  6:45             ` Keir Fraser
2008-10-09 10:21             ` Keir Fraser
2008-10-08 22:50       ` Jeremy Fitzhardinge
2008-10-09  8:38         ` Daniel P. Berrange
2008-10-10  0:05           ` Jeremy Fitzhardinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200810081205.16978.dcm@mccr.org \
    --to=dcm@mccr.org \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.