All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave McCracken <dcm@mccr.org>
To: Keir Fraser <keir.fraser@eu.citrix.com>
Cc: Ian Pratt <Ian.Pratt@eu.citrix.com>, xen-devel@lists.xensource.com
Subject: Re: [PATCH 1/2] PV hugepages - Xen patch
Date: Wed, 8 Oct 2008 17:07:08 -0500	[thread overview]
Message-ID: <200810081707.09324.dcm@mccr.org> (raw)
In-Reply-To: <C512BF78.1DFEF%keir.fraser@eu.citrix.com>

[-- Attachment #1: Type: text/plain, Size: 229 bytes --]

On Wednesday 08 October 2008, Keir Fraser wrote:
> Then PV superpage support must be a configuration option, and disabled by
> default.

I added a command line option to enable it.  Is this what you had in mind?

Dave McCracken


[-- Attachment #2: xen-hpage-06.patch --]
[-- Type: text/x-diff, Size: 5613 bytes --]

--- xen-unstable//./xen/include/asm-x86/x86_32/page.h	2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
  * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
  * Permit the NX bit if the hardware supports it.
  */
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
  * Permit the NX bit if the hardware supports it.
  * Note that range [62:52] is available for software use on x86/64.
  */
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c	2008-10-08 16:56:46.000000000 -0500
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+static int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,28 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int
+get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if (writeable)
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void
+put_data_page(struct page_info *page, int writeable)
+{
+    if (writeable)
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +681,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +724,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) && 
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !(unlikely(paging_mode_external(d) && (d != curr->domain)));
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +784,42 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        if (!opt_allow_hugepage)
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if (unlikely(!rc)) {
+            return rc;
+        }
+
+        for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if (unlikely(!rc)) {
+                for (--m; m > mfn; --m) {
+                    put_data_page(mfn_to_page(m), writeable);
+                }
+                put_data_page(page, writeable);
+                return 0;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1011,19 @@ static int put_page_from_l2e(l2_pgentry_
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if (l2e_get_flags(l2e) & _PAGE_PSE) {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m <= me; m++) {
+                put_data_page(mfn_to_page(m), writeable);
+            }
+            put_data_page(page, writeable);
+        } else {
+            put_page_and_type(l2e_get_page(l2e));
+        }
         return 0;
     }
     return 1;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  reply	other threads:[~2008-10-08 22:07 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-02 23:26 [PATCH 1/2] PV hugepages - Xen patch Dave McCracken
2008-10-03  8:58 ` Keir Fraser
2008-10-08 17:05   ` Dave McCracken
2008-10-08 18:11     ` Keir Fraser
2008-10-08 18:28       ` Dave McCracken
2008-10-08 18:50         ` Keir Fraser
2008-10-08 22:07           ` Dave McCracken [this message]
2008-10-09  6:45             ` Keir Fraser
2008-10-09 10:21             ` Keir Fraser
2008-10-08 22:50       ` Jeremy Fitzhardinge
2008-10-09  8:38         ` Daniel P. Berrange
2008-10-10  0:05           ` Jeremy Fitzhardinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200810081707.09324.dcm@mccr.org \
    --to=dcm@mccr.org \
    --cc=Ian.Pratt@eu.citrix.com \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.