All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andres Lagar-Cavilla <andres@lagarcavilla.org>
To: xen-devel@lists.xensource.com
Cc: andres@gridcentric.ca, keir.xen@gmail.com, tim@xen.org,
	JBeulich@suse.com, adin@gridcentric.ca
Subject: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
Date: Thu, 24 Nov 2011 08:41:31 -0500	[thread overview]
Message-ID: <f079cbeae77e5570c193.1322142091@xdev.gridcentric.ca> (raw)
In-Reply-To: <patchbomb.1322142090@xdev.gridcentric.ca>

 xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/nestedhvm.c       |   4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
 xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
 xen/include/asm-x86/hvm/hvm.h      |   9 +++-
 xen/include/asm-x86/hvm/vcpu.h     |   1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
 7 files changed, 88 insertions(+), 55 deletions(-)


The nested hvm code maps pages of the guest hvm. These maps live beyond
a hypervisor entry/exit pair, and thus their liveness cannot be ensured
with get_gfn/put_gfn critical sections. Ensure their liveness by
increasing the page ref count, instead.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-/* We leave this function holding a lock on the p2m entry */
-static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
+/* On non-NULL return, we leave this function holding an additional 
+ * ref on the underlying mfn, if any */
+static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
+                                    struct page_info **page)
 {
+    void *map;
     unsigned long mfn;
     p2m_type_t p2mt;
+    struct page_info *pg;
     struct domain *d = current->domain;
 
     mfn = mfn_x(writable
@@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
     if ( writable )
         paging_mark_dirty(d, mfn);
 
-    return map_domain_page(mfn);
+    pg = mfn_to_page(mfn);
+    if (page)
+    {
+        page_get_owner_and_reference(pg);
+        *page = pg;
+    }
+
+    map = map_domain_page(mfn);
+    put_gfn(d, gfn);
+    return map;
 }
 
-void *hvm_map_guest_frame_rw(unsigned long gfn)
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 1);
+    return __hvm_map_guest_frame(gfn, 1, page);
 }
 
-void *hvm_map_guest_frame_ro(unsigned long gfn)
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 0);
+    return __hvm_map_guest_frame(gfn, 0, page);
 }
 
-void hvm_unmap_guest_frame(void *p)
+void hvm_unmap_guest_frame(void *p, struct page_info *page)
 {
     if ( p )
+    {
         unmap_domain_page(p);
+        if ( page )
+            put_page(page);
+    }
 }
 
-static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
+static void *hvm_map_entry(unsigned long va, struct page_info **page)
 {
+    unsigned long gfn;
     uint32_t pfec;
     char *v;
 
@@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
      * treat it as a kernel-mode read (i.e. no access checks).
      */
     pfec = PFEC_page_present;
-    *gfn = paging_gva_to_gfn(current, va, &pfec);
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
         goto fail;
 
-    v = hvm_map_guest_frame_rw(*gfn);
+    v = hvm_map_guest_frame_rw(gfn, page);
     if ( v == NULL )
         goto fail;
 
@@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
     return NULL;
 }
 
-static void hvm_unmap_entry(void *p, unsigned long gfn)
+static void hvm_unmap_entry(void *p, struct page_info *page)
 {
-    hvm_unmap_guest_frame(p);
-    if ( p && (gfn != INVALID_GFN) )
-        put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(p, page);
 }
 
 static int hvm_load_segment_selector(
@@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
     int fault_type = TRAP_invalid_tss;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct vcpu *v = current;
-    unsigned long pdesc_gfn = INVALID_GFN;
+    struct page_info *pdesc_pg = NULL;
 
     if ( regs->eflags & X86_EFLAGS_VM )
     {
@@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
     desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -2021,7 +2039,8 @@ void hvm_task_switch(
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
-    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
+    unsigned long eflags;
+    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
     int exn_raised, rc;
     struct {
         u16 back_link,__blh;
@@ -2047,11 +2066,13 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), 
+                                &optss_pg);
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), 
+                                &nptss_pg);
     if ( nptss_desc == NULL )
         goto out;
 
@@ -2216,8 +2237,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap_entry(optss_desc, optss_gfn);
-    hvm_unmap_entry(nptss_desc, nptss_gfn);
+    hvm_unmap_entry(optss_desc, optss_pg);
+    hvm_unmap_entry(nptss_desc, nptss_pg);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_ioportED = 0;
 
     if (nv->nv_vvmcx)
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, 
+                              nv->nv_vvmcx_pg);
     nv->nv_vvmcx = NULL;
     nv->nv_vvmcxaddr = VMCX_EADDR;
+    nv->nv_vvmcx_pg  = NULL;
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
     if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
         ASSERT(nv->nv_vvmcx != NULL);
         ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
         nv->nv_vvmcx = NULL;
         nv->nv_vvmcxaddr = VMCX_EADDR;
+        nv->nv_vvmcx_pg = NULL;
     }
 
     if (nv->nv_vvmcx == NULL) {
-        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
+                                                &nv->nv_vvmcx_pg);
         if (nv->nv_vvmcx == NULL)
             return 0;
         nv->nv_vvmcxaddr = vmcbaddr;
-        /* put_gfn here even though the map survives beyond this caller.
-         * The map can likely survive beyond a hypervisor exit, thus we
-         * need to put the gfn */
-        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
     }
 
     return 1;
@@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
     unsigned int i;
     enum hvm_copy_result ret;
     unsigned long *ns_viomap;
+    struct page_info *ns_viomap_pg = NULL;
     bool_t ioport_80, ioport_ed;
 
     ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
@@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
     svm->ns_oiomap_pa = svm->ns_iomap_pa;
     svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
 
-    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
+    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
+                                        &ns_viomap_pg);
     ASSERT(ns_viomap != NULL);
     ioport_80 = test_bit(0x80, ns_viomap);
     ioport_ed = test_bit(0xed, ns_viomap);
-    hvm_unmap_guest_frame(ns_viomap);
-    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
+    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
 
     svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
 
@@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     uint16_t port;
     bool_t enabled;
     unsigned long gfn = 0; /* gcc ... */
+    struct page_info *io_bitmap_pg = NULL;
 
     ioinfo.bytes = exitinfo1;
     port = ioinfo.fields.port;
@@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
         break;
     }
 
-    io_bitmap = hvm_map_guest_frame_ro(gfn);
+    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
     if (io_bitmap == NULL) {
         gdprintk(XENLOG_ERR,
             "IOIO intercept: mapping of permission map failed\n");
@@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     }
 
     enabled = test_bit(port, io_bitmap);
-    hvm_unmap_guest_frame(io_bitmap);
-    put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
 
     if (!enabled)
         return NESTEDHVM_VMEXIT_HOST;
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
     nvmx->vmxon_region_pa = 0;
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     nvmx->intr.intr_info = 0;
     nvmx->intr.error_code = 0;
     nvmx->iobitmap[0] = NULL;
     nvmx->iobitmap[1] = NULL;
+    nvmx->iobitmap_pg[0] = NULL;
+    nvmx->iobitmap_pg[1] = NULL;
     return 0;
 out:
     return -ENOMEM;
@@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu 
 
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
-        hvm_unmap_guest_frame (nvmx->iobitmap[index]); 
+    {
+        hvm_unmap_guest_frame (nvmx->iobitmap[index],
+                               nvmx->iobitmap_pg[index]); 
+        nvmx->iobitmap_pg[index] = NULL;
+    }
     gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
-    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
-    /* See comment in nestedsvm_vmcb_map re putting this gfn and 
-     * liveness of the map it backs */
-    put_gfn(current->domain, gpa >> PAGE_SHIFT);
+    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
+                                             &nvmx->iobitmap_pg[index]);
 }
 
 static inline void map_io_bitmap_all(struct vcpu *v)
@@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
 
     __clear_current_vvmcs(v);
     if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
-        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
-    nvcpu->nv_vvmcx == NULL;
+        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
+    nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
-            hvm_unmap_guest_frame(nvmx->iobitmap[i]); 
+            hvm_unmap_guest_frame(nvmx->iobitmap[i], 
+                                  nvmx->iobitmap_pg[i]); 
             nvmx->iobitmap[i] = NULL;
+            nvmx->iobitmap_pg[i] = NULL;
         }
     }
 }
@@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
 
     if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
     {
-        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
+        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
+                                                 &nvcpu->nv_vvmcx_pg);
         nvcpu->nv_vvmcxaddr = gpa;
         map_io_bitmap_all (v);
-        /* See comment in nestedsvm_vmcb_map regarding putting this 
-         * gfn and liveness of the map that uses it */
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
     }
 
     vmreturn(regs, VMSUCCEED);
@@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
     else 
     {
         /* Even if this VMCS isn't the current one, we must clear it. */
-        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
+        struct page_info *vvmcs_pg = NULL;
+        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
         if ( vvmcs ) 
             __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
-        hvm_unmap_guest_frame(vvmcs);
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
+        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
     }
 
     vmreturn(regs, VMSUCCEED);
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -26,6 +26,7 @@
 #include <asm/hvm/asid.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
+#include <asm/mm.h>
 
 /* Interrupt acknowledgement sources. */
 enum hvm_intsrc {
@@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
     unsigned int addr_size,
     unsigned long *linear_addr);
 
-void *hvm_map_guest_frame_rw(unsigned long gfn);
-void *hvm_map_guest_frame_ro(unsigned long gfn);
-void hvm_unmap_guest_frame(void *p);
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page);
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page);
+void hvm_unmap_guest_frame(void *p, struct page_info *page);
 
 static inline void hvm_set_info_guest(struct vcpu *v)
 {
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -77,6 +77,7 @@ struct nestedvcpu {
     void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
 
     uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
+    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
     uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
     uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -26,6 +26,7 @@
 struct nestedvmx {
     paddr_t    vmxon_region_pa;
     void       *iobitmap[2];		/* map (va) of L1 guest I/O bitmap */
+    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
     /* deferred nested interrupt */
     struct {
         unsigned long intr_info;

  reply	other threads:[~2011-11-24 13:41 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
2011-11-24 13:41 ` Andres Lagar-Cavilla [this message]
2011-11-24 16:48   ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Tim Deegan
2011-11-24 17:04     ` Keir Fraser
2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
2011-11-24 17:07   ` Tim Deegan
2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
2011-11-24 17:07   ` Tim Deegan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f079cbeae77e5570c193.1322142091@xdev.gridcentric.ca \
    --to=andres@lagarcavilla.org \
    --cc=JBeulich@suse.com \
    --cc=adin@gridcentric.ca \
    --cc=andres@gridcentric.ca \
    --cc=keir.xen@gmail.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.