All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 3] MM fixes, part 2
@ 2011-11-24 13:41 Andres Lagar-Cavilla
  2011-11-24 13:41 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Andres Lagar-Cavilla
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Andres Lagar-Cavilla @ 2011-11-24 13:41 UTC (permalink / raw)
  To: xen-devel; +Cc: andres, keir.xen, tim, JBeulich, adin

Second haf of patch series submitted yesterday. These three patches contain
improvements to users of the get_gfn* interface. We ensure liveness of 
hypervisors mappings of guest pages in the face of paging, and remove a
few cases of recursive locking.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

 xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/nestedhvm.c       |   4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
 xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
 xen/include/asm-x86/hvm/hvm.h      |   9 +++-
 xen/include/asm-x86/hvm/vcpu.h     |   1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
 xen/arch/x86/mm/guest_walk.c       |  16 ++++++--
 xen/common/grant_table.c           |  69 ++++++++++++++++++++-----------------
 9 files changed, 136 insertions(+), 92 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
  2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
@ 2011-11-24 13:41 ` Andres Lagar-Cavilla
  2011-11-24 16:48   ` Tim Deegan
  2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
  2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
  2 siblings, 1 reply; 8+ messages in thread
From: Andres Lagar-Cavilla @ 2011-11-24 13:41 UTC (permalink / raw)
  To: xen-devel; +Cc: andres, keir.xen, tim, JBeulich, adin

 xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/nestedhvm.c       |   4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
 xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
 xen/include/asm-x86/hvm/hvm.h      |   9 +++-
 xen/include/asm-x86/hvm/vcpu.h     |   1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
 7 files changed, 88 insertions(+), 55 deletions(-)


The nested hvm code maps pages of the guest hvm. These maps live beyond
a hypervisor entry/exit pair, and thus their liveness cannot be ensured
with get_gfn/put_gfn critical sections. Ensure their liveness by
increasing the page ref count, instead.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-/* We leave this function holding a lock on the p2m entry */
-static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
+/* On non-NULL return, we leave this function holding an additional 
+ * ref on the underlying mfn, if any */
+static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
+                                    struct page_info **page)
 {
+    void *map;
     unsigned long mfn;
     p2m_type_t p2mt;
+    struct page_info *pg;
     struct domain *d = current->domain;
 
     mfn = mfn_x(writable
@@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
     if ( writable )
         paging_mark_dirty(d, mfn);
 
-    return map_domain_page(mfn);
+    pg = mfn_to_page(mfn);
+    if (page)
+    {
+        page_get_owner_and_reference(pg);
+        *page = pg;
+    }
+
+    map = map_domain_page(mfn);
+    put_gfn(d, gfn);
+    return map;
 }
 
-void *hvm_map_guest_frame_rw(unsigned long gfn)
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 1);
+    return __hvm_map_guest_frame(gfn, 1, page);
 }
 
-void *hvm_map_guest_frame_ro(unsigned long gfn)
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 0);
+    return __hvm_map_guest_frame(gfn, 0, page);
 }
 
-void hvm_unmap_guest_frame(void *p)
+void hvm_unmap_guest_frame(void *p, struct page_info *page)
 {
     if ( p )
+    {
         unmap_domain_page(p);
+        if ( page )
+            put_page(page);
+    }
 }
 
-static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
+static void *hvm_map_entry(unsigned long va, struct page_info **page)
 {
+    unsigned long gfn;
     uint32_t pfec;
     char *v;
 
@@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
      * treat it as a kernel-mode read (i.e. no access checks).
      */
     pfec = PFEC_page_present;
-    *gfn = paging_gva_to_gfn(current, va, &pfec);
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
         goto fail;
 
-    v = hvm_map_guest_frame_rw(*gfn);
+    v = hvm_map_guest_frame_rw(gfn, page);
     if ( v == NULL )
         goto fail;
 
@@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
     return NULL;
 }
 
-static void hvm_unmap_entry(void *p, unsigned long gfn)
+static void hvm_unmap_entry(void *p, struct page_info *page)
 {
-    hvm_unmap_guest_frame(p);
-    if ( p && (gfn != INVALID_GFN) )
-        put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(p, page);
 }
 
 static int hvm_load_segment_selector(
@@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
     int fault_type = TRAP_invalid_tss;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct vcpu *v = current;
-    unsigned long pdesc_gfn = INVALID_GFN;
+    struct page_info *pdesc_pg = NULL;
 
     if ( regs->eflags & X86_EFLAGS_VM )
     {
@@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
     desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -2021,7 +2039,8 @@ void hvm_task_switch(
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
-    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
+    unsigned long eflags;
+    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
     int exn_raised, rc;
     struct {
         u16 back_link,__blh;
@@ -2047,11 +2066,13 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), 
+                                &optss_pg);
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), 
+                                &nptss_pg);
     if ( nptss_desc == NULL )
         goto out;
 
@@ -2216,8 +2237,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap_entry(optss_desc, optss_gfn);
-    hvm_unmap_entry(nptss_desc, nptss_gfn);
+    hvm_unmap_entry(optss_desc, optss_pg);
+    hvm_unmap_entry(nptss_desc, nptss_pg);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_ioportED = 0;
 
     if (nv->nv_vvmcx)
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, 
+                              nv->nv_vvmcx_pg);
     nv->nv_vvmcx = NULL;
     nv->nv_vvmcxaddr = VMCX_EADDR;
+    nv->nv_vvmcx_pg  = NULL;
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
     if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
         ASSERT(nv->nv_vvmcx != NULL);
         ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
         nv->nv_vvmcx = NULL;
         nv->nv_vvmcxaddr = VMCX_EADDR;
+        nv->nv_vvmcx_pg = NULL;
     }
 
     if (nv->nv_vvmcx == NULL) {
-        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
+                                                &nv->nv_vvmcx_pg);
         if (nv->nv_vvmcx == NULL)
             return 0;
         nv->nv_vvmcxaddr = vmcbaddr;
-        /* put_gfn here even though the map survives beyond this caller.
-         * The map can likely survive beyond a hypervisor exit, thus we
-         * need to put the gfn */
-        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
     }
 
     return 1;
@@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
     unsigned int i;
     enum hvm_copy_result ret;
     unsigned long *ns_viomap;
+    struct page_info *ns_viomap_pg = NULL;
     bool_t ioport_80, ioport_ed;
 
     ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
@@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
     svm->ns_oiomap_pa = svm->ns_iomap_pa;
     svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
 
-    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
+    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
+                                        &ns_viomap_pg);
     ASSERT(ns_viomap != NULL);
     ioport_80 = test_bit(0x80, ns_viomap);
     ioport_ed = test_bit(0xed, ns_viomap);
-    hvm_unmap_guest_frame(ns_viomap);
-    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
+    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
 
     svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
 
@@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     uint16_t port;
     bool_t enabled;
     unsigned long gfn = 0; /* gcc ... */
+    struct page_info *io_bitmap_pg = NULL;
 
     ioinfo.bytes = exitinfo1;
     port = ioinfo.fields.port;
@@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
         break;
     }
 
-    io_bitmap = hvm_map_guest_frame_ro(gfn);
+    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
     if (io_bitmap == NULL) {
         gdprintk(XENLOG_ERR,
             "IOIO intercept: mapping of permission map failed\n");
@@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     }
 
     enabled = test_bit(port, io_bitmap);
-    hvm_unmap_guest_frame(io_bitmap);
-    put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
 
     if (!enabled)
         return NESTEDHVM_VMEXIT_HOST;
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
     nvmx->vmxon_region_pa = 0;
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     nvmx->intr.intr_info = 0;
     nvmx->intr.error_code = 0;
     nvmx->iobitmap[0] = NULL;
     nvmx->iobitmap[1] = NULL;
+    nvmx->iobitmap_pg[0] = NULL;
+    nvmx->iobitmap_pg[1] = NULL;
     return 0;
 out:
     return -ENOMEM;
@@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu 
 
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
-        hvm_unmap_guest_frame (nvmx->iobitmap[index]); 
+    {
+        hvm_unmap_guest_frame (nvmx->iobitmap[index],
+                               nvmx->iobitmap_pg[index]); 
+        nvmx->iobitmap_pg[index] = NULL;
+    }
     gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
-    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
-    /* See comment in nestedsvm_vmcb_map re putting this gfn and 
-     * liveness of the map it backs */
-    put_gfn(current->domain, gpa >> PAGE_SHIFT);
+    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
+                                             &nvmx->iobitmap_pg[index]);
 }
 
 static inline void map_io_bitmap_all(struct vcpu *v)
@@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
 
     __clear_current_vvmcs(v);
     if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
-        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
-    nvcpu->nv_vvmcx == NULL;
+        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
+    nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
-            hvm_unmap_guest_frame(nvmx->iobitmap[i]); 
+            hvm_unmap_guest_frame(nvmx->iobitmap[i], 
+                                  nvmx->iobitmap_pg[i]); 
             nvmx->iobitmap[i] = NULL;
+            nvmx->iobitmap_pg[i] = NULL;
         }
     }
 }
@@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
 
     if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
     {
-        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
+        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
+                                                 &nvcpu->nv_vvmcx_pg);
         nvcpu->nv_vvmcxaddr = gpa;
         map_io_bitmap_all (v);
-        /* See comment in nestedsvm_vmcb_map regarding putting this 
-         * gfn and liveness of the map that uses it */
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
     }
 
     vmreturn(regs, VMSUCCEED);
@@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
     else 
     {
         /* Even if this VMCS isn't the current one, we must clear it. */
-        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
+        struct page_info *vvmcs_pg = NULL;
+        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
         if ( vvmcs ) 
             __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
-        hvm_unmap_guest_frame(vvmcs);
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
+        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
     }
 
     vmreturn(regs, VMSUCCEED);
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -26,6 +26,7 @@
 #include <asm/hvm/asid.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
+#include <asm/mm.h>
 
 /* Interrupt acknowledgement sources. */
 enum hvm_intsrc {
@@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
     unsigned int addr_size,
     unsigned long *linear_addr);
 
-void *hvm_map_guest_frame_rw(unsigned long gfn);
-void *hvm_map_guest_frame_ro(unsigned long gfn);
-void hvm_unmap_guest_frame(void *p);
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page);
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page);
+void hvm_unmap_guest_frame(void *p, struct page_info *page);
 
 static inline void hvm_set_info_guest(struct vcpu *v)
 {
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -77,6 +77,7 @@ struct nestedvcpu {
     void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
 
     uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
+    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
     uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
     uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -26,6 +26,7 @@
 struct nestedvmx {
     paddr_t    vmxon_region_pa;
     void       *iobitmap[2];		/* map (va) of L1 guest I/O bitmap */
+    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
     /* deferred nested interrupt */
     struct {
         unsigned long intr_info;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk
  2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
  2011-11-24 13:41 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Andres Lagar-Cavilla
@ 2011-11-24 13:41 ` Andres Lagar-Cavilla
  2011-11-24 17:07   ` Tim Deegan
  2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
  2 siblings, 1 reply; 8+ messages in thread
From: Andres Lagar-Cavilla @ 2011-11-24 13:41 UTC (permalink / raw)
  To: xen-devel; +Cc: andres, keir.xen, tim, JBeulich, adin

 xen/arch/x86/mm/guest_walk.c |  16 +++++++++++-----
 1 files changed, 11 insertions(+), 5 deletions(-)


Instead of risking deadlock by holding onto the gfn's acquired during
a guest page table walk, acquire an extra reference within the get_gfn/
put_gfn critical section, and drop the extra reference when done with
the map. This ensures liveness of the map, i.e. the underlying page
won't be paged out.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r f079cbeae77e -r b0410cb27d17 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -87,7 +87,7 @@ static uint32_t set_ad_bits(void *guest_
 }
 
 /* If the map is non-NULL, we leave this function having 
- * called get_gfn, you need to call put_gfn. */
+ * acquired an extra ref on mfn_to_page(*mfn) */
 static inline void *map_domain_gfn(struct p2m_domain *p2m,
                                    gfn_t gfn, 
                                    mfn_t *mfn,
@@ -95,6 +95,7 @@ static inline void *map_domain_gfn(struc
                                    uint32_t *rc) 
 {
     p2m_access_t p2ma;
+    void *map;
 
     /* Translate the gfn, unsharing if shared */
     *mfn = get_gfn_type_access(p2m, gfn_x(gfn), p2mt, &p2ma, p2m_unshare, NULL);
@@ -120,7 +121,12 @@ static inline void *map_domain_gfn(struc
     }
     ASSERT(mfn_valid(mfn_x(*mfn)));
     
-    return map_domain_page(mfn_x(*mfn));
+    /* Get an extra ref to the page to ensure liveness of the map.
+     * Then we can safely put gfn */
+    page_get_owner_and_reference(mfn_to_page(mfn_x(*mfn)));
+    map = map_domain_page(mfn_x(*mfn));
+    __put_gfn(p2m, gfn_x(gfn));
+    return map;
 }
 
 
@@ -357,20 +363,20 @@ set_ad:
     if ( l3p ) 
     {
         unmap_domain_page(l3p);
-        __put_gfn(p2m, gfn_x(guest_l4e_get_gfn(gw->l4e)));
+        put_page(mfn_to_page(mfn_x(gw->l3mfn)));
     }
 #endif
 #if GUEST_PAGING_LEVELS >= 3
     if ( l2p ) 
     {
         unmap_domain_page(l2p);
-        __put_gfn(p2m, gfn_x(guest_l3e_get_gfn(gw->l3e))); 
+        put_page(mfn_to_page(mfn_x(gw->l2mfn)));
     }
 #endif
     if ( l1p ) 
     {
         unmap_domain_page(l1p);
-        __put_gfn(p2m, gfn_x(guest_l2e_get_gfn(gw->l2e)));
+        put_page(mfn_to_page(mfn_x(gw->l1mfn)));
     }
 
     return rc;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3 of 3] Fix liveness of pages in grant copy operations
  2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
  2011-11-24 13:41 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Andres Lagar-Cavilla
  2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
@ 2011-11-24 13:41 ` Andres Lagar-Cavilla
  2011-11-24 17:07   ` Tim Deegan
  2 siblings, 1 reply; 8+ messages in thread
From: Andres Lagar-Cavilla @ 2011-11-24 13:41 UTC (permalink / raw)
  To: xen-devel; +Cc: andres, keir.xen, tim, JBeulich, adin

 xen/common/grant_table.c |  69 +++++++++++++++++++++++++----------------------
 1 files changed, 37 insertions(+), 32 deletions(-)


We were immediately putting the p2m entry translation for grant
copy operations. This allowed for an unnecessary race by which the
page could have been swapped out between the p2m lookup and the actual
use. Hold on to the p2m entries until the grant operation finishes.

Also fixes a small bug: for the source page of the copy, get_page
was assuming the page was owned by the source domain. It may be a
shared page, since we don't perform an unsharing p2m lookup.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r b0410cb27d17 -r 9e44fd6e4955 xen/common/grant_table.c
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1723,11 +1723,12 @@ static void __fixup_status_for_pin(struc
 /* Grab a frame number from a grant entry and update the flags and pin
    count as appropriate.  Note that this does *not* update the page
    type or reference counts, and does not check that the mfn is
-   actually valid. */
+   actually valid. If *gfn != INVALID_GFN, and rc == GNTST_okay, then
+   we leave this function holding the p2m entry for *gfn in *owning_domain */
 static int
 __acquire_grant_for_copy(
     struct domain *rd, unsigned long gref, struct domain *ld, int readonly,
-    unsigned long *frame, unsigned *page_off, unsigned *length,
+    unsigned long *frame, unsigned long *gfn, unsigned *page_off, unsigned *length,
     unsigned allow_transitive, struct domain **owning_domain)
 {
     grant_entry_v1_t *sha1;
@@ -1739,7 +1740,6 @@ __acquire_grant_for_copy(
     domid_t trans_domid;
     grant_ref_t trans_gref;
     struct domain *td;
-    unsigned long gfn;
     unsigned long grant_frame;
     unsigned trans_page_off;
     unsigned trans_length;
@@ -1748,6 +1748,7 @@ __acquire_grant_for_copy(
     s16 rc = GNTST_okay;
 
     *owning_domain = NULL;
+    *gfn = INVALID_GFN;
 
     spin_lock(&rd->grant_table->lock);
 
@@ -1824,7 +1825,7 @@ __acquire_grant_for_copy(
             spin_unlock(&rd->grant_table->lock);
 
             rc = __acquire_grant_for_copy(td, trans_gref, rd,
-                                          readonly, &grant_frame,
+                                          readonly, &grant_frame, gfn,
                                           &trans_page_off, &trans_length,
                                           0, &ignore);
 
@@ -1846,7 +1847,7 @@ __acquire_grant_for_copy(
 	        rcu_unlock_domain(td);
                 spin_unlock(&rd->grant_table->lock);
                 return __acquire_grant_for_copy(rd, gref, ld, readonly,
-                                                frame, page_off, length,
+                                                frame, gfn, page_off, length,
                                                 allow_transitive,
                                                 owning_domain);
             }
@@ -1860,13 +1861,11 @@ __acquire_grant_for_copy(
         }
         else if ( sha1 )
         {
-            gfn = sha1->frame;
-            rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-            /* We drop this immediately per the comments at the top */
-            put_gfn(rd, gfn);
+            *gfn = sha1->frame;
+            rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
             if ( rc != GNTST_okay )
                 goto unlock_out;
-            act->gfn = gfn;
+            act->gfn = *gfn;
             is_sub_page = 0;
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
@@ -1874,12 +1873,11 @@ __acquire_grant_for_copy(
         }
         else if ( !(sha2->hdr.flags & GTF_sub_page) )
         {
-            gfn = sha2->full_page.frame;
-            rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-            put_gfn(rd, gfn);
+            *gfn = sha2->full_page.frame;
+            rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
             if ( rc != GNTST_okay )
                 goto unlock_out;
-            act->gfn = gfn;
+            act->gfn = *gfn;
             is_sub_page = 0;
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
@@ -1887,12 +1885,11 @@ __acquire_grant_for_copy(
         }
         else
         {
-            gfn = sha2->sub_page.frame;
-            rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-            put_gfn(rd, gfn);
+            *gfn = sha2->sub_page.frame;
+            rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
             if ( rc != GNTST_okay )
                 goto unlock_out;
-            act->gfn = gfn;
+            act->gfn = *gfn;
             is_sub_page = 1;
             trans_page_off = sha2->sub_page.page_off;
             trans_length = sha2->sub_page.length;
@@ -1932,7 +1929,7 @@ __gnttab_copy(
 {
     struct domain *sd = NULL, *dd = NULL;
     struct domain *source_domain = NULL, *dest_domain = NULL;
-    unsigned long s_frame, d_frame;
+    unsigned long s_frame, d_frame, s_gfn = INVALID_GFN, d_gfn = INVALID_GFN;
     char *sp, *dp;
     s16 rc = GNTST_okay;
     int have_d_grant = 0, have_s_grant = 0, have_s_ref = 0;
@@ -1973,14 +1970,14 @@ __gnttab_copy(
     {
         unsigned source_off, source_len;
         rc = __acquire_grant_for_copy(sd, op->source.u.ref, current->domain, 1,
-                                      &s_frame, &source_off, &source_len, 1,
+                                      &s_frame, &s_gfn, &source_off, &source_len, 1,
                                       &source_domain);
         if ( rc != GNTST_okay )
             goto error_out;
         have_s_grant = 1;
         if ( op->source.offset < source_off ||
              op->len > source_len )
-            PIN_FAIL(error_out, GNTST_general_error,
+            PIN_FAIL(error_put_s_gfn, GNTST_general_error,
                      "copy source out of bounds: %d < %d || %d > %d\n",
                      op->source.offset, source_off,
                      op->len, source_len);
@@ -1988,8 +1985,8 @@ __gnttab_copy(
     else
     {
 #ifdef CONFIG_X86
+        s_gfn = op->source.u.gmfn;
         rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd);
-        put_gfn(sd, op->source.u.gmfn);
         if ( rc != GNTST_okay )
             goto error_out;
 #else
@@ -1998,14 +1995,16 @@ __gnttab_copy(
         source_domain = sd;
     }
     if ( unlikely(!mfn_valid(s_frame)) )
-        PIN_FAIL(error_out, GNTST_general_error,
+        PIN_FAIL(error_put_s_gfn, GNTST_general_error,
                  "source frame %lx invalid.\n", s_frame);
-    if ( !get_page(mfn_to_page(s_frame), source_domain) )
+    /* For the source frame, the page could still be shared, so
+     * don't assume ownership by source_domain */
+    if ( !page_get_owner_and_reference(mfn_to_page(s_frame)) )
     {
         if ( !sd->is_dying )
             gdprintk(XENLOG_WARNING, "Could not get src frame %lx\n", s_frame);
         rc = GNTST_general_error;
-        goto error_out;
+        goto error_put_s_gfn;
     }
     have_s_ref = 1;
 
@@ -2013,14 +2012,14 @@ __gnttab_copy(
     {
         unsigned dest_off, dest_len;
         rc = __acquire_grant_for_copy(dd, op->dest.u.ref, current->domain, 0,
-                                      &d_frame, &dest_off, &dest_len, 1,
+                                      &d_frame, &d_gfn, &dest_off, &dest_len, 1,
                                       &dest_domain);
         if ( rc != GNTST_okay )
-            goto error_out;
+            goto error_put_s_gfn;
         have_d_grant = 1;
         if ( op->dest.offset < dest_off ||
              op->len > dest_len )
-            PIN_FAIL(error_out, GNTST_general_error,
+            PIN_FAIL(error_put_d_gfn, GNTST_general_error,
                      "copy dest out of bounds: %d < %d || %d > %d\n",
                      op->dest.offset, dest_off,
                      op->len, dest_len);
@@ -2028,17 +2027,17 @@ __gnttab_copy(
     else
     {
 #ifdef CONFIG_X86
+        d_gfn = op->dest.u.gmfn;
         rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd);
-        put_gfn(dd, op->dest.u.gmfn);
         if ( rc != GNTST_okay )
-            goto error_out;
+            goto error_put_s_gfn;
 #else
         d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn);
 #endif
         dest_domain = dd;
     }
     if ( unlikely(!mfn_valid(d_frame)) )
-        PIN_FAIL(error_out, GNTST_general_error,
+        PIN_FAIL(error_put_d_gfn, GNTST_general_error,
                  "destination frame %lx invalid.\n", d_frame);
     if ( !get_page_and_type(mfn_to_page(d_frame), dest_domain,
                             PGT_writable_page) )
@@ -2046,7 +2045,7 @@ __gnttab_copy(
         if ( !dd->is_dying )
             gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame);
         rc = GNTST_general_error;
-        goto error_out;
+        goto error_put_d_gfn;
     }
 
     sp = map_domain_page(s_frame);
@@ -2060,6 +2059,12 @@ __gnttab_copy(
     gnttab_mark_dirty(dd, d_frame);
 
     put_page_and_type(mfn_to_page(d_frame));
+ error_put_d_gfn:
+    if ( (d_gfn != INVALID_GFN) && (dest_domain) )
+        put_gfn(dest_domain, d_gfn);
+ error_put_s_gfn:
+    if ( (s_gfn != INVALID_GFN) && (source_domain) )
+        put_gfn(source_domain, s_gfn);
  error_out:
     if ( have_s_ref )
         put_page(mfn_to_page(s_frame));

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
  2011-11-24 13:41 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Andres Lagar-Cavilla
@ 2011-11-24 16:48   ` Tim Deegan
  2011-11-24 17:04     ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Tim Deegan @ 2011-11-24 16:48 UTC (permalink / raw)
  To: Andres Lagar-Cavilla; +Cc: andres, xen-devel, keir.xen, JBeulich, adin


This is a bit messy still.  On 64-bit Xen we could just use
virt_to_page() on the map pointers rather than storing their 
page_info pointers separately. 

On 32-bit, maybe we could use the existing linear mappings to look up
the MFN instead.  Or would it be easier to add a function to eth
map_domain_page() family that does va->mfn for mapped frames?
Keir, any opinion?

Tim.

At 08:41 -0500 on 24 Nov (1322124091), Andres Lagar-Cavilla wrote:
>  xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
>  xen/arch/x86/hvm/nestedhvm.c       |   4 +-
>  xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
>  xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
>  xen/include/asm-x86/hvm/hvm.h      |   9 +++-
>  xen/include/asm-x86/hvm/vcpu.h     |   1 +
>  xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
>  7 files changed, 88 insertions(+), 55 deletions(-)
> 
> 
> The nested hvm code maps pages of the guest hvm. These maps live beyond
> a hypervisor entry/exit pair, and thus their liveness cannot be ensured
> with get_gfn/put_gfn critical sections. Ensure their liveness by
> increasing the page ref count, instead.
> 
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
> 
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
>      return 0;
>  }
>  
> -/* We leave this function holding a lock on the p2m entry */
> -static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
> +/* On non-NULL return, we leave this function holding an additional 
> + * ref on the underlying mfn, if any */
> +static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
> +                                    struct page_info **page)
>  {
> +    void *map;
>      unsigned long mfn;
>      p2m_type_t p2mt;
> +    struct page_info *pg;
>      struct domain *d = current->domain;
>  
>      mfn = mfn_x(writable
> @@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
>      if ( writable )
>          paging_mark_dirty(d, mfn);
>  
> -    return map_domain_page(mfn);
> +    pg = mfn_to_page(mfn);
> +    if (page)
> +    {
> +        page_get_owner_and_reference(pg);
> +        *page = pg;
> +    }
> +
> +    map = map_domain_page(mfn);
> +    put_gfn(d, gfn);
> +    return map;
>  }
>  
> -void *hvm_map_guest_frame_rw(unsigned long gfn)
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 1);
> +    return __hvm_map_guest_frame(gfn, 1, page);
>  }
>  
> -void *hvm_map_guest_frame_ro(unsigned long gfn)
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page)
>  {
> -    return __hvm_map_guest_frame(gfn, 0);
> +    return __hvm_map_guest_frame(gfn, 0, page);
>  }
>  
> -void hvm_unmap_guest_frame(void *p)
> +void hvm_unmap_guest_frame(void *p, struct page_info *page)
>  {
>      if ( p )
> +    {
>          unmap_domain_page(p);
> +        if ( page )
> +            put_page(page);
> +    }
>  }
>  
> -static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
> +static void *hvm_map_entry(unsigned long va, struct page_info **page)
>  {
> +    unsigned long gfn;
>      uint32_t pfec;
>      char *v;
>  
> @@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
>       * treat it as a kernel-mode read (i.e. no access checks).
>       */
>      pfec = PFEC_page_present;
> -    *gfn = paging_gva_to_gfn(current, va, &pfec);
> +    gfn = paging_gva_to_gfn(current, va, &pfec);
>      if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
>          goto fail;
>  
> -    v = hvm_map_guest_frame_rw(*gfn);
> +    v = hvm_map_guest_frame_rw(gfn, page);
>      if ( v == NULL )
>          goto fail;
>  
> @@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
>      return NULL;
>  }
>  
> -static void hvm_unmap_entry(void *p, unsigned long gfn)
> +static void hvm_unmap_entry(void *p, struct page_info *page)
>  {
> -    hvm_unmap_guest_frame(p);
> -    if ( p && (gfn != INVALID_GFN) )
> -        put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(p, page);
>  }
>  
>  static int hvm_load_segment_selector(
> @@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
>      int fault_type = TRAP_invalid_tss;
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct vcpu *v = current;
> -    unsigned long pdesc_gfn = INVALID_GFN;
> +    struct page_info *pdesc_pg = NULL;
>  
>      if ( regs->eflags & X86_EFLAGS_VM )
>      {
> @@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
>      if ( ((sel & 0xfff8) + 7) > desctab.limit )
>          goto fail;
>  
> -    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
> +    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
>      if ( pdesc == NULL )
>          goto hvm_map_fail;
>  
> @@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
>      desc.b |= 0x100;
>  
>   skip_accessed_flag:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>  
>      segr.base = (((desc.b <<  0) & 0xff000000u) |
>                   ((desc.b << 16) & 0x00ff0000u) |
> @@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
>      return 0;
>  
>   unmap_and_fail:
> -    hvm_unmap_entry(pdesc, pdesc_gfn);
> +    hvm_unmap_entry(pdesc, pdesc_pg);
>   fail:
>      hvm_inject_exception(fault_type, sel & 0xfffc, 0);
>   hvm_map_fail:
> @@ -2021,7 +2039,8 @@ void hvm_task_switch(
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>      struct segment_register gdt, tr, prev_tr, segr;
>      struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
> -    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
> +    unsigned long eflags;
> +    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
>      int exn_raised, rc;
>      struct {
>          u16 back_link,__blh;
> @@ -2047,11 +2066,13 @@ void hvm_task_switch(
>          goto out;
>      }
>  
> -    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
> +    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), 
> +                                &optss_pg);
>      if ( optss_desc == NULL )
>          goto out;
>  
> -    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
> +    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), 
> +                                &nptss_pg);
>      if ( nptss_desc == NULL )
>          goto out;
>  
> @@ -2216,8 +2237,8 @@ void hvm_task_switch(
>      }
>  
>   out:
> -    hvm_unmap_entry(optss_desc, optss_gfn);
> -    hvm_unmap_entry(nptss_desc, nptss_gfn);
> +    hvm_unmap_entry(optss_desc, optss_pg);
> +    hvm_unmap_entry(nptss_desc, nptss_pg);
>  }
>  
>  #define HVMCOPY_from_guest (0u<<0)
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
> --- a/xen/arch/x86/hvm/nestedhvm.c
> +++ b/xen/arch/x86/hvm/nestedhvm.c
> @@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
>      nv->nv_ioportED = 0;
>  
>      if (nv->nv_vvmcx)
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx, 
> +                              nv->nv_vvmcx_pg);
>      nv->nv_vvmcx = NULL;
>      nv->nv_vvmcxaddr = VMCX_EADDR;
> +    nv->nv_vvmcx_pg  = NULL;
>      nv->nv_flushp2m = 0;
>      nv->nv_p2m = NULL;
>  
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
>      if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
>          ASSERT(nv->nv_vvmcx != NULL);
>          ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
> -        hvm_unmap_guest_frame(nv->nv_vvmcx);
> +        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
>          nv->nv_vvmcx = NULL;
>          nv->nv_vvmcxaddr = VMCX_EADDR;
> +        nv->nv_vvmcx_pg = NULL;
>      }
>  
>      if (nv->nv_vvmcx == NULL) {
> -        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
> +        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
> +                                                &nv->nv_vvmcx_pg);
>          if (nv->nv_vvmcx == NULL)
>              return 0;
>          nv->nv_vvmcxaddr = vmcbaddr;
> -        /* put_gfn here even though the map survives beyond this caller.
> -         * The map can likely survive beyond a hypervisor exit, thus we
> -         * need to put the gfn */
> -        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
>      }
>  
>      return 1;
> @@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
>      unsigned int i;
>      enum hvm_copy_result ret;
>      unsigned long *ns_viomap;
> +    struct page_info *ns_viomap_pg = NULL;
>      bool_t ioport_80, ioport_ed;
>  
>      ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> @@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
>      svm->ns_oiomap_pa = svm->ns_iomap_pa;
>      svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
>  
> -    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
> +    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
> +                                        &ns_viomap_pg);
>      ASSERT(ns_viomap != NULL);
>      ioport_80 = test_bit(0x80, ns_viomap);
>      ioport_ed = test_bit(0xed, ns_viomap);
> -    hvm_unmap_guest_frame(ns_viomap);
> -    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
> +    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
>  
>      svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
>  
> @@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>      uint16_t port;
>      bool_t enabled;
>      unsigned long gfn = 0; /* gcc ... */
> +    struct page_info *io_bitmap_pg = NULL;
>  
>      ioinfo.bytes = exitinfo1;
>      port = ioinfo.fields.port;
> @@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>          break;
>      }
>  
> -    io_bitmap = hvm_map_guest_frame_ro(gfn);
> +    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
>      if (io_bitmap == NULL) {
>          gdprintk(XENLOG_ERR,
>              "IOIO intercept: mapping of permission map failed\n");
> @@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
>      }
>  
>      enabled = test_bit(port, io_bitmap);
> -    hvm_unmap_guest_frame(io_bitmap);
> -    put_gfn(current->domain, gfn);
> +    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
>  
>      if (!enabled)
>          return NESTEDHVM_VMEXIT_HOST;
> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
>      nvmx->vmxon_region_pa = 0;
>      nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      nvmx->intr.intr_info = 0;
>      nvmx->intr.error_code = 0;
>      nvmx->iobitmap[0] = NULL;
>      nvmx->iobitmap[1] = NULL;
> +    nvmx->iobitmap_pg[0] = NULL;
> +    nvmx->iobitmap_pg[1] = NULL;
>      return 0;
>  out:
>      return -ENOMEM;
> @@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu 
>  
>      index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
>      if (nvmx->iobitmap[index])
> -        hvm_unmap_guest_frame (nvmx->iobitmap[index]); 
> +    {
> +        hvm_unmap_guest_frame (nvmx->iobitmap[index],
> +                               nvmx->iobitmap_pg[index]); 
> +        nvmx->iobitmap_pg[index] = NULL;
> +    }
>      gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
> -    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
> -    /* See comment in nestedsvm_vmcb_map re putting this gfn and 
> -     * liveness of the map it backs */
> -    put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
> +                                             &nvmx->iobitmap_pg[index]);
>  }
>  
>  static inline void map_io_bitmap_all(struct vcpu *v)
> @@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
>  
>      __clear_current_vvmcs(v);
>      if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
> -        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
> -    nvcpu->nv_vvmcx == NULL;
> +        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
> +    nvcpu->nv_vvmcx = NULL;
>      nvcpu->nv_vvmcxaddr = VMCX_EADDR;
> +    nvcpu->nv_vvmcx_pg = NULL;
>      for (i=0; i<2; i++) {
>          if ( nvmx->iobitmap[i] ) {
> -            hvm_unmap_guest_frame(nvmx->iobitmap[i]); 
> +            hvm_unmap_guest_frame(nvmx->iobitmap[i], 
> +                                  nvmx->iobitmap_pg[i]); 
>              nvmx->iobitmap[i] = NULL;
> +            nvmx->iobitmap_pg[i] = NULL;
>          }
>      }
>  }
> @@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
>  
>      if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
>      {
> -        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
> +        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
> +                                                 &nvcpu->nv_vvmcx_pg);
>          nvcpu->nv_vvmcxaddr = gpa;
>          map_io_bitmap_all (v);
> -        /* See comment in nestedsvm_vmcb_map regarding putting this 
> -         * gfn and liveness of the map that uses it */
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
>      }
>  
>      vmreturn(regs, VMSUCCEED);
> @@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
>      else 
>      {
>          /* Even if this VMCS isn't the current one, we must clear it. */
> -        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
> +        struct page_info *vvmcs_pg = NULL;
> +        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
>          if ( vvmcs ) 
>              __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
> -        hvm_unmap_guest_frame(vvmcs);
> -        put_gfn(current->domain, gpa >> PAGE_SHIFT);
> +        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
>      }
>  
>      vmreturn(regs, VMSUCCEED);
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -26,6 +26,7 @@
>  #include <asm/hvm/asid.h>
>  #include <public/domctl.h>
>  #include <public/hvm/save.h>
> +#include <asm/mm.h>
>  
>  /* Interrupt acknowledgement sources. */
>  enum hvm_intsrc {
> @@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
>      unsigned int addr_size,
>      unsigned long *linear_addr);
>  
> -void *hvm_map_guest_frame_rw(unsigned long gfn);
> -void *hvm_map_guest_frame_ro(unsigned long gfn);
> -void hvm_unmap_guest_frame(void *p);
> +void *hvm_map_guest_frame_rw(unsigned long gfn,
> +                             struct page_info **page);
> +void *hvm_map_guest_frame_ro(unsigned long gfn,
> +                             struct page_info **page);
> +void hvm_unmap_guest_frame(void *p, struct page_info *page);
>  
>  static inline void hvm_set_info_guest(struct vcpu *v)
>  {
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -77,6 +77,7 @@ struct nestedvcpu {
>      void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
>  
>      uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
> +    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
>      uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
>      uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
>  
> diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
> --- a/xen/include/asm-x86/hvm/vmx/vvmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
> @@ -26,6 +26,7 @@
>  struct nestedvmx {
>      paddr_t    vmxon_region_pa;
>      void       *iobitmap[2];		/* map (va) of L1 guest I/O bitmap */
> +    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
>      /* deferred nested interrupt */
>      struct {
>          unsigned long intr_info;
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
  2011-11-24 16:48   ` Tim Deegan
@ 2011-11-24 17:04     ` Keir Fraser
  0 siblings, 0 replies; 8+ messages in thread
From: Keir Fraser @ 2011-11-24 17:04 UTC (permalink / raw)
  To: Tim Deegan, Andres Lagar-Cavilla
  Cc: andres, xen-devel, keir.xen, JBeulich, adin

On 24/11/2011 16:48, "Tim Deegan" <tim@xen.org> wrote:

> This is a bit messy still.  On 64-bit Xen we could just use
> virt_to_page() on the map pointers rather than storing their
> page_info pointers separately.
> 
> On 32-bit, maybe we could use the existing linear mappings to look up
> the MFN instead.  Or would it be easier to add a function to eth
> map_domain_page() family that does va->mfn for mapped frames?
> Keir, any opinion?

Whatever's easiest. 32-bit isn't a priority.

 -- Keir

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk
  2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
@ 2011-11-24 17:07   ` Tim Deegan
  0 siblings, 0 replies; 8+ messages in thread
From: Tim Deegan @ 2011-11-24 17:07 UTC (permalink / raw)
  To: Andres Lagar-Cavilla; +Cc: andres, xen-devel, keir.xen, JBeulich, adin

At 08:41 -0500 on 24 Nov (1322124092), Andres Lagar-Cavilla wrote:
> Instead of risking deadlock by holding onto the gfn's acquired during
> a guest page table walk, acquire an extra reference within the get_gfn/
> put_gfn critical section, and drop the extra reference when done with
> the map. This ensures liveness of the map, i.e. the underlying page
> won't be paged out.
> 
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Applied, thanks.

Tim.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3 of 3] Fix liveness of pages in grant copy operations
  2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
@ 2011-11-24 17:07   ` Tim Deegan
  0 siblings, 0 replies; 8+ messages in thread
From: Tim Deegan @ 2011-11-24 17:07 UTC (permalink / raw)
  To: Andres Lagar-Cavilla; +Cc: andres, xen-devel, keir.xen, JBeulich, adin

At 08:41 -0500 on 24 Nov (1322124093), Andres Lagar-Cavilla wrote:
> We were immediately putting the p2m entry translation for grant
> copy operations. This allowed for an unnecessary race by which the
> page could have been swapped out between the p2m lookup and the actual
> use. Hold on to the p2m entries until the grant operation finishes.
> 
> Also fixes a small bug: for the source page of the copy, get_page
> was assuming the page was owned by the source domain. It may be a
> shared page, since we don't perform an unsharing p2m lookup.
> 
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Applied, thanks.

Tim.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-11-24 17:07 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
2011-11-24 13:41 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Andres Lagar-Cavilla
2011-11-24 16:48   ` Tim Deegan
2011-11-24 17:04     ` Keir Fraser
2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
2011-11-24 17:07   ` Tim Deegan
2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
2011-11-24 17:07   ` Tim Deegan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.