From: Andres Lagar-Cavilla <andres@lagarcavilla.org>
To: xen-devel@lists.xensource.com
Cc: andres@gridcentric.ca, keir.xen@gmail.com, tim@xen.org,
JBeulich@suse.com, adin@gridcentric.ca
Subject: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
Date: Thu, 24 Nov 2011 08:41:31 -0500 [thread overview]
Message-ID: <f079cbeae77e5570c193.1322142091@xdev.gridcentric.ca> (raw)
In-Reply-To: <patchbomb.1322142090@xdev.gridcentric.ca>
xen/arch/x86/hvm/hvm.c | 69 ++++++++++++++++++++++++-------------
xen/arch/x86/hvm/nestedhvm.c | 4 +-
xen/arch/x86/hvm/svm/nestedsvm.c | 23 ++++++------
xen/arch/x86/hvm/vmx/vvmx.c | 36 +++++++++++--------
xen/include/asm-x86/hvm/hvm.h | 9 +++-
xen/include/asm-x86/hvm/vcpu.h | 1 +
xen/include/asm-x86/hvm/vmx/vvmx.h | 1 +
7 files changed, 88 insertions(+), 55 deletions(-)
The nested hvm code maps pages of the guest hvm. These maps live beyond
a hypervisor entry/exit pair, and thus their liveness cannot be ensured
with get_gfn/put_gfn critical sections. Ensure their liveness by
increasing the page ref count, instead.
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
return 0;
}
-/* We leave this function holding a lock on the p2m entry */
-static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
+/* On non-NULL return, we leave this function holding an additional
+ * ref on the underlying mfn, if any */
+static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
+ struct page_info **page)
{
+ void *map;
unsigned long mfn;
p2m_type_t p2mt;
+ struct page_info *pg;
struct domain *d = current->domain;
mfn = mfn_x(writable
@@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
if ( writable )
paging_mark_dirty(d, mfn);
- return map_domain_page(mfn);
+ pg = mfn_to_page(mfn);
+ if (page)
+ {
+ page_get_owner_and_reference(pg);
+ *page = pg;
+ }
+
+ map = map_domain_page(mfn);
+ put_gfn(d, gfn);
+ return map;
}
-void *hvm_map_guest_frame_rw(unsigned long gfn)
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+ struct page_info **page)
{
- return __hvm_map_guest_frame(gfn, 1);
+ return __hvm_map_guest_frame(gfn, 1, page);
}
-void *hvm_map_guest_frame_ro(unsigned long gfn)
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+ struct page_info **page)
{
- return __hvm_map_guest_frame(gfn, 0);
+ return __hvm_map_guest_frame(gfn, 0, page);
}
-void hvm_unmap_guest_frame(void *p)
+void hvm_unmap_guest_frame(void *p, struct page_info *page)
{
if ( p )
+ {
unmap_domain_page(p);
+ if ( page )
+ put_page(page);
+ }
}
-static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
+static void *hvm_map_entry(unsigned long va, struct page_info **page)
{
+ unsigned long gfn;
uint32_t pfec;
char *v;
@@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
* treat it as a kernel-mode read (i.e. no access checks).
*/
pfec = PFEC_page_present;
- *gfn = paging_gva_to_gfn(current, va, &pfec);
+ gfn = paging_gva_to_gfn(current, va, &pfec);
if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
goto fail;
- v = hvm_map_guest_frame_rw(*gfn);
+ v = hvm_map_guest_frame_rw(gfn, page);
if ( v == NULL )
goto fail;
@@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
return NULL;
}
-static void hvm_unmap_entry(void *p, unsigned long gfn)
+static void hvm_unmap_entry(void *p, struct page_info *page)
{
- hvm_unmap_guest_frame(p);
- if ( p && (gfn != INVALID_GFN) )
- put_gfn(current->domain, gfn);
+ hvm_unmap_guest_frame(p, page);
}
static int hvm_load_segment_selector(
@@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
int fault_type = TRAP_invalid_tss;
struct cpu_user_regs *regs = guest_cpu_user_regs();
struct vcpu *v = current;
- unsigned long pdesc_gfn = INVALID_GFN;
+ struct page_info *pdesc_pg = NULL;
if ( regs->eflags & X86_EFLAGS_VM )
{
@@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
if ( ((sel & 0xfff8) + 7) > desctab.limit )
goto fail;
- pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
+ pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
if ( pdesc == NULL )
goto hvm_map_fail;
@@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
desc.b |= 0x100;
skip_accessed_flag:
- hvm_unmap_entry(pdesc, pdesc_gfn);
+ hvm_unmap_entry(pdesc, pdesc_pg);
segr.base = (((desc.b << 0) & 0xff000000u) |
((desc.b << 16) & 0x00ff0000u) |
@@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
return 0;
unmap_and_fail:
- hvm_unmap_entry(pdesc, pdesc_gfn);
+ hvm_unmap_entry(pdesc, pdesc_pg);
fail:
hvm_inject_exception(fault_type, sel & 0xfffc, 0);
hvm_map_fail:
@@ -2021,7 +2039,8 @@ void hvm_task_switch(
struct cpu_user_regs *regs = guest_cpu_user_regs();
struct segment_register gdt, tr, prev_tr, segr;
struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
- unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
+ unsigned long eflags;
+ struct page_info *optss_pg = NULL, *nptss_pg = NULL;
int exn_raised, rc;
struct {
u16 back_link,__blh;
@@ -2047,11 +2066,13 @@ void hvm_task_switch(
goto out;
}
- optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
+ optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8),
+ &optss_pg);
if ( optss_desc == NULL )
goto out;
- nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
+ nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8),
+ &nptss_pg);
if ( nptss_desc == NULL )
goto out;
@@ -2216,8 +2237,8 @@ void hvm_task_switch(
}
out:
- hvm_unmap_entry(optss_desc, optss_gfn);
- hvm_unmap_entry(nptss_desc, nptss_gfn);
+ hvm_unmap_entry(optss_desc, optss_pg);
+ hvm_unmap_entry(nptss_desc, nptss_pg);
}
#define HVMCOPY_from_guest (0u<<0)
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
nv->nv_ioportED = 0;
if (nv->nv_vvmcx)
- hvm_unmap_guest_frame(nv->nv_vvmcx);
+ hvm_unmap_guest_frame(nv->nv_vvmcx,
+ nv->nv_vvmcx_pg);
nv->nv_vvmcx = NULL;
nv->nv_vvmcxaddr = VMCX_EADDR;
+ nv->nv_vvmcx_pg = NULL;
nv->nv_flushp2m = 0;
nv->nv_p2m = NULL;
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
ASSERT(nv->nv_vvmcx != NULL);
ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
- hvm_unmap_guest_frame(nv->nv_vvmcx);
+ hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
nv->nv_vvmcx = NULL;
nv->nv_vvmcxaddr = VMCX_EADDR;
+ nv->nv_vvmcx_pg = NULL;
}
if (nv->nv_vvmcx == NULL) {
- nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+ nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
+ &nv->nv_vvmcx_pg);
if (nv->nv_vvmcx == NULL)
return 0;
nv->nv_vvmcxaddr = vmcbaddr;
- /* put_gfn here even though the map survives beyond this caller.
- * The map can likely survive beyond a hypervisor exit, thus we
- * need to put the gfn */
- put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
}
return 1;
@@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
unsigned int i;
enum hvm_copy_result ret;
unsigned long *ns_viomap;
+ struct page_info *ns_viomap_pg = NULL;
bool_t ioport_80, ioport_ed;
ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
@@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
svm->ns_oiomap_pa = svm->ns_iomap_pa;
svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
- ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
+ ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
+ &ns_viomap_pg);
ASSERT(ns_viomap != NULL);
ioport_80 = test_bit(0x80, ns_viomap);
ioport_ed = test_bit(0xed, ns_viomap);
- hvm_unmap_guest_frame(ns_viomap);
- put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
+ hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
@@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
uint16_t port;
bool_t enabled;
unsigned long gfn = 0; /* gcc ... */
+ struct page_info *io_bitmap_pg = NULL;
ioinfo.bytes = exitinfo1;
port = ioinfo.fields.port;
@@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
break;
}
- io_bitmap = hvm_map_guest_frame_ro(gfn);
+ io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
if (io_bitmap == NULL) {
gdprintk(XENLOG_ERR,
"IOIO intercept: mapping of permission map failed\n");
@@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
}
enabled = test_bit(port, io_bitmap);
- hvm_unmap_guest_frame(io_bitmap);
- put_gfn(current->domain, gfn);
+ hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
if (!enabled)
return NESTEDHVM_VMEXIT_HOST;
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
nvmx->vmxon_region_pa = 0;
nvcpu->nv_vvmcx = NULL;
nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+ nvcpu->nv_vvmcx_pg = NULL;
nvmx->intr.intr_info = 0;
nvmx->intr.error_code = 0;
nvmx->iobitmap[0] = NULL;
nvmx->iobitmap[1] = NULL;
+ nvmx->iobitmap_pg[0] = NULL;
+ nvmx->iobitmap_pg[1] = NULL;
return 0;
out:
return -ENOMEM;
@@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu
index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
if (nvmx->iobitmap[index])
- hvm_unmap_guest_frame (nvmx->iobitmap[index]);
+ {
+ hvm_unmap_guest_frame (nvmx->iobitmap[index],
+ nvmx->iobitmap_pg[index]);
+ nvmx->iobitmap_pg[index] = NULL;
+ }
gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
- nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
- /* See comment in nestedsvm_vmcb_map re putting this gfn and
- * liveness of the map it backs */
- put_gfn(current->domain, gpa >> PAGE_SHIFT);
+ nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
+ &nvmx->iobitmap_pg[index]);
}
static inline void map_io_bitmap_all(struct vcpu *v)
@@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
__clear_current_vvmcs(v);
if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
- hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
- nvcpu->nv_vvmcx == NULL;
+ hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
+ nvcpu->nv_vvmcx = NULL;
nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+ nvcpu->nv_vvmcx_pg = NULL;
for (i=0; i<2; i++) {
if ( nvmx->iobitmap[i] ) {
- hvm_unmap_guest_frame(nvmx->iobitmap[i]);
+ hvm_unmap_guest_frame(nvmx->iobitmap[i],
+ nvmx->iobitmap_pg[i]);
nvmx->iobitmap[i] = NULL;
+ nvmx->iobitmap_pg[i] = NULL;
}
}
}
@@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
{
- nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
+ nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
+ &nvcpu->nv_vvmcx_pg);
nvcpu->nv_vvmcxaddr = gpa;
map_io_bitmap_all (v);
- /* See comment in nestedsvm_vmcb_map regarding putting this
- * gfn and liveness of the map that uses it */
- put_gfn(current->domain, gpa >> PAGE_SHIFT);
}
vmreturn(regs, VMSUCCEED);
@@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
else
{
/* Even if this VMCS isn't the current one, we must clear it. */
- vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
+ struct page_info *vvmcs_pg = NULL;
+ vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
if ( vvmcs )
__set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
- hvm_unmap_guest_frame(vvmcs);
- put_gfn(current->domain, gpa >> PAGE_SHIFT);
+ hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
}
vmreturn(regs, VMSUCCEED);
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -26,6 +26,7 @@
#include <asm/hvm/asid.h>
#include <public/domctl.h>
#include <public/hvm/save.h>
+#include <asm/mm.h>
/* Interrupt acknowledgement sources. */
enum hvm_intsrc {
@@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
unsigned int addr_size,
unsigned long *linear_addr);
-void *hvm_map_guest_frame_rw(unsigned long gfn);
-void *hvm_map_guest_frame_ro(unsigned long gfn);
-void hvm_unmap_guest_frame(void *p);
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+ struct page_info **page);
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+ struct page_info **page);
+void hvm_unmap_guest_frame(void *p, struct page_info *page);
static inline void hvm_set_info_guest(struct vcpu *v)
{
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -77,6 +77,7 @@ struct nestedvcpu {
void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
+ struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -26,6 +26,7 @@
struct nestedvmx {
paddr_t vmxon_region_pa;
void *iobitmap[2]; /* map (va) of L1 guest I/O bitmap */
+ struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
/* deferred nested interrupt */
struct {
unsigned long intr_info;
next prev parent reply other threads:[~2011-11-24 13:41 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-24 13:41 [PATCH 0 of 3] MM fixes, part 2 Andres Lagar-Cavilla
2011-11-24 13:41 ` Andres Lagar-Cavilla [this message]
2011-11-24 16:48 ` [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out Tim Deegan
2011-11-24 17:04 ` Keir Fraser
2011-11-24 13:41 ` [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk Andres Lagar-Cavilla
2011-11-24 17:07 ` Tim Deegan
2011-11-24 13:41 ` [PATCH 3 of 3] Fix liveness of pages in grant copy operations Andres Lagar-Cavilla
2011-11-24 17:07 ` Tim Deegan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f079cbeae77e5570c193.1322142091@xdev.gridcentric.ca \
--to=andres@lagarcavilla.org \
--cc=JBeulich@suse.com \
--cc=adin@gridcentric.ca \
--cc=andres@gridcentric.ca \
--cc=keir.xen@gmail.com \
--cc=tim@xen.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.