From: Avi Kivity <avi@qumranet.com>
To: kvm-devel@lists.sourceforge.net
Cc: linux-kernel@vger.kernel.org, akpm@osdl.org, mingo@elte.hu
Subject: [PATCH 1/33] KVM: MMU: Implement simple reverse mapping
Date: Thu, 04 Jan 2007 15:50:05 -0000 [thread overview]
Message-ID: <20070104155005.98B85250048@il.qumranet.com> (raw)
In-Reply-To: <459D21DD.5090506@qumranet.com>
Keep in each host page frame's page->private a pointer to the shadow pte which
maps it. If there are multiple shadow ptes mapping the page, set bit 0 of
page->private, and use the rest as a pointer to a linked list of all such
mappings.
Reverse mappings are needed because when we cache shadow page tables,
we must protect the guest page tables from being modified by the guest, as
that would invalidate the cached ptes.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Index: linux-2.6/drivers/kvm/mmu.c
===================================================================
--- linux-2.6.orig/drivers/kvm/mmu.c
+++ linux-2.6/drivers/kvm/mmu.c
@@ -27,6 +27,7 @@
#include "kvm.h"
#define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
#define ASSERT(x) \
if (!(x)) { \
@@ -125,6 +126,13 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
+#define RMAP_EXT 4
+
+struct kvm_rmap_desc {
+ u64 *shadow_ptes[RMAP_EXT];
+ struct kvm_rmap_desc *more;
+};
+
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_WP_MASK;
@@ -150,6 +158,120 @@ static int is_io_pte(unsigned long pte)
return pte & PT_SHADOW_IO_MARK;
}
+static int is_rmap_pte(u64 pte)
+{
+ return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
+ == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If page->private bit zero is zero, then page->private points to the
+ * shadow page table entry that points to page_address(page).
+ *
+ * If page->private bit zero is one, (then page->private & ~1) points
+ * to a struct kvm_rmap_desc containing more mappings.
+ */
+static void rmap_add(struct kvm *kvm, u64 *spte)
+{
+ struct page *page;
+ struct kvm_rmap_desc *desc;
+ int i;
+
+ if (!is_rmap_pte(*spte))
+ return;
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ if (!page->private) {
+ rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+ page->private = (unsigned long)spte;
+ } else if (!(page->private & 1)) {
+ rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+ desc = kzalloc(sizeof *desc, GFP_NOWAIT);
+ if (!desc)
+ BUG(); /* FIXME: return error */
+ desc->shadow_ptes[0] = (u64 *)page->private;
+ desc->shadow_ptes[1] = spte;
+ page->private = (unsigned long)desc | 1;
+ } else {
+ rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+ desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+ desc = desc->more;
+ if (desc->shadow_ptes[RMAP_EXT-1]) {
+ desc->more = kzalloc(sizeof *desc->more, GFP_NOWAIT);
+ if (!desc->more)
+ BUG(); /* FIXME: return error */
+ desc = desc->more;
+ }
+ for (i = 0; desc->shadow_ptes[i]; ++i)
+ ;
+ desc->shadow_ptes[i] = spte;
+ }
+}
+
+static void rmap_desc_remove_entry(struct page *page,
+ struct kvm_rmap_desc *desc,
+ int i,
+ struct kvm_rmap_desc *prev_desc)
+{
+ int j;
+
+ for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+ ;
+ desc->shadow_ptes[i] = desc->shadow_ptes[j];
+ desc->shadow_ptes[j] = 0;
+ if (j != 0)
+ return;
+ if (!prev_desc && !desc->more)
+ page->private = (unsigned long)desc->shadow_ptes[0];
+ else
+ if (prev_desc)
+ prev_desc->more = desc->more;
+ else
+ page->private = (unsigned long)desc->more | 1;
+ kfree(desc);
+}
+
+static void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+ struct page *page;
+ struct kvm_rmap_desc *desc;
+ struct kvm_rmap_desc *prev_desc;
+ int i;
+
+ if (!is_rmap_pte(*spte))
+ return;
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ if (!page->private) {
+ printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+ BUG();
+ } else if (!(page->private & 1)) {
+ rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+ if ((u64 *)page->private != spte) {
+ printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
+ spte, *spte);
+ BUG();
+ }
+ page->private = 0;
+ } else {
+ rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+ desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ prev_desc = NULL;
+ while (desc) {
+ for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+ if (desc->shadow_ptes[i] == spte) {
+ rmap_desc_remove_entry(page, desc, i,
+ prev_desc);
+ return;
+ }
+ prev_desc = desc;
+ desc = desc->more;
+ }
+ BUG();
+ }
+}
+
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
struct kvm_mmu_page *page_head = page_header(page_hpa);
@@ -229,27 +351,27 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu,
static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
int level)
{
+ u64 *pos;
+ u64 *end;
+
ASSERT(vcpu);
ASSERT(VALID_PAGE(page_hpa));
ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
- if (level == 1)
- memset(__va(page_hpa), 0, PAGE_SIZE);
- else {
- u64 *pos;
- u64 *end;
-
- for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
- pos != end; pos++) {
- u64 current_ent = *pos;
+ for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
+ pos != end; pos++) {
+ u64 current_ent = *pos;
- *pos = 0;
- if (is_present_pte(current_ent))
+ if (is_present_pte(current_ent)) {
+ if (level != 1)
release_pt_page_64(vcpu,
current_ent &
PT64_BASE_ADDR_MASK,
level - 1);
+ else
+ rmap_remove(vcpu->kvm, pos);
}
+ *pos = 0;
}
kvm_mmu_free_page(vcpu, page_hpa);
}
@@ -275,6 +397,7 @@ static int nonpaging_map(struct kvm_vcpu
page_header_update_slot(vcpu->kvm, table, v);
table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
PT_USER_MASK;
+ rmap_add(vcpu->kvm, &table[index]);
return 0;
}
@@ -437,6 +560,7 @@ static inline void set_pte_common(struct
} else {
*shadow_pte |= paddr;
page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+ rmap_add(vcpu->kvm, shadow_pte);
}
}
@@ -489,6 +613,7 @@ static void paging_inval_page(struct kvm
u64 *table = __va(page_addr);
if (level == PT_PAGE_TABLE_LEVEL ) {
+ rmap_remove(vcpu->kvm, &table[index]);
table[index] = 0;
return;
}
@@ -679,8 +804,9 @@ void kvm_mmu_slot_remove_write_access(st
pt = __va(page->page_hpa);
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
/* avoid RMW */
- if (pt[i] & PT_WRITABLE_MASK)
+ if (pt[i] & PT_WRITABLE_MASK) {
+ rmap_remove(kvm, &pt[i]);
pt[i] &= ~PT_WRITABLE_MASK;
-
+ }
}
}
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -236,6 +236,7 @@ struct kvm {
struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
int memory_config_version;
int busy;
+ unsigned long rmap_overflow;
};
struct kvm_stat {
Index: linux-2.6/drivers/kvm/paging_tmpl.h
===================================================================
--- linux-2.6.orig/drivers/kvm/paging_tmpl.h
+++ linux-2.6/drivers/kvm/paging_tmpl.h
@@ -261,6 +261,7 @@ static int FNAME(fix_write_pf)(struct kv
mark_page_dirty(vcpu->kvm, gfn);
*shadow_ent |= PT_WRITABLE_MASK;
*guest_ent |= PT_DIRTY_MASK;
+ rmap_add(vcpu->kvm, shadow_ent);
return 1;
}
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -638,6 +638,7 @@ raced:
| __GFP_ZERO);
if (!new.phys_mem[i])
goto out_free;
+ new.phys_mem[i]->private = 0;
}
}
next prev parent reply other threads:[~2007-01-04 15:50 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-04 15:48 [PATCH 0/33] KVM: MMU: Cache shadow page tables Avi Kivity
2007-01-04 15:50 ` Avi Kivity [this message]
[not found] ` <459D21DD.5090506-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-04 15:51 ` [PATCH 2/33] KVM: MMU: Teach the page table walker to track guest page table gfns Avi Kivity
2007-01-04 15:52 ` [PATCH 3/33] KVM: MMU: Load the pae pdptrs on cr3 change like the processor does Avi Kivity
2007-01-04 15:53 ` [PATCH 4/33] KVM: MMU: Fold fetch_guest() into init_walker() Avi Kivity
2007-01-04 15:54 ` [PATCH 5/33] KVM: MMU: Special treatment for shadow pae root pages Avi Kivity
2007-01-04 15:55 ` [PATCH 6/33] KVM: MMU: Use the guest pdptrs instead of mapping cr3 in pae mode Avi Kivity
2007-01-04 15:56 ` [PATCH 7/33] KVM: MMU: Make the shadow page tables also special-case pae Avi Kivity
2007-01-04 15:57 ` [PATCH 8/33] KVM: MMU: Make kvm_mmu_alloc_page() return a kvm_mmu_page pointer Avi Kivity
2007-01-04 15:58 ` [PATCH 9/33] KVM: MMU: Shadow page table caching Avi Kivity
2007-01-04 15:59 ` [PATCH 10/33] KVM: MMU: Write protect guest pages when a shadow is created for them Avi Kivity
2007-01-04 16:00 ` [PATCH 11/33] KVM: MMU: Let the walker extract the target page gfn from the pte Avi Kivity
2007-01-04 16:01 ` [PATCH 12/33] KVM: MMU: Support emulated writes into RAM Avi Kivity
2007-01-04 16:02 ` [PATCH 13/33] KVM: MMU: Zap shadow page table entries on writes to guest page tables Avi Kivity
2007-01-04 16:03 ` [PATCH 14/33] KVM: MMU: If emulating an instruction fails, try unprotecting the page Avi Kivity
2007-01-04 16:04 ` [PATCH 15/33] KVM: MMU: Implement child shadow unlinking Avi Kivity
2007-01-04 16:06 ` [PATCH 17/33] KVM: MMU: oom handling Avi Kivity
2007-01-04 16:07 ` [PATCH 18/33] KVM: MMU: Remove invlpg interception Avi Kivity
2007-01-04 16:08 ` [PATCH 19/33] KVM: MMU: Remove release_pt_page_64() Avi Kivity
2007-01-04 16:10 ` [PATCH 21/33] KVM: MMU: Move is_empty_shadow_page() above kvm_mmu_free_page() Avi Kivity
2007-01-04 16:11 ` [PATCH 22/33] KVM: MMU: Ensure freed shadow pages are clean Avi Kivity
2007-01-04 16:13 ` [PATCH 24/33] KVM: MMU: Page table write flood protection Avi Kivity
2007-01-04 16:14 ` [PATCH 25/33] KVM: MMU: Never free a shadow page actively serving as a root Avi Kivity
2007-01-04 16:15 ` [PATCH 26/33] KVM: MMU: Fix cmpxchg8b emulation Avi Kivity
2007-01-04 16:16 ` [PATCH 27/33] KVM: MMU: Treat user-mode faults as a hint that a page is no longer a page table Avi Kivity
2007-01-04 16:17 ` [PATCH 28/33] KVM: MMU: Free pages on kvm destruction Avi Kivity
2007-01-04 16:18 ` [PATCH 29/33] KVM: MMU: Replace atomic allocations by preallocated objects Avi Kivity
2007-01-04 16:19 ` [PATCH 30/33] KVM: MMU: Detect oom conditions and propagate error to userspace Avi Kivity
2007-01-04 16:20 ` [PATCH 31/33] KVM: MMU: Flush guest tlb when reducing permissions on a pte Avi Kivity
2007-01-04 16:21 ` [PATCH 32/33] KVM: MMU: Destroy mmu while we still have a vcpu left Avi Kivity
2007-01-04 16:22 ` [PATCH 33/33] KVM: MMU: add audit code to check mappings, etc are correct Avi Kivity
2007-01-04 17:22 ` [PATCH 0/33] KVM: MMU: Cache shadow page tables Andrew Morton
[not found] ` <20070104092226.91fa2dfe.akpm-3NddpPZAyC0@public.gmane.org>
2007-01-04 17:41 ` Avi Kivity
[not found] ` <459D3C65.2090703-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-04 18:02 ` Ingo Molnar
2007-01-04 16:05 ` [PATCH 16/33] KVM: MMU: kvm_mmu_put_page() only removes one link to the page Avi Kivity
2007-01-04 16:09 ` [PATCH 20/33] KVM: MMU: Handle misaligned accesses to write protected guest page tables Avi Kivity
2007-01-04 16:12 ` [PATCH 23/33] KVM: MMU: If an empty shadow page is not empty, report more info Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070104155005.98B85250048@il.qumranet.com \
--to=avi@qumranet.com \
--cc=akpm@osdl.org \
--cc=kvm-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox