* [patch 0/4] oos shadow optimizations v2
@ 2008-12-02 0:32 Marcelo Tosatti
2008-12-02 0:32 ` [patch 1/4] KVM: MMU: use page array in unsync walk Marcelo Tosatti
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2008-12-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
Addressing comments from previous version.
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 1/4] KVM: MMU: use page array in unsync walk
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
@ 2008-12-02 0:32 ` Marcelo Tosatti
2008-12-02 0:32 ` [patch 2/4] KVM: MMU: collapse remote TLB flushes on root sync Marcelo Tosatti
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2008-12-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marcelo Tosatti
[-- Attachment #1: kvm-array-based-walk --]
[-- Type: text/plain, Size: 7922 bytes --]
Instead of invoking the handler directly collect pages into
an array so the caller can work with it.
Simplifies TLB flush collapsing.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -908,8 +908,9 @@ static void kvm_mmu_update_unsync_bitmap
struct kvm_mmu_page *sp = page_header(__pa(spte));
index = spte - sp->spt;
- __set_bit(index, sp->unsync_child_bitmap);
- sp->unsync_children = 1;
+ if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
+ sp->unsync_children++;
+ WARN_ON(!sp->unsync_children);
}
static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -936,7 +937,6 @@ static void kvm_mmu_update_parents_unsyn
static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
- sp->unsync_children = 1;
kvm_mmu_update_parents_unsync(sp);
return 1;
}
@@ -967,18 +967,41 @@ static void nonpaging_invlpg(struct kvm_
{
}
+#define KVM_PAGE_ARRAY_NR 16
+
+struct kvm_mmu_pages {
+ struct mmu_page_and_offset {
+ struct kvm_mmu_page *sp;
+ unsigned int idx;
+ } page[KVM_PAGE_ARRAY_NR];
+ unsigned int nr;
+};
+
#define for_each_unsync_children(bitmap, idx) \
for (idx = find_first_bit(bitmap, 512); \
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
- struct kvm_unsync_walk *walker)
+int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
- int i, ret;
+ int i;
- if (!sp->unsync_children)
- return 0;
+ if (sp->unsync)
+ for (i=0; i < pvec->nr; i++)
+ if (pvec->page[i].sp == sp)
+ return 0;
+
+ pvec->page[pvec->nr].sp = sp;
+ pvec->page[pvec->nr].idx = idx;
+ pvec->nr++;
+ return (pvec->nr == KVM_PAGE_ARRAY_NR);
+}
+
+static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
+ struct kvm_mmu_pages *pvec)
+{
+ int i, ret, nr_unsync_leaf = 0;
for_each_unsync_children(sp->unsync_child_bitmap, i) {
u64 ent = sp->spt[i];
@@ -988,17 +1011,22 @@ static int mmu_unsync_walk(struct kvm_mm
child = page_header(ent & PT64_BASE_ADDR_MASK);
if (child->unsync_children) {
- ret = mmu_unsync_walk(child, walker);
- if (ret)
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
+
+ ret = __mmu_unsync_walk(child, pvec);
+ if (!ret)
+ __clear_bit(i, sp->unsync_child_bitmap);
+ else if (ret > 0)
+ nr_unsync_leaf += ret;
+ else
return ret;
- __clear_bit(i, sp->unsync_child_bitmap);
}
if (child->unsync) {
- ret = walker->entry(child, walker);
- __clear_bit(i, sp->unsync_child_bitmap);
- if (ret)
- return ret;
+ nr_unsync_leaf++;
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
}
}
}
@@ -1006,7 +1034,17 @@ static int mmu_unsync_walk(struct kvm_mm
if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
sp->unsync_children = 0;
- return 0;
+ return nr_unsync_leaf;
+}
+
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+ struct kvm_mmu_pages *pvec)
+{
+ if (!sp->unsync_children)
+ return 0;
+
+ mmu_pages_add(pvec, sp, 0);
+ return __mmu_unsync_walk(sp, pvec);
}
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1056,30 +1094,81 @@ static int kvm_sync_page(struct kvm_vcpu
return 0;
}
-struct sync_walker {
- struct kvm_vcpu *vcpu;
- struct kvm_unsync_walk walker;
+struct mmu_page_path {
+ struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
+ unsigned int idx[PT64_ROOT_LEVEL-1];
};
-static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+#define for_each_sp(pvec, sp, parents, i) \
+ for (i = mmu_pages_next(&pvec, &parents, -1), \
+ sp = pvec.page[i].sp; \
+ i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
+ i = mmu_pages_next(&pvec, &parents, i))
+
+int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
+ int i)
{
- struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
- walker);
- struct kvm_vcpu *vcpu = sync_walk->vcpu;
+ int n;
+
+ for (n = i+1; n < pvec->nr; n++) {
+ struct kvm_mmu_page *sp = pvec->page[n].sp;
+
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+ parents->idx[0] = pvec->page[n].idx;
+ return n;
+ }
- kvm_sync_page(vcpu, sp);
- return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+ parents->parent[sp->role.level-2] = sp;
+ parents->idx[sp->role.level-1] = pvec->page[n].idx;
+ }
+
+ return n;
}
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
- struct sync_walker walker = {
- .walker = { .entry = mmu_sync_fn, },
- .vcpu = vcpu,
- };
+ struct kvm_mmu_page *sp;
+ unsigned int level = 0;
+
+ do {
+ unsigned int idx = parents->idx[level];
+
+ sp = parents->parent[level];
+ if (!sp)
+ return;
- while (mmu_unsync_walk(sp, &walker.walker))
+ --sp->unsync_children;
+ WARN_ON((int)sp->unsync_children < 0);
+ __clear_bit(idx, sp->unsync_child_bitmap);
+ level++;
+ } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+}
+
+static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
+ struct mmu_page_path *parents,
+ struct kvm_mmu_pages *pvec)
+{
+ parents->parent[parent->role.level-1] = NULL;
+ pvec->nr = 0;
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *parent)
+{
+ int i;
+ struct kvm_mmu_page *sp;
+ struct mmu_page_path parents;
+ struct kvm_mmu_pages pages;
+
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ while (mmu_unsync_walk(parent, &pages)) {
+ for_each_sp(pages, sp, parents, i) {
+ kvm_sync_page(vcpu, sp);
+ mmu_pages_clear_parents(&parents);
+ }
cond_resched_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ }
}
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1245,33 +1334,29 @@ static void kvm_mmu_unlink_parents(struc
}
}
-struct zap_walker {
- struct kvm_unsync_walk walker;
- struct kvm *kvm;
- int zapped;
-};
-
-static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
-{
- struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
- walker);
- kvm_mmu_zap_page(zap_walk->kvm, sp);
- zap_walk->zapped = 1;
- return 0;
-}
-
-static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int mmu_zap_unsync_children(struct kvm *kvm,
+ struct kvm_mmu_page *parent)
{
- struct zap_walker walker = {
- .walker = { .entry = mmu_zap_fn, },
- .kvm = kvm,
- .zapped = 0,
- };
+ int i, zapped = 0;
+ struct mmu_page_path parents;
+ struct kvm_mmu_pages pages;
- if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+ if (parent->role.level == PT_PAGE_TABLE_LEVEL)
return 0;
- mmu_unsync_walk(sp, &walker.walker);
- return walker.zapped;
+
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ while (mmu_unsync_walk(parent, &pages)) {
+ struct kvm_mmu_page *sp;
+
+ for_each_sp(pages, sp, parents, i) {
+ kvm_mmu_zap_page(kvm, sp);
+ mmu_pages_clear_parents(&parents);
+ }
+ zapped += pages.nr;
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ }
+
+ return zapped;
}
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -200,7 +200,7 @@ struct kvm_mmu_page {
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
- bool unsync_children;
+ unsigned int unsync_children;
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 2/4] KVM: MMU: collapse remote TLB flushes on root sync
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
2008-12-02 0:32 ` [patch 1/4] KVM: MMU: use page array in unsync walk Marcelo Tosatti
@ 2008-12-02 0:32 ` Marcelo Tosatti
2008-12-02 0:32 ` [patch 3/4] KVM: MMU: skip global pgtables on sync due to cr3 switch Marcelo Tosatti
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2008-12-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marcelo Tosatti
[-- Attachment #1: kvm-collapse-tlb-flush --]
[-- Type: text/plain, Size: 2002 bytes --]
Collapse remote TLB flushes on root sync.
kernbench is 2.7% faster on 4-way guest. Improvements have been seen
with other loads such as AIM7.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -621,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, u
return NULL;
}
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
u64 *spte;
@@ -667,8 +667,7 @@ static void rmap_write_protect(struct kv
spte = rmap_next(kvm, rmapp, spte);
}
- if (write_protected)
- kvm_flush_remote_tlbs(kvm);
+ return write_protected;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -1083,7 +1082,8 @@ static int kvm_sync_page(struct kvm_vcpu
return 1;
}
- rmap_write_protect(vcpu->kvm, sp->gfn);
+ if (rmap_write_protect(vcpu->kvm, sp->gfn))
+ kvm_flush_remote_tlbs(vcpu->kvm);
kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1162,6 +1162,14 @@ static void mmu_sync_children(struct kvm
kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
+ int protected = 0;
+
+ for_each_sp(pages, sp, parents, i)
+ protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+
+ if (protected)
+ kvm_flush_remote_tlbs(vcpu->kvm);
+
for_each_sp(pages, sp, parents, i) {
kvm_sync_page(vcpu, sp);
mmu_pages_clear_parents(&parents);
@@ -1226,7 +1234,8 @@ static struct kvm_mmu_page *kvm_mmu_get_
sp->role = role;
hlist_add_head(&sp->hash_link, bucket);
if (!metaphysical) {
- rmap_write_protect(vcpu->kvm, gfn);
+ if (rmap_write_protect(vcpu->kvm, gfn))
+ kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
}
if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 3/4] KVM: MMU: skip global pgtables on sync due to cr3 switch
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
2008-12-02 0:32 ` [patch 1/4] KVM: MMU: use page array in unsync walk Marcelo Tosatti
2008-12-02 0:32 ` [patch 2/4] KVM: MMU: collapse remote TLB flushes on root sync Marcelo Tosatti
@ 2008-12-02 0:32 ` Marcelo Tosatti
2008-12-02 0:32 ` [patch 4/4] KVM: MMU: prepopulate the shadow on invlpg Marcelo Tosatti
2008-12-05 17:57 ` [patch 0/4] oos shadow optimizations v2 Avi Kivity
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2008-12-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marcelo Tosatti
[-- Attachment #1: oos-global --]
[-- Type: text/plain, Size: 8325 bytes --]
Skip syncing global pages on cr3 switch (but not on cr4/cr0). This is
important for Linux 32-bit guests with PAE, where the kmap page is
marked as global.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -182,6 +182,8 @@ struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ struct list_head oos_link;
+
/*
* The following two entries are used to key the shadow page in the
* hash table.
@@ -200,6 +202,7 @@ struct kvm_mmu_page {
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
+ bool global;
unsigned int unsync_children;
union {
u64 *parent_pte; /* !multimapped */
@@ -356,6 +359,7 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
+ struct list_head oos_global_pages;
struct dmar_domain *intel_iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
@@ -385,6 +389,7 @@ struct kvm_vm_stat {
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
+ u32 mmu_unsync_global;
u32 remote_tlb_flush;
u32 lpages;
};
@@ -603,6 +608,7 @@ void __kvm_mmu_free_some_pages(struct kv
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -793,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_allo
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&sp->oos_link);
ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
+ sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -1066,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_look
return NULL;
}
+static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ list_del(&sp->oos_link);
+ --kvm->stat.mmu_unsync_global;
+}
+
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
sp->unsync = 0;
+ if (sp->global)
+ kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync;
}
@@ -1615,9 +1625,15 @@ static int kvm_unsync_page(struct kvm_vc
if (s->role.word != sp->role.word)
return 1;
}
- kvm_mmu_mark_parents_unsync(vcpu, sp);
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
+
+ if (sp->global) {
+ list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
+ ++vcpu->kvm->stat.mmu_unsync_global;
+ } else
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
+
mmu_convert_notrap(sp);
return 0;
}
@@ -1643,12 +1659,21 @@ static int mmu_need_write_protect(struct
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
- gfn_t gfn, pfn_t pfn, bool speculative,
+ int global, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync)
{
u64 spte;
int ret = 0;
u64 mt_mask = shadow_mt_mask;
+ struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
+
+ if (!global && sp->global) {
+ sp->global = 0;
+ if (sp->unsync) {
+ kvm_unlink_unsync_global(vcpu->kvm, sp);
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
+ }
+ }
/*
* We don't set the accessed bit, since we sometimes want to see
@@ -1717,8 +1742,8 @@ set_pte:
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, int largepage, gfn_t gfn,
- pfn_t pfn, bool speculative)
+ int *ptwrite, int largepage, int global,
+ gfn_t gfn, pfn_t pfn, bool speculative)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1751,7 +1776,7 @@ static void mmu_set_spte(struct kvm_vcpu
}
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
- dirty, largepage, gfn, pfn, speculative, true)) {
+ dirty, largepage, global, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1808,7 +1833,7 @@ static int direct_map_entry(struct kvm_s
|| (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
0, walk->write, 1, &walk->pt_write,
- walk->largepage, gfn, walk->pfn, false);
+ walk->largepage, 0, gfn, walk->pfn, false);
++vcpu->stat.pf_fixed;
return 1;
}
@@ -1995,6 +2020,15 @@ static void mmu_sync_roots(struct kvm_vc
}
}
+static void mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page *sp, *n;
+
+ list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
+ kvm_sync_page(vcpu, sp);
+}
+
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
@@ -2002,6 +2036,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu
spin_unlock(&vcpu->kvm->mmu_lock);
}
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_sync_global(vcpu);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
return vaddr;
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -274,7 +274,8 @@ static void FNAME(update_pte)(struct kvm
return;
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+ gpte & PT_DIRTY_MASK, NULL, largepage,
+ gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
pfn, true);
}
@@ -301,8 +302,9 @@ static int FNAME(shadow_walk_entry)(stru
mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
sw->user_fault, sw->write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
- false);
+ sw->ptwrite, sw->largepage,
+ gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+ gw->gfn, sw->pfn, false);
sw->sptep = sptep;
return 1;
}
@@ -580,7 +582,7 @@ static int FNAME(sync_page)(struct kvm_v
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
- is_dirty_pte(gpte), 0, gfn,
+ is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -104,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_en
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
+ { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
@@ -315,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu,
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
@@ -358,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu,
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -4113,6 +4116,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 4/4] KVM: MMU: prepopulate the shadow on invlpg
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
` (2 preceding siblings ...)
2008-12-02 0:32 ` [patch 3/4] KVM: MMU: skip global pgtables on sync due to cr3 switch Marcelo Tosatti
@ 2008-12-02 0:32 ` Marcelo Tosatti
2008-12-05 17:57 ` [patch 0/4] oos shadow optimizations v2 Avi Kivity
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2008-12-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marcelo Tosatti
[-- Attachment #1: invlpg-opt --]
[-- Type: text/plain, Size: 4910 bytes --]
If the guest executes invlpg, peek into the pagetable and attempt to
prepopulate the shadow entry.
Also stop dirty fault updates from interfering with the fork detector.
2% improvement on RHEL3/AIM7.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2441,7 +2441,8 @@ static void kvm_mmu_access_page(struct k
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+ const u8 *new, int bytes,
+ bool guest_initiated)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -2467,15 +2468,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
- if (gfn == vcpu->arch.last_pt_write_gfn
- && !last_updated_pte_accessed(vcpu)) {
- ++vcpu->arch.last_pt_write_count;
- if (vcpu->arch.last_pt_write_count >= 3)
- flooded = 1;
- } else {
- vcpu->arch.last_pt_write_gfn = gfn;
- vcpu->arch.last_pt_write_count = 1;
- vcpu->arch.last_pte_updated = NULL;
+ if (guest_initiated) {
+ if (gfn == vcpu->arch.last_pt_write_gfn
+ && !last_updated_pte_accessed(vcpu)) {
+ ++vcpu->arch.last_pt_write_count;
+ if (vcpu->arch.last_pt_write_count >= 3)
+ flooded = 1;
+ } else {
+ vcpu->arch.last_pt_write_gfn = gfn;
+ vcpu->arch.last_pt_write_count = 1;
+ vcpu->arch.last_pte_updated = NULL;
+ }
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2615,9 +2618,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- spin_lock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.invlpg(vcpu, gva);
- spin_unlock(&vcpu->kvm->mmu_lock);
kvm_mmu_flush_tlb(vcpu);
++vcpu->stat.invlpg;
}
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
int *ptwrite;
pfn_t pfn;
u64 *sptep;
+ gpa_t pte_gpa;
};
static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
- kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+ kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte;
}
@@ -468,8 +469,15 @@ static int FNAME(shadow_invlpg_entry)(st
struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
{
+ struct shadow_walker *sw =
+ container_of(_sw, struct shadow_walker, walker);
if (level == PT_PAGE_TABLE_LEVEL) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+ sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
if (is_shadow_present_pte(*sptep))
rmap_remove(vcpu->kvm, sptep);
set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
@@ -482,11 +490,26 @@ static int FNAME(shadow_invlpg_entry)(st
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
+ pt_element_t gpte;
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_invlpg_entry), },
+ .pte_gpa = -1,
};
+ spin_lock(&vcpu->kvm->mmu_lock);
walk_shadow(&walker.walker, vcpu, gva);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ if (walker.pte_gpa == -1)
+ return;
+ if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+ sizeof(pt_element_t)))
+ return;
+ if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+ if (mmu_topup_memory_caches(vcpu))
+ return;
+ kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+ sizeof(pt_element_t), 0);
+ }
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2046,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+ kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -602,7 +602,8 @@ unsigned long segment_base(u16 selector)
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes);
+ const u8 *new, int bytes,
+ bool guest_initiated);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [patch 0/4] oos shadow optimizations v2
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
` (3 preceding siblings ...)
2008-12-02 0:32 ` [patch 4/4] KVM: MMU: prepopulate the shadow on invlpg Marcelo Tosatti
@ 2008-12-05 17:57 ` Avi Kivity
4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2008-12-05 17:57 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: kvm
Marcelo Tosatti wrote:
> Addressing comments from previous version.
>
>
All applied, thanks.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-12-05 17:57 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-12-02 0:32 [patch 0/4] oos shadow optimizations v2 Marcelo Tosatti
2008-12-02 0:32 ` [patch 1/4] KVM: MMU: use page array in unsync walk Marcelo Tosatti
2008-12-02 0:32 ` [patch 2/4] KVM: MMU: collapse remote TLB flushes on root sync Marcelo Tosatti
2008-12-02 0:32 ` [patch 3/4] KVM: MMU: skip global pgtables on sync due to cr3 switch Marcelo Tosatti
2008-12-02 0:32 ` [patch 4/4] KVM: MMU: prepopulate the shadow on invlpg Marcelo Tosatti
2008-12-05 17:57 ` [patch 0/4] oos shadow optimizations v2 Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).