From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: [PATCH v4 11/11] KVM: MMU: improve write flooding detected
Date: Thu, 22 Sep 2011 16:58:36 +0800 [thread overview]
Message-ID: <4E7AF8BC.7010803@cn.fujitsu.com> (raw)
In-Reply-To: <4E7AF758.7060900@cn.fujitsu.com>
Detecting write-flooding does not work well, when we handle page written, if
the last speculative spte is not accessed, we treat the page is
write-flooding, however, we can speculative spte on many path, such as pte
prefetch, page synced, that means the last speculative spte may be not point
to the written page and the written page can be accessed via other sptes, so
depends on the Accessed bit of the last speculative spte is not enough
Instead of detected page accessed, we can detect whether the spte is accessed
after it is written, if the spte is not accessed but it is written frequently,
we treat is not a page table or it not used for a long time
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
arch/x86/include/asm/kvm_host.h | 6 +--
arch/x86/kvm/mmu.c | 62 +++++++++++++++-----------------------
arch/x86/kvm/paging_tmpl.h | 12 +++----
3 files changed, 32 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 927ba73..9d17238 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -239,6 +239,8 @@ struct kvm_mmu_page {
int clear_spte_count;
#endif
+ int write_flooding_count;
+
struct rcu_head rcu;
};
@@ -353,10 +355,6 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
- gfn_t last_pt_write_gfn;
- int last_pt_write_count;
- u64 *last_pte_updated;
-
struct fpu guest_fpu;
u64 xcr0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 13f4d2a..77030ea 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1652,6 +1652,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
sp->spt[i] = 0ull;
}
+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+ sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+ struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+ __clear_sp_write_flooding_count(sp);
+}
+
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -1695,6 +1707,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
} else if (sp->unsync)
kvm_mmu_mark_parents_unsync(sp);
+ __clear_sp_write_flooding_count(sp);
trace_kvm_mmu_get_page(sp, false);
return sp;
}
@@ -1847,15 +1860,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
mmu_page_remove_parent_pte(sp, parent_pte);
}
-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
- int i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- vcpu->arch.last_pte_updated = NULL;
-}
-
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
u64 *parent_pte;
@@ -1915,7 +1919,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
}
sp->role.invalid = 1;
- kvm_mmu_reset_last_pte_updated(kvm);
return ret;
}
@@ -2360,8 +2363,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}
kvm_release_pfn_clean(pfn);
- if (speculative)
- vcpu->arch.last_pte_updated = sptep;
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -3522,13 +3523,6 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
kvm_mmu_flush_tlb(vcpu);
}
-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
-{
- u64 *spte = vcpu->arch.last_pte_updated;
-
- return !!(spte && (*spte & shadow_accessed_mask));
-}
-
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
const u8 *new, int *bytes)
{
@@ -3569,22 +3563,16 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
* If we're seeing too many writes to a page, it may no longer be a page table,
* or we may be forking, in which case it is better to unmap the page.
*/
-static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
{
- bool flooded = false;
-
- if (gfn == vcpu->arch.last_pt_write_gfn
- && !last_updated_pte_accessed(vcpu)) {
- ++vcpu->arch.last_pt_write_count;
- if (vcpu->arch.last_pt_write_count >= 3)
- flooded = true;
- } else {
- vcpu->arch.last_pt_write_gfn = gfn;
- vcpu->arch.last_pt_write_count = 1;
- vcpu->arch.last_pte_updated = NULL;
- }
+ /*
+ * Skip write-flooding detected for the sp whose level is 1, because
+ * it can become unsync, then the guest page is not write-protected.
+ */
+ if (sp->role.level == 1)
+ return false;
- return flooded;
+ return ++sp->write_flooding_count >= 3;
}
/*
@@ -3656,7 +3644,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
- bool remote_flush, local_flush, zap_page, flooded, misaligned;
+ bool remote_flush, local_flush, zap_page;
/*
* If we don't have indirect shadow pages, it means no page is
@@ -3682,12 +3670,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
- flooded = detect_write_flooding(vcpu, gfn);
mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
- misaligned = detect_write_misaligned(sp, gpa, bytes);
+ spte = get_written_sptes(sp, gpa, &npte);
- if (misaligned || flooded) {
+ if (detect_write_misaligned(sp, gpa, bytes) ||
+ detect_write_flooding(sp, spte)) {
zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
&invalid_list);
++vcpu->kvm->stat.mmu_flooded;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9efb860..52e9d58 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
shadow_walk_next(&it)) {
gfn_t table_gfn;
+ clear_sp_write_flooding_count(it.sptep);
drop_large_spte(vcpu, it.sptep);
sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
shadow_walk_next(&it)) {
gfn_t direct_gfn;
+ clear_sp_write_flooding_count(it.sptep);
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(it.sptep, sp);
}
+ clear_sp_write_flooding_count(it.sptep);
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
user_fault, write_fault, emulate, it.level,
gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
- if (!prefault) {
+ if (!prefault)
inject_page_fault(vcpu, &walker.fault);
- /* reset fork detector */
- vcpu->arch.last_pt_write_count = 0;
- }
+
return 0;
}
@@ -641,9 +642,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
sptep, *sptep, emulate);
- if (!emulate)
- vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
++vcpu->stat.pf_fixed;
trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
--
1.7.5.4
next prev parent reply other threads:[~2011-09-22 8:56 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-22 8:52 [PATCH v4 00/11] KVM: x86: optimize for writing guest page Xiao Guangrong
2011-09-22 8:53 ` [PATCH v4 01/11] KVM: MMU: avoid pte_list_desc running out in kvm_mmu_pte_write Xiao Guangrong
2011-09-22 8:53 ` [PATCH v4 02/11] KVM: x86: tag the instructions which are used to write page table Xiao Guangrong
2011-09-22 8:55 ` [PATCH v4 04/11] KVM: x86: cleanup port-in/port-out emulated Xiao Guangrong
2011-09-22 8:55 ` [PATCH v4 05/11] KVM: MMU: do not mark accessed bit on pte write path Xiao Guangrong
2011-09-22 8:56 ` [PATCH v4 06/11] KVM: MMU: cleanup FNAME(invlpg) Xiao Guangrong
2011-09-22 8:56 ` [PATCH v4 07/11] KVM: MMU: fast prefetch spte on invlpg path Xiao Guangrong
2011-09-22 8:56 ` [PATCH v4 08/11] KVM: MMU: remove unnecessary kvm_mmu_free_some_pages Xiao Guangrong
2011-09-22 8:57 ` [PATCH v4 09/11] KVM: MMU: split kvm_mmu_pte_write function Xiao Guangrong
2011-09-22 8:57 ` [PATCH v4 10/11] KVM: MMU: fix detecting misaligned accessed Xiao Guangrong
2011-09-22 8:58 ` Xiao Guangrong [this message]
2011-09-22 9:02 ` [PATCH v4 03/11] KVM: x86: retry non-page-table writing instructions Xiao Guangrong
2011-09-23 11:51 ` [PATCH v4 00/11] KVM: x86: optimize for writing guest page Marcelo Tosatti
2011-09-30 3:49 ` Xiao Guangrong
2011-10-05 13:25 ` Avi Kivity
2011-10-06 17:50 ` Marcelo Tosatti
2011-10-06 17:53 ` Marcelo Tosatti
2011-10-08 4:06 ` Xiao Guangrong
2011-10-09 12:24 ` Avi Kivity
2011-10-09 13:37 ` Avi Kivity
2011-10-11 8:36 ` Xiao Guangrong
2011-11-04 9:16 ` Xiao Guangrong
2011-11-06 15:35 ` Avi Kivity
2011-11-10 13:28 ` Xiao Guangrong
2011-11-10 14:05 ` Avi Kivity
2011-11-11 3:42 ` Xiao Guangrong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E7AF8BC.7010803@cn.fujitsu.com \
--to=xiaoguangrong@cn.fujitsu.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).