From: Avi Kivity <avi@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
Subject: [PATCH 4/4] KVM: MMU: Reinstate pte prefetch on invlpg
Date: Mon, 15 Feb 2010 14:48:29 +0200 [thread overview]
Message-ID: <1266238109-30280-5-git-send-email-avi@redhat.com> (raw)
In-Reply-To: <1266238109-30280-1-git-send-email-avi@redhat.com>
Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:
"The processor may create entries in paging-structure caches for
translations required for prefetches and for accesses that are a
result of speculative execution that would never actually occur
in the executed code path."
And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so an older invlpg could install the pte over
a newer invlpg.
Reinstate the prefetch logic, but this time note whether another invlpg has
executed using a counter. If a race occured, do not install the pte.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 37 +++++++++++++++++++++++--------------
arch/x86/kvm/paging_tmpl.h | 15 +++++++++++++++
3 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f9a2f66..ded4ed7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -389,6 +389,7 @@ struct kvm_arch {
unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
unsigned int n_alloc_mmu_pages;
+ atomic_t invlpg_counter;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 086025e..e821609 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2611,20 +2611,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int flooded = 0;
int npte;
int r;
+ int invlpg_counter;
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
- switch (bytes) {
- case 4:
- gentry = *(const u32 *)new;
- break;
- case 8:
- gentry = *(const u64 *)new;
- break;
- default:
- gentry = 0;
- break;
- }
+ invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
/*
* Assume that the pte write on a page table of the same type
@@ -2632,16 +2623,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
* (might be false while changing modes). Note it is verified later
* by update_pte().
*/
- if (is_pae(vcpu) && bytes == 4) {
+ if ((is_pae(vcpu) && bytes == 4) || !new) {
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
- gpa &= ~(gpa_t)7;
- r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
+ if (is_pae(vcpu)) {
+ gpa &= ~(gpa_t)7;
+ bytes = 8;
+ }
+ r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
if (r)
gentry = 0;
+ new = (const u8 *)&gentry;
+ }
+
+ switch (bytes) {
+ case 4:
+ gentry = *(const u32 *)new;
+ break;
+ case 8:
+ gentry = *(const u64 *)new;
+ break;
+ default:
+ gentry = 0;
+ break;
}
mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
spin_lock(&vcpu->kvm->mmu_lock);
+ if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+ gentry = 0;
kvm_mmu_access_page(vcpu, gfn);
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 81eab9a..0628b94 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -457,6 +457,7 @@ out_unlock:
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_shadow_walk_iterator iterator;
+ gpa_t pte_gpa = -1;
int level;
u64 *sptep;
int need_flush = 0;
@@ -470,6 +471,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ pte_gpa = (sp->gfn << PAGE_SHIFT);
+ pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
@@ -487,7 +492,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (need_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
+
+ atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
spin_unlock(&vcpu->kvm->mmu_lock);
+
+ if (pte_gpa == -1)
+ return;
+
+ if (mmu_topup_memory_caches(vcpu))
+ return;
+ kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
--
1.6.5.3
next prev parent reply other threads:[~2010-02-15 12:48 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-15 12:48 [PATCH 0/4] Fix some mmu/emulator atomicity issues Avi Kivity
2010-02-15 12:48 ` [PATCH 1/4] KVM: MMU: Consolidate two guest pte reads in kvm_mmu_pte_write() Avi Kivity
2010-02-15 12:48 ` [PATCH 2/4] KVM: Make locked operations truly atomic Avi Kivity
2010-02-15 12:48 ` [PATCH 3/4] KVM: Don't follow an atmoic operation by a non-atomic one Avi Kivity
2010-02-15 12:48 ` Avi Kivity [this message]
2010-02-16 8:40 ` [PATCH 4/4] KVM: MMU: Reinstate pte prefetch on invlpg Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1266238109-30280-5-git-send-email-avi@redhat.com \
--to=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox