* [PATCH v3 1/4] KVM: MMU: Introduce drop_spte()
2010-07-05 12:39 [PATCH v3 0/4] Fix accessed bit tracking Avi Kivity
@ 2010-07-05 12:39 ` Avi Kivity
2010-07-05 12:39 ` [PATCH v3 2/4] KVM: MMU: Move accessed/dirty bit checks from rmap_remove() to drop_spte() Avi Kivity
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2010-07-05 12:39 UTC (permalink / raw)
To: kvm; +Cc: Marcelo Tosatti
When we call rmap_remove(), we (almost) always immediately follow it by
an __set_spte() to a nonpresent pte. Since we need to perform the two
operations atomically, to avoid losing the dirty and accessed bits, introduce
a helper drop_spte() and convert all call sites.
The operation is still nonatomic at this point.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/mmu.c | 30 +++++++++++++++++-------------
arch/x86/kvm/paging_tmpl.h | 13 ++++++-------
2 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c515753..5b211dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -658,6 +658,12 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
}
}
+static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
+{
+ rmap_remove(kvm, sptep);
+ __set_spte(sptep, new_spte);
+}
+
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
{
struct kvm_rmap_desc *desc;
@@ -722,9 +728,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
if (is_writable_pte(*spte)) {
- rmap_remove(kvm, spte);
+ drop_spte(kvm, spte,
+ shadow_trap_nonpresent_pte);
--kvm->stat.lpages;
- __set_spte(spte, shadow_trap_nonpresent_pte);
spte = NULL;
write_protected = 1;
}
@@ -744,8 +750,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
while ((spte = rmap_next(kvm, rmapp, NULL))) {
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
- rmap_remove(kvm, spte);
- __set_spte(spte, shadow_trap_nonpresent_pte);
+ drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
need_tlb_flush = 1;
}
return need_tlb_flush;
@@ -767,8 +772,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
need_flush = 1;
if (pte_write(*ptep)) {
- rmap_remove(kvm, spte);
- __set_spte(spte, shadow_trap_nonpresent_pte);
+ drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
spte = rmap_next(kvm, rmapp, NULL);
} else {
new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
@@ -1464,7 +1468,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
} else {
if (is_large_pte(ent))
--kvm->stat.lpages;
- rmap_remove(kvm, &pt[i]);
+ drop_spte(kvm, &pt[i],
+ shadow_trap_nonpresent_pte);
}
}
pt[i] = shadow_trap_nonpresent_pte;
@@ -1868,9 +1873,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level)) {
ret = 1;
- rmap_remove(vcpu->kvm, sptep);
- spte = shadow_trap_nonpresent_pte;
- goto set_pte;
+ drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
+ goto done;
}
spte |= PT_WRITABLE_MASK;
@@ -1902,6 +1906,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
set_pte:
__set_spte(sptep, spte);
+done:
return ret;
}
@@ -1938,8 +1943,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*sptep), pfn);
- rmap_remove(vcpu->kvm, sptep);
- __set_spte(sptep, shadow_trap_nonpresent_pte);
+ drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
} else
was_rmapped = 1;
@@ -2591,7 +2595,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
pte = *spte;
if (is_shadow_present_pte(pte)) {
if (is_last_spte(pte, sp->role.level))
- rmap_remove(vcpu->kvm, spte);
+ drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte);
else {
child = page_header(pte & PT64_BASE_ADDR_MASK);
mmu_page_remove_parent_pte(child, spte);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3350c02..dfb2720 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,8 +353,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
if (is_large_pte(*sptep)) {
- rmap_remove(vcpu->kvm, sptep);
- __set_spte(sptep, shadow_trap_nonpresent_pte);
+ drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
}
@@ -515,12 +514,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
if (is_shadow_present_pte(*sptep)) {
- rmap_remove(vcpu->kvm, sptep);
if (is_large_pte(*sptep))
--vcpu->kvm->stat.lpages;
+ drop_spte(vcpu->kvm, sptep,
+ shadow_trap_nonpresent_pte);
need_flush = 1;
- }
- __set_spte(sptep, shadow_trap_nonpresent_pte);
+ } else
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
break;
}
@@ -636,12 +636,11 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
!is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
- rmap_remove(vcpu->kvm, &sp->spt[i]);
if (is_present_gpte(gpte) || !clear_unsync)
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
- __set_spte(&sp->spt[i], nonpresent);
+ drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);
continue;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH v3 3/4] KVM: MMU: Atomically check for accessed bit when dropping an spte
2010-07-05 12:39 [PATCH v3 0/4] Fix accessed bit tracking Avi Kivity
2010-07-05 12:39 ` [PATCH v3 1/4] KVM: MMU: Introduce drop_spte() Avi Kivity
2010-07-05 12:39 ` [PATCH v3 2/4] KVM: MMU: Move accessed/dirty bit checks from rmap_remove() to drop_spte() Avi Kivity
@ 2010-07-05 12:39 ` Avi Kivity
2010-07-05 12:39 ` [PATCH v3 4/4] KVM: MMU: Don't drop accessed bit while updating " Avi Kivity
2010-07-05 13:45 ` [PATCH v3 0/4] Fix accessed bit tracking Marcelo Tosatti
4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2010-07-05 12:39 UTC (permalink / raw)
To: kvm; +Cc: Marcelo Tosatti
Currently, in the window between the check for the accessed bit, and actually
dropping the spte, a vcpu can access the page through the spte and set the bit,
which will be ignored by the mmu.
Fix by using an exchange operation to atmoically fetch the spte and drop it.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/mmu.c | 28 +++++++++++++++++++++-------
1 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e1e6967..b744fbc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -288,6 +288,21 @@ static void __set_spte(u64 *sptep, u64 spte)
#endif
}
+static u64 __xchg_spte(u64 *sptep, u64 new_spte)
+{
+#ifdef CONFIG_X86_64
+ return xchg(sptep, new_spte);
+#else
+ u64 old_spte;
+
+ do {
+ old_spte = *sptep;
+ } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte);
+
+ return old_spte;
+#endif
+}
+
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
struct kmem_cache *base_cache, int min)
{
@@ -653,18 +668,17 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
{
pfn_t pfn;
+ u64 old_spte;
- if (!is_rmap_spte(*sptep)) {
- __set_spte(sptep, new_spte);
+ old_spte = __xchg_spte(sptep, new_spte);
+ if (!is_rmap_spte(old_spte))
return;
- }
- pfn = spte_to_pfn(*sptep);
- if (*sptep & shadow_accessed_mask)
+ pfn = spte_to_pfn(old_spte);
+ if (old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
- if (is_writable_pte(*sptep))
+ if (is_writable_pte(old_spte))
kvm_set_pfn_dirty(pfn);
rmap_remove(kvm, sptep);
- __set_spte(sptep, new_spte);
}
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH v3 4/4] KVM: MMU: Don't drop accessed bit while updating an spte
2010-07-05 12:39 [PATCH v3 0/4] Fix accessed bit tracking Avi Kivity
` (2 preceding siblings ...)
2010-07-05 12:39 ` [PATCH v3 3/4] KVM: MMU: Atomically check for accessed bit when dropping an spte Avi Kivity
@ 2010-07-05 12:39 ` Avi Kivity
2010-07-05 13:45 ` [PATCH v3 0/4] Fix accessed bit tracking Marcelo Tosatti
4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2010-07-05 12:39 UTC (permalink / raw)
To: kvm; +Cc: Marcelo Tosatti
__set_spte() will happily replace an spte with the accessed bit set with
one that has the accessed bit clear. Add a helper update_spte() which checks
for this condition and updates the page flag if needed.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/mmu.c | 25 +++++++++++++++++++++----
1 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b744fbc..104756b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -303,6 +303,19 @@ static u64 __xchg_spte(u64 *sptep, u64 new_spte)
#endif
}
+static void update_spte(u64 *sptep, u64 new_spte)
+{
+ u64 old_spte;
+
+ if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask)) {
+ __set_spte(sptep, new_spte);
+ } else {
+ old_spte = __xchg_spte(sptep, new_spte);
+ if (old_spte & shadow_accessed_mask)
+ mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
+ }
+}
+
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
struct kmem_cache *base_cache, int min)
{
@@ -721,7 +734,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
if (is_writable_pte(*spte)) {
- __set_spte(spte, *spte & ~PT_WRITABLE_MASK);
+ update_spte(spte, *spte & ~PT_WRITABLE_MASK);
write_protected = 1;
}
spte = rmap_next(kvm, rmapp, spte);
@@ -777,7 +790,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
int need_flush = 0;
- u64 *spte, new_spte;
+ u64 *spte, new_spte, old_spte;
pte_t *ptep = (pte_t *)data;
pfn_t new_pfn;
@@ -797,9 +810,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
+ new_spte &= ~shadow_accessed_mask;
if (is_writable_pte(*spte))
kvm_set_pfn_dirty(spte_to_pfn(*spte));
- __set_spte(spte, new_spte);
+ old_spte = __xchg_spte(spte, new_spte);
+ if (is_shadow_present_pte(old_spte)
+ && (old_spte & shadow_accessed_mask))
+ mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
spte = rmap_next(kvm, rmapp, spte);
}
}
@@ -1922,7 +1939,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
mark_page_dirty(vcpu->kvm, gfn);
set_pte:
- __set_spte(sptep, spte);
+ update_spte(sptep, spte);
done:
return ret;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread