* [PATCH v2] kvm: x86: fix stale mmio cache bug
@ 2014-08-04 21:10 David Matlack
2014-08-05 0:31 ` Wanpeng Li
2014-08-05 3:36 ` Xiao Guangrong
0 siblings, 2 replies; 7+ messages in thread
From: David Matlack @ 2014-08-04 21:10 UTC (permalink / raw)
To: Gleb Natapov, Paolo Bonzini, Xiao Guangrong, kvm, x86
Cc: Eric Northup, David Matlack
The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
up to userspace:
(1) Guest accesses gpa X without a memory slot. The gfn is cached in
struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
the SPTE write-execute-noread so that future accesses cause
EPT_MISCONFIGs.
(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
covering the page just accessed.
(3) Guest attempts to read or write to gpa X again. On Intel, this
generates an EPT_MISCONFIG. The memory slot generation number that
was incremented in (2) would normally take care of this but we fast
path mmio faults through quickly_check_mmio_pf(), which only checks
the per-vcpu mmio cache. Since we hit the cache, KVM passes a
KVM_EXIT_MMIO up to userspace.
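For reference, the fast path in question looks roughly like this (a paraphrase,
not the verbatim mmu.c code; details may differ slightly):

static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
	/*
	 * Consults only the per-vcpu cache, so a stale entry short-circuits
	 * the generation check that real mmio sptes would get.
	 */
	if (direct)
		return vcpu_match_mmio_gpa(vcpu, addr);	/* gfn cached in (1) */

	return vcpu_match_mmio_gva(vcpu, addr);
}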
This patch fixes the issue by doing the following:
- Tag the mmio cache with the memslot generation and use it to
validate mmio cache lookups.
- Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
mmio_gva, since both can be used to fast path mmio faults.
- In mmu_sync_roots, unconditionally clear the mmio cache since
even direct_map (e.g. tdp) hosts use it.
Signed-off-by: David Matlack <dmatlack@google.com>
---
Changes in v2:
- Use memslot generation to invalidate the mmio cache rather than
actively invalidating the cache.
- Update patch description with new cache invalidation technique.
- Pull mmio cache/clear code up out of x86.h and mmu.c and into
mmu.h.
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 16 ++--------
arch/x86/kvm/mmu.h | 70 +++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.h | 36 ---------------------
4 files changed, 73 insertions(+), 50 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49205d0..f518d14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -479,6 +479,7 @@ struct kvm_vcpu_arch {
u64 mmio_gva;
unsigned access;
gfn_t mmio_gfn;
+ unsigned int mmio_gen;
struct kvm_pmu pmu;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9314678..43f1c18 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -206,11 +206,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
#define MMIO_SPTE_GEN_LOW_SHIFT 3
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
-#define MMIO_GEN_SHIFT 19
#define MMIO_GEN_LOW_SHIFT 9
#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
-#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
static u64 generation_mmio_spte_mask(unsigned int gen)
{
@@ -234,16 +231,6 @@ static unsigned int get_mmio_spte_generation(u64 spte)
return gen;
}
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
-{
- /*
- * Init kvm generation close to MMIO_MAX_GEN to easily test the
- * code of handling generation number wrap-around.
- */
- return (kvm_memslots(kvm)->generation +
- MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
-}
-
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
unsigned access)
{
@@ -3157,13 +3144,14 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
int i;
struct kvm_mmu_page *sp;
+ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
+
if (vcpu->arch.mmu.direct_map)
return;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- vcpu_clear_mmio_info(vcpu, ~0ul);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112..058651a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -82,6 +82,76 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool ept);
+#define MMIO_GEN_SHIFT 19
+#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
+static inline unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+ /*
+ * Init kvm generation close to MMIO_MAX_GEN to easily test the
+ * code of handling generation number wrap-around.
+ */
+ return (kvm_memslots(kvm)->generation + MMIO_MAX_GEN - 150) &
+ MMIO_GEN_MASK;
+}
+
+static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
+ gva_t gva, gfn_t gfn, unsigned access)
+{
+ vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
+
+ /*
+ * Ensure that the mmio_gen is set before the rest of the cache entry.
+ * Otherwise we might see a new generation number attached to an old
+ * cache entry if creating/deleting a memslot races with mmio caching.
+ * The inverse case is possible (old generation number with new cache
+ * info), but that is safe. The next access will just miss the cache
+ * when it should have hit.
+ */
+ smp_wmb();
+
+ vcpu->arch.mmio_gva = gva & PAGE_MASK;
+ vcpu->arch.access = access;
+ vcpu->arch.mmio_gfn = gfn;
+}
+
+/*
+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY,
+ * unconditionally clear the mmio cache.
+ */
+#define MMIO_GVA_ANY ~((gva_t) 0)
+static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
+ return;
+
+ vcpu->arch.mmio_gva = 0;
+ vcpu->arch.mmio_gfn = 0;
+}
+
+static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mmio_gen == kvm_current_mmio_generation(vcpu->kvm);
+}
+
+static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
+{
+ u64 mmio_gva = vcpu->arch.mmio_gva;
+
+ return vcpu_match_mmio_gen(vcpu) &&
+ mmio_gva &&
+ mmio_gva == (gva & PAGE_MASK);
+}
+
+static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+ gfn_t mmio_gfn = vcpu->arch.mmio_gfn;
+
+ return vcpu_match_mmio_gen(vcpu) &&
+ mmio_gfn &&
+ mmio_gfn == (gpa >> PAGE_SHIFT);
+}
+
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8c97bac..c686c91 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -72,42 +72,6 @@ static inline u32 bit(int bitno)
return 1 << (bitno & 31);
}
-static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
- gva_t gva, gfn_t gfn, unsigned access)
-{
- vcpu->arch.mmio_gva = gva & PAGE_MASK;
- vcpu->arch.access = access;
- vcpu->arch.mmio_gfn = gfn;
-}
-
-/*
- * Clear the mmio cache info for the given gva,
- * specially, if gva is ~0ul, we clear all mmio cache info.
- */
-static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
-{
- if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
- return;
-
- vcpu->arch.mmio_gva = 0;
-}
-
-static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
-{
- if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK))
- return true;
-
- return false;
-}
-
-static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
- if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
- return true;
-
- return false;
-}
-
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
--
2.0.0.526.g5318336
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-04 21:10 [PATCH v2] kvm: x86: fix stale mmio cache bug David Matlack
@ 2014-08-05 0:31 ` Wanpeng Li
2014-08-05 18:56 ` David Matlack
2014-08-05 3:36 ` Xiao Guangrong
1 sibling, 1 reply; 7+ messages in thread
From: Wanpeng Li @ 2014-08-05 0:31 UTC (permalink / raw)
To: David Matlack
Cc: Gleb Natapov, Paolo Bonzini, Xiao Guangrong, kvm, x86,
Eric Northup, David Matlack
Hi David,
On Mon, Aug 04, 2014 at 02:10:20PM -0700, David Matlack wrote:
>The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
>up to userspace:
>
>(1) Guest accesses gpa X without a memory slot. The gfn is cached in
>struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
>the SPTE write-execute-noread so that future accesses cause
>EPT_MISCONFIGs.
>
>(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
>covering the page just accessed.
>
One question:
What triggers host userspace to create the memslot? Will it be created
just after the first mmio #PF?
Regards,
Wanpeng Li
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-04 21:10 [PATCH v2] kvm: x86: fix stale mmio cache bug David Matlack
2014-08-05 0:31 ` Wanpeng Li
@ 2014-08-05 3:36 ` Xiao Guangrong
2014-08-05 22:39 ` David Matlack
1 sibling, 1 reply; 7+ messages in thread
From: Xiao Guangrong @ 2014-08-05 3:36 UTC (permalink / raw)
To: David Matlack, Gleb Natapov, Paolo Bonzini, kvm, x86; +Cc: Eric Northup
On 08/05/2014 05:10 AM, David Matlack wrote:
> The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
> up to userspace:
>
> (1) Guest accesses gpa X without a memory slot. The gfn is cached in
> struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
> the SPTE write-execute-noread so that future accesses cause
> EPT_MISCONFIGs.
>
> (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
> covering the page just accessed.
>
> (3) Guest attempts to read or write to gpa X again. On Intel, this
> generates an EPT_MISCONFIG. The memory slot generation number that
> was incremented in (2) would normally take care of this but we fast
> path mmio faults through quickly_check_mmio_pf(), which only checks
> the per-vcpu mmio cache. Since we hit the cache, KVM passes a
> KVM_EXIT_MMIO up to userspace.
>
> This patch fixes the issue by doing the following:
> - Tag the mmio cache with the memslot generation and use it to
> validate mmio cache lookups.
> - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
> mmio_gva, since both can be used to fast path mmio faults.
> - In mmu_sync_roots, unconditionally clear the mmio cache since
> even direct_map (e.g. tdp) hosts use it.
It's not needed.
direct map only uses gpa (and never cache gva) and
vcpu_clear_mmio_info only clears gva.
> +static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
> + gva_t gva, gfn_t gfn, unsigned access)
> +{
> + vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
> +
> + /*
> + * Ensure that the mmio_gen is set before the rest of the cache entry.
> + * Otherwise we might see a new generation number attached to an old
> + * cache entry if creating/deleting a memslot races with mmio caching.
> + * The inverse case is possible (old generation number with new cache
> + * info), but that is safe. The next access will just miss the cache
> + * when it should have hit.
> + */
> + smp_wmb();
The memory barrier can't help us, consider this scenario:
CPU 0                                     CPU 1
page-fault
see gpa is not mapped in memslot

                                          create new memslot containing gpa from Qemu
                                          update the slots's generation number
cache mmio info

!!! later when vcpu accesses gpa again
it will cause mmio-exit.
The easy way to fix this is to update the slots' generation number after
synchronize_srcu_expedited() when the memslot is being updated; that
ensures other sides can see the new generation number only after the
update has finished.
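A minimal sketch of that ordering (a hypothetical helper for illustration,
not the actual kvm_main.c code; kvm_commit_memslots and the exact generation
handling are assumptions):

static void kvm_commit_memslots(struct kvm *kvm, struct kvm_memslots *new)
{
	struct kvm_memslots *old = kvm->memslots;

	/* Publish the new slot array with the generation left unchanged. */
	new->generation = old->generation;
	rcu_assign_pointer(kvm->memslots, new);

	/* Wait until no vcpu can still be walking the old slots. */
	synchronize_srcu_expedited(&kvm->srcu);

	/*
	 * Only now advance the generation: any vcpu that observes the new
	 * number is guaranteed to also observe the new slot array, so a
	 * fresh generation can never be paired with stale slot info.
	 */
	new->generation = old->generation + 1;
}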
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-05 0:31 ` Wanpeng Li
@ 2014-08-05 18:56 ` David Matlack
0 siblings, 0 replies; 7+ messages in thread
From: David Matlack @ 2014-08-05 18:56 UTC (permalink / raw)
To: Wanpeng Li
Cc: Gleb Natapov, Paolo Bonzini, Xiao Guangrong, kvm, x86,
Eric Northup
On Mon, Aug 4, 2014 at 5:31 PM, Wanpeng Li <wanpeng.li@linux.intel.com> wrote:
> Hi David,
> On Mon, Aug 04, 2014 at 02:10:20PM -0700, David Matlack wrote:
>>The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
>>up to userspace:
>>
>>(1) Guest accesses gpa X without a memory slot. The gfn is cached in
>>struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
>>the SPTE write-execute-noread so that future accesses cause
>>EPT_MISCONFIGs.
>>
>>(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
>>covering the page just accessed.
>>
>
> One question:
>
> What triggers host userspace to create the memslot? Will it be created
> just after the first mmio #PF?
Devices such as vga can be in modes where their memory behaves like ram,
and using a memslot to back the memory makes sense. In other modes,
reading and writing vga memory has side-effects, so mmio makes sense
(the memslot is deleted). Switching between these modes is a
guest-initiated event.
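For context, that is the userspace side of step (2): a minimal sketch of
switching a region between ram-backed and mmio (vm_fd, backing and the
addresses are placeholders set up elsewhere):

#include <linux/kvm.h>
#include <sys/ioctl.h>

struct kvm_userspace_memory_region region = {
	.slot            = 1,
	.flags           = 0,
	.guest_phys_addr = 0xa0000,		/* e.g. the legacy vga window */
	.memory_size     = 0x20000,
	.userspace_addr  = (__u64)backing,	/* host buffer backing the slot */
};

/* Back the region with ram... */
ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);

/* ...or delete the slot again so guest accesses exit to userspace as mmio. */
region.memory_size = 0;
ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);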
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-05 3:36 ` Xiao Guangrong
@ 2014-08-05 22:39 ` David Matlack
2014-08-06 3:26 ` Xiao Guangrong
0 siblings, 1 reply; 7+ messages in thread
From: David Matlack @ 2014-08-05 22:39 UTC (permalink / raw)
To: Xiao Guangrong; +Cc: Gleb Natapov, Paolo Bonzini, kvm, x86, Eric Northup
On Mon, Aug 4, 2014 at 8:36 PM, Xiao Guangrong
<xiaoguangrong@linux.vnet.ibm.com> wrote:
> On 08/05/2014 05:10 AM, David Matlack wrote:
>>
>> This patch fixes the issue by doing the following:
>> - Tag the mmio cache with the memslot generation and use it to
>> validate mmio cache lookups.
>> - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
>> mmio_gva, since both can be used to fast path mmio faults.
>> - In mmu_sync_roots, unconditionally clear the mmio cache since
>> even direct_map (e.g. tdp) hosts use it.
>
> It's not needed.
>
> direct map only uses gpa (and never cache gva) and
> vcpu_clear_mmio_info only clears gva.
Ok thanks for the clarification.
>> +static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>> + gva_t gva, gfn_t gfn, unsigned access)
>> +{
>> + vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
>> +
>> + /*
>> + * Ensure that the mmio_gen is set before the rest of the cache entry.
>> + * Otherwise we might see a new generation number attached to an old
>> + * cache entry if creating/deleting a memslot races with mmio caching.
>> + * The inverse case is possible (old generation number with new cache
>> + * info), but that is safe. The next access will just miss the cache
>> + * when it should have hit.
>> + */
>> + smp_wmb();
>
> The memory barrier can't help us, consider this scenario:
>
> CPU 0                                     CPU 1
> page-fault
> see gpa is not mapped in memslot
>
>                                           create new memslot containing gpa from Qemu
>                                           update the slots's generation number
> cache mmio info
>
> !!! later when vcpu accesses gpa again
> it will cause mmio-exit.
Ah! Thanks for catching my mistake.
> The easy way to fix this is that we update slots's generation-number
> after synchronize_srcu_expedited when memslot is being updated that
> ensures other sides can see the new generation-number only after
> finishing update.
It would be possible for a vcpu to see an inconsistent kvm_memslots struct
(new set of slots with an old generation number). Is that not an issue?
We could just use the generation number that is stored in the
spte. The only downside (that I can see) is that handle_abnormal_pfn()
calls vcpu_cache_mmio_info() but does not have access to the spte.
So presumably we'd have to do a page table walk.
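(For reference, the generation is packed into mmio sptes roughly as below -
a paraphrase of mmu.c using the MMIO_SPTE_GEN_*/MMIO_GEN_LOW_* constants
shown in the patch context above; details may differ:)

static unsigned int get_mmio_spte_generation(u64 spte)
{
	unsigned int gen;

	spte &= ~shadow_mmio_mask;

	/* Low bits of the generation live at bit 3, high bits at bit 52. */
	gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
	gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
	return gen;
}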
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-05 22:39 ` David Matlack
@ 2014-08-06 3:26 ` Xiao Guangrong
2014-08-07 4:20 ` David Matlack
0 siblings, 1 reply; 7+ messages in thread
From: Xiao Guangrong @ 2014-08-06 3:26 UTC (permalink / raw)
To: David Matlack; +Cc: Gleb Natapov, Paolo Bonzini, kvm, x86, Eric Northup
On 08/06/2014 06:39 AM, David Matlack wrote:
> On Mon, Aug 4, 2014 at 8:36 PM, Xiao Guangrong
> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>> On 08/05/2014 05:10 AM, David Matlack wrote:
>>>
>>> This patch fixes the issue by doing the following:
>>> - Tag the mmio cache with the memslot generation and use it to
>>> validate mmio cache lookups.
>>> - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
>>> mmio_gva, since both can be used to fast path mmio faults.
>>> - In mmu_sync_roots, unconditionally clear the mmio cache since
>>> even direct_map (e.g. tdp) hosts use it.
>>
>> It's not needed.
>>
>> direct map only uses gpa (and never cache gva) and
>> vcpu_clear_mmio_info only clears gva.
>
> Ok thanks for the clarification.
>
>>> +static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>>> + gva_t gva, gfn_t gfn, unsigned access)
>>> +{
>>> + vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
>>> +
>>> + /*
>>> + * Ensure that the mmio_gen is set before the rest of the cache entry.
>>> + * Otherwise we might see a new generation number attached to an old
>>> + * cache entry if creating/deleting a memslot races with mmio caching.
>>> + * The inverse case is possible (old generation number with new cache
>>> + * info), but that is safe. The next access will just miss the cache
>>> + * when it should have hit.
>>> + */
>>> + smp_wmb();
>>
>> The memory barrier can't help us, consider this scenario:
>>
>> CPU 0                                     CPU 1
>> page-fault
>> see gpa is not mapped in memslot
>>
>>                                           create new memslot containing gpa from Qemu
>>                                           update the slots's generation number
>> cache mmio info
>>
>> !!! later when vcpu accesses gpa again
>> it will cause mmio-exit.
>
> Ah! Thanks for catching my mistake.
>
>> The easy way to fix this is to update the slots' generation number after
>> synchronize_srcu_expedited() when the memslot is being updated; that
>> ensures other sides can see the new generation number only after the
>> update has finished.
>
> It would be possible for a vcpu to see an inconsistent kvm_memslots struct
> (new set of slots with an old generation number). Is that not an issue?
In this case, the generation-number check will fail and we will go down the
slow path to handle the mmio access - that's very rare, so I think it's ok.
>
> We could just use the generation number that is stored in the
> spte. The only downside (that I can see) is that handle_abnormal_pfn()
> calls vcpu_cache_mmio_info() but does not have access to the spte.
> So presumably we'd have to do a page table walk.
The issue is not only in vcpu_cache_mmio_info() but also in
mark_mmio_spte(), where we may cache a new generation number together with
old memslot info.
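Roughly, the window being described sits in mark_mmio_spte() (a paraphrase of
mmu.c from this era; details may differ):

static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
			   unsigned access)
{
	/*
	 * The generation is sampled here, but the decision that gfn is mmio
	 * was made earlier against a possibly older memslot array. If a slot
	 * covering gfn was installed in between, the spte gets the new
	 * generation attached to stale "this gfn is mmio" information.
	 */
	unsigned int gen = kvm_current_mmio_generation(kvm);
	u64 mask = generation_mmio_spte_mask(gen);

	access &= ACC_WRITE_MASK | ACC_USER_MASK;
	mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;

	trace_mark_mmio_spte(sptep, gfn, access, gen);
	mmu_spte_set(sptep, mask);
}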
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2] kvm: x86: fix stale mmio cache bug
2014-08-06 3:26 ` Xiao Guangrong
@ 2014-08-07 4:20 ` David Matlack
0 siblings, 0 replies; 7+ messages in thread
From: David Matlack @ 2014-08-07 4:20 UTC (permalink / raw)
To: Xiao Guangrong; +Cc: Gleb Natapov, Paolo Bonzini, kvm, x86, Eric Northup
On Tue, Aug 5, 2014 at 8:26 PM, Xiao Guangrong
<xiaoguangrong@linux.vnet.ibm.com> wrote:
> On 08/06/2014 06:39 AM, David Matlack wrote:
>> On Mon, Aug 4, 2014 at 8:36 PM, Xiao Guangrong
>> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>>> The memory barrier can't help us, consider this scenario:
>>>
>>> CPU 0                                     CPU 1
>>> page-fault
>>> see gpa is not mapped in memslot
>>>
>>>                                           create new memslot containing gpa from Qemu
>>>                                           update the slots's generation number
>>> cache mmio info
>>>
>>> !!! later when vcpu accesses gpa again
>>> it will cause mmio-exit.
>>
>> Ah! Thanks for catching my mistake.
>>
>>> The easy way to fix this is to update the slots' generation number after
>>> synchronize_srcu_expedited() when the memslot is being updated; that
>>> ensures other sides can see the new generation number only after the
>>> update has finished.
>>
>> It would be possible for a vcpu to see an inconsistent kvm_memslots struct
>> (new set of slots with an old generation number). Is that not an issue?
>
> In this case, checking generation-number will fail, we will goto the slow path
> to handle mmio access - that's very rare, so i think it's ok.
>
>>
>> We could just use the generation number that is stored in the
>> spte. The only downside (that I can see) is that handle_abnormal_pfn()
>> calls vcpu_cache_mmio_info() but does not have access to the spte.
>> So presumably we'd have to do a page table walk.
>
> The issue is not only in vcpu_cache_mmio_info but also in
> mark_mmio_spte() where we may cache new generation-number and old memslots
> info.
This is now a separate bug from the one I originally reported :). For now, I
will resend a 3rd version of my patch with the fixes you suggested (the memory
barrier is useless and there is no need to change mmu_sync_roots).
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2014-08-07 4:20 UTC | newest]
Thread overview: 7+ messages
2014-08-04 21:10 [PATCH v2] kvm: x86: fix stale mmio cache bug David Matlack
2014-08-05 0:31 ` Wanpeng Li
2014-08-05 18:56 ` David Matlack
2014-08-05 3:36 ` Xiao Guangrong
2014-08-05 22:39 ` David Matlack
2014-08-06 3:26 ` Xiao Guangrong
2014-08-07 4:20 ` David Matlack