* [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status
[not found] <20221014222643.25815-1-risbhat@amazon.com>
@ 2022-10-14 22:26 ` Rishabh Bhatnagar
  2022-10-15  5:45   ` Greg KH
  2022-10-14 22:26 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
  2022-10-14 22:26 ` [PATCH 8/9] KVM: x86: do not report preemption if the steal time cache is stale Rishabh Bhatnagar
  2 siblings, 1 reply; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-10-14 22:26 UTC (permalink / raw)
To: surajjs, luizcap, aams
Cc: mbacco, David Woodhouse, stable, David Woodhouse, Paolo Bonzini,
    Rishabh Bhatnagar

From: David Woodhouse <dwmw2@infradead.org>

commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream.

In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
not missed") we switched to using a gfn_to_pfn_cache for accessing the
guest steal time structure in order to allow for an atomic xchg of the
preempted field. This has a couple of problems.

Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
guest vCPU using an IOMEM page for its steal time would never have its
preempted field set.

Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
should have been. There are two stages to the GFN->PFN conversion;
first the GFN is converted to a userspace HVA, and then that HVA is
looked up in the process page tables to find the underlying host PFN.
Correct invalidation of the latter would require being hooked up to the
MMU notifiers, but that doesn't happen---so it just keeps mapping and
unmapping the *wrong* PFN after the userspace page tables change.

In the !IOMEM case at least the stale page *is* pinned all the time it's
cached, so it won't be freed and reused by anyone else while still
receiving the steal time updates. The map/unmap dance only takes care
of the KVM administrivia such as marking the page dirty.

Until the gfn_to_pfn cache handles the remapping automatically by
integrating with the MMU notifiers, we might as well not get a
kernel mapping of it, and use the perfectly serviceable userspace HVA
that we already have. We just need to implement the atomic xchg on
the userspace address with appropriate exception handling, which is
fairly trivial.

Cc: stable@vger.kernel.org
Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org>
[I didn't entirely agree with David's assessment of the
 usefulness of the gfn_to_pfn cache, and integrated the outcome
 of the discussion in the above commit message. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[risbhat@amazon.com: Use the older mark_page_dirty_in_slot api without
 kvm argument]
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
---
 arch/x86/include/asm/kvm_host.h |   2 +-
 arch/x86/kvm/x86.c              | 105 +++++++++++++++++++++++---------
 2 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 38c63a78aba6..2b35f8139f15 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -663,7 +663,7 @@ struct kvm_vcpu_arch {
         u8 preempted;
         u64 msr_val;
         u64 last_steal;
-        struct gfn_to_pfn_cache cache;
+        struct gfn_to_hva_cache cache;
     } st;

     u64 l1_tsc_offset;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3640b298c42e..cd1e6710bc33 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3013,53 +3013,92 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)

 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-    struct kvm_host_map map;
-    struct kvm_steal_time *st;
+    struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+    struct kvm_steal_time __user *st;
+    struct kvm_memslots *slots;
+    u64 steal;
+    u32 version;

     if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
         return;

-    /* -EAGAIN is returned in atomic context so we can just return. */
-    if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-            &map, &vcpu->arch.st.cache, false))
+    if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
         return;

-    st = map.hva +
-        offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+    slots = kvm_memslots(vcpu->kvm);
+
+    if (unlikely(slots->generation != ghc->generation ||
+             kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+        gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+        /* We rely on the fact that it fits in a single page. */
+        BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+            kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+            return;
+    }
+
+    st = (struct kvm_steal_time __user *)ghc->hva;
+    if (!user_access_begin(st, sizeof(*st)))
+        return;

     /*
      * Doing a TLB flush here, on the guest's behalf, can avoid
      * expensive IPIs.
      */
     if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
-        u8 st_preempted = xchg(&st->preempted, 0);
+        u8 st_preempted = 0;
+        int err = -EFAULT;
+
+        asm volatile("1: xchgb %0, %2\n"
+                 "xor %1, %1\n"
+                 "2:\n"
+                 _ASM_EXTABLE_UA(1b, 2b)
+                 : "+r" (st_preempted),
+                   "+&r" (err)
+                 : "m" (st->preempted));
+        if (err)
+            goto out;
+
+        user_access_end();
+
+        vcpu->arch.st.preempted = 0;

         trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
                        st_preempted & KVM_VCPU_FLUSH_TLB);
         if (st_preempted & KVM_VCPU_FLUSH_TLB)
             kvm_vcpu_flush_tlb_guest(vcpu);
+
+        if (!user_access_begin(st, sizeof(*st)))
+            goto dirty;
     } else {
-        st->preempted = 0;
+        unsafe_put_user(0, &st->preempted, out);
+        vcpu->arch.st.preempted = 0;
     }

-    vcpu->arch.st.preempted = 0;
-
-    if (st->version & 1)
-        st->version += 1;  /* first time write, random junk */
+    unsafe_get_user(version, &st->version, out);
+    if (version & 1)
+        version += 1;  /* first time write, random junk */

-    st->version += 1;
+    version += 1;
+    unsafe_put_user(version, &st->version, out);

     smp_wmb();

-    st->steal += current->sched_info.run_delay -
+    unsafe_get_user(steal, &st->steal, out);
+    steal += current->sched_info.run_delay -
         vcpu->arch.st.last_steal;
     vcpu->arch.st.last_steal = current->sched_info.run_delay;
+    unsafe_put_user(steal, &st->steal, out);

-    smp_wmb();
-
-    st->version += 1;
+    version += 1;
+    unsafe_put_user(version, &st->version, out);

-    kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+    user_access_end();
+ dirty:
+    mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
 }

 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -4044,8 +4083,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
-    struct kvm_host_map map;
-    struct kvm_steal_time *st;
+    struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+    struct kvm_steal_time __user *st;
+    struct kvm_memslots *slots;
+    static const u8 preempted = KVM_VCPU_PREEMPTED;

     if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
         return;
@@ -4053,16 +4094,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
     if (vcpu->arch.st.preempted)
         return;

-    if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-            &vcpu->arch.st.cache, true))
+    /* This happens on process exit */
+    if (unlikely(current->mm != vcpu->kvm->mm))
         return;

-    st = map.hva +
-        offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+    slots = kvm_memslots(vcpu->kvm);
+
+    if (unlikely(slots->generation != ghc->generation ||
+             kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+        return;

-    st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+    st = (struct kvm_steal_time __user *)ghc->hva;
+    BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));

-    kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+    if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+        vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+    mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
 }

 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -10145,11 +10193,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)

 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-    struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
     int idx;

-    kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
     kvmclock_reset(vcpu);

     kvm_x86_ops.vcpu_free(vcpu);
--
2.37.1

^ permalink raw reply related [flat|nested] 8+ messages in thread
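[Editor's illustration, not part of the archived thread: the two-stage GFN->PFN
translation described in the commit message above, sketched with made-up types
and helper names rather than KVM's real API. The point is that stage 1
(GFN->HVA) depends only on the memslot layout, so a memslot-generation check is
enough to validate a cached value, while stage 2 (HVA->PFN) depends on the host
process page tables and can silently go stale unless an MMU notifier
invalidates the cache.]

/*
 * Illustrative sketch only -- hypothetical types and helpers, not KVM code.
 */
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t gfn_t;   /* guest frame number   */
typedef uint64_t hva_t;   /* host virtual address */
typedef uint64_t pfn_t;   /* host physical frame  */

struct memslot {
    gfn_t base_gfn;
    uint64_t npages;
    hva_t userspace_addr;
};

/* Stage 1: GFN -> HVA, a pure function of the memslot layout. */
static bool gfn_to_hva(const struct memslot *slot, gfn_t gfn, hva_t *hva)
{
    if (gfn < slot->base_gfn || gfn >= slot->base_gfn + slot->npages)
        return false;
    *hva = slot->userspace_addr + ((gfn - slot->base_gfn) << 12);
    return true;
}

/* Stage 2: HVA -> PFN, by walking the host page tables (stubbed here). */
bool host_walk_page_tables(hva_t hva, pfn_t *pfn);

/* Caching this combined result is only safe while stage 2 stays stable. */
static bool gfn_to_pfn(const struct memslot *slot, gfn_t gfn, pfn_t *pfn)
{
    hva_t hva;

    return gfn_to_hva(slot, gfn, &hva) && host_walk_page_tables(hva, pfn);
}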
* Re: [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status
  2022-10-14 22:26 ` [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status Rishabh Bhatnagar
@ 2022-10-15  5:45   ` Greg KH
  2022-10-15 16:28     ` Bhatnagar, Rishabh
  0 siblings, 1 reply; 8+ messages in thread
From: Greg KH @ 2022-10-15 5:45 UTC (permalink / raw)
To: Rishabh Bhatnagar
Cc: surajjs, luizcap, aams, mbacco, David Woodhouse, stable,
    David Woodhouse, Paolo Bonzini

On Fri, Oct 14, 2022 at 10:26:36PM +0000, Rishabh Bhatnagar wrote:
> From: David Woodhouse <dwmw2@infradead.org>
>
> commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream.
>
> In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
> not missed") we switched to using a gfn_to_pfn_cache for accessing the
> guest steal time structure in order to allow for an atomic xchg of the
> preempted field. This has a couple of problems.
>
> Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
> atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
> guest vCPU using an IOMEM page for its steal time would never have its
> preempted field set.
>
> Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
> should have been. There are two stages to the GFN->PFN conversion;
> first the GFN is converted to a userspace HVA, and then that HVA is
> looked up in the process page tables to find the underlying host PFN.
> Correct invalidation of the latter would require being hooked up to the
> MMU notifiers, but that doesn't happen---so it just keeps mapping and
> unmapping the *wrong* PFN after the userspace page tables change.
>
> In the !IOMEM case at least the stale page *is* pinned all the time it's
> cached, so it won't be freed and reused by anyone else while still
> receiving the steal time updates. The map/unmap dance only takes care
> of the KVM administrivia such as marking the page dirty.
>
> Until the gfn_to_pfn cache handles the remapping automatically by
> integrating with the MMU notifiers, we might as well not get a
> kernel mapping of it, and use the perfectly serviceable userspace HVA
> that we already have. We just need to implement the atomic xchg on
> the userspace address with appropriate exception handling, which is
> fairly trivial.
>
> Cc: stable@vger.kernel.org
> Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org>
> [I didn't entirely agree with David's assessment of the
> usefulness of the gfn_to_pfn cache, and integrated the outcome
> of the discussion in the above commit message. - Paolo]
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> [risbhat@amazon.com: Use the older mark_page_dirty_in_slot api without
> kvm argument]
> Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   2 +-
>  arch/x86/kvm/x86.c              | 105 +++++++++++++++++++++++---------
>  2 files changed, 76 insertions(+), 31 deletions(-)

This is already in some stable kernels, why send it again and what
tree(s) are you asking for it to be applied to?

Same for the other backports you sent.

confused,

greg k-h

^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status
  2022-10-15  5:45 ` Greg KH
@ 2022-10-15 16:28   ` Bhatnagar, Rishabh
  0 siblings, 0 replies; 8+ messages in thread
From: Bhatnagar, Rishabh @ 2022-10-15 16:28 UTC (permalink / raw)
To: Greg KH
Cc: David Woodhouse, stable@vger.kernel.org, Woodhouse, David,
    Paolo Bonzini

Please ignore/drop this patch series. CC list expanded from patch by mistake.
Planning to get some more reviews/testing before resending it later.

On 10/14/22, 10:45 PM, "Greg KH" <gregkh@linuxfoundation.org> wrote:

    CAUTION: This email originated from outside of the organization. Do not
    click links or open attachments unless you can confirm the sender and
    know the content is safe.

    On Fri, Oct 14, 2022 at 10:26:36PM +0000, Rishabh Bhatnagar wrote:
    > From: David Woodhouse <dwmw2@infradead.org>
    >
    > commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream.
    >
    > In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
    > not missed") we switched to using a gfn_to_pfn_cache for accessing the
    > guest steal time structure in order to allow for an atomic xchg of the
    > preempted field. This has a couple of problems.
    >
    > Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
    > atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
    > guest vCPU using an IOMEM page for its steal time would never have its
    > preempted field set.
    >
    > Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
    > should have been. There are two stages to the GFN->PFN conversion;
    > first the GFN is converted to a userspace HVA, and then that HVA is
    > looked up in the process page tables to find the underlying host PFN.
    > Correct invalidation of the latter would require being hooked up to the
    > MMU notifiers, but that doesn't happen---so it just keeps mapping and
    > unmapping the *wrong* PFN after the userspace page tables change.
    >
    > In the !IOMEM case at least the stale page *is* pinned all the time it's
    > cached, so it won't be freed and reused by anyone else while still
    > receiving the steal time updates. The map/unmap dance only takes care
    > of the KVM administrivia such as marking the page dirty.
    >
    > Until the gfn_to_pfn cache handles the remapping automatically by
    > integrating with the MMU notifiers, we might as well not get a
    > kernel mapping of it, and use the perfectly serviceable userspace HVA
    > that we already have. We just need to implement the atomic xchg on
    > the userspace address with appropriate exception handling, which is
    > fairly trivial.
    >
    > Cc: stable@vger.kernel.org
    > Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
    > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
    > Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org>
    > [I didn't entirely agree with David's assessment of the
    > usefulness of the gfn_to_pfn cache, and integrated the outcome
    > of the discussion in the above commit message. - Paolo]
    > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
    > [risbhat@amazon.com: Use the older mark_page_dirty_in_slot api without
    > kvm argument]
    > Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
    > ---
    >  arch/x86/include/asm/kvm_host.h |   2 +-
    >  arch/x86/kvm/x86.c              | 105 +++++++++++++++++++++++---------
    >  2 files changed, 76 insertions(+), 31 deletions(-)

    This is already in some stable kernels, why send it again and what
    tree(s) are you asking for it to be applied to?

    Same for the other backports you sent.

    confused,

    greg k-h

^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes
[not found] <20221014222643.25815-1-risbhat@amazon.com>
  2022-10-14 22:26 ` [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status Rishabh Bhatnagar
@ 2022-10-14 22:26 ` Rishabh Bhatnagar
  2022-10-14 22:26 ` [PATCH 8/9] KVM: x86: do not report preemption if the steal time cache is stale Rishabh Bhatnagar
  2 siblings, 0 replies; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-10-14 22:26 UTC (permalink / raw)
To: surajjs, luizcap, aams
Cc: mbacco, Paolo Bonzini, Dave Young, Xiaoying Yan,
    Dr . David Alan Gilbert, David Woodhouse, stable, Rishabh Bhatnagar

From: Paolo Bonzini <pbonzini@redhat.com>

commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream.

Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the
cached guest physical address and the one in the MSR. This causes an
incorrect cache hit if the guest modifies the steal time address while
the memslots remain the same. This can happen with kexec, in which case
the steal time data is written at the address used by the old kernel
instead of the new one.

While at it, rename the variable from gfn to gpa since it is a plain
physical address and not a right-shifted one.

Reported-by: Dave Young <ruyang@redhat.com>
Reported-by: Xiaoying Yan <yiyan@redhat.com>
Analyzed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
---
 arch/x86/kvm/x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 75494b3c2d1e..111aa95f3de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3018,6 +3018,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
     struct kvm_steal_time __user *st;
     struct kvm_memslots *slots;
+    gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
     u64 steal;
     u32 version;

@@ -3030,13 +3031,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     slots = kvm_memslots(vcpu->kvm);

     if (unlikely(slots->generation != ghc->generation ||
+             gpa != ghc->gpa ||
              kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
-        gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
         /* We rely on the fact that it fits in a single page. */
         BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);

-        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
             kvm_is_error_hva(ghc->hva) || !ghc->memslot)
             return;
     }
--
2.37.1

^ permalink raw reply related [flat|nested] 8+ messages in thread
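[Editor's illustration, not part of the archived thread: roughly how a guest
advertises its steal-time area via the KVM paravirtual ABI. This is a
simplified sketch, not the actual Linux guest code (which uses per-CPU areas
and slow_virt_to_phys()); the function name below is made up. A kexec'd kernel
repeats this MSR write with a different physical address while the memslots
stay the same, which is why the host must also compare the cached gpa against
the current MSR value, as the patch above does.]

/* Sketch only: guest-side registration of the steal-time record. */
#include <linux/types.h>
#include <asm/kvm_para.h>   /* MSR_KVM_STEAL_TIME, KVM_MSR_ENABLED, struct kvm_steal_time */
#include <asm/msr.h>        /* wrmsrl() */
#include <asm/page.h>       /* __pa() */

static struct kvm_steal_time steal_time __aligned(64);

static void register_steal_time(void)
{
    /* Low bits of the MSR carry flags; bit 0 enables the record. */
    wrmsrl(MSR_KVM_STEAL_TIME, __pa(&steal_time) | KVM_MSR_ENABLED);
}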
* [PATCH 8/9] KVM: x86: do not report preemption if the steal time cache is stale
[not found] <20221014222643.25815-1-risbhat@amazon.com>
  2022-10-14 22:26 ` [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status Rishabh Bhatnagar
  2022-10-14 22:26 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
@ 2022-10-14 22:26 ` Rishabh Bhatnagar
  2 siblings, 0 replies; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-10-14 22:26 UTC (permalink / raw)
To: surajjs, luizcap, aams
Cc: mbacco, Paolo Bonzini, David Woodhouse, stable, Rishabh Bhatnagar

From: Paolo Bonzini <pbonzini@redhat.com>

commit c3c28d24d910a746b02f496d190e0e8c6560224b upstream.

Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the
cached guest physical address and the one in the MSR. This causes an
incorrect cache hit if the guest modifies the steal time address while
the memslots remain the same. This can happen with kexec, in which case
the preempted bit is written at the address used by the old kernel
instead of the new one.

Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 111aa95f3de3..9e9298c333c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4089,6 +4089,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
     struct kvm_steal_time __user *st;
     struct kvm_memslots *slots;
     static const u8 preempted = KVM_VCPU_PREEMPTED;
+    gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;

     /*
      * The vCPU can be marked preempted if and only if the VM-Exit was on
@@ -4116,6 +4117,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
     slots = kvm_memslots(vcpu->kvm);

     if (unlikely(slots->generation != ghc->generation ||
+             gpa != ghc->gpa ||
              kvm_is_error_hva(ghc->hva) || !ghc->memslot))
         return;

--
2.37.1

^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/9] KVM backports to 5.10
@ 2023-05-10 18:15 Rishabh Bhatnagar
2023-05-10 18:15 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
0 siblings, 1 reply; 8+ messages in thread
From: Rishabh Bhatnagar @ 2023-05-10 18:15 UTC (permalink / raw)
To: gregkh, stable
Cc: lee, seanjc, kvm, bp, mingo, tglx, pbonzini, vkuznets, wanpengli,
jmattson, joro, Rishabh Bhatnagar
This patch series backports a few VM preemption_status, steal_time and
PV TLB flushing fixes to 5.10 stable kernel.
Most of the changes backport cleanly, except I had to work around a few
because of missing support/APIs in the 5.10 kernel. I have captured those in
the changelog as well as in the individual patches.
Earlier patch series that I'm resending for stable:
https://lore.kernel.org/all/20220909181351.23983-1-risbhat@amazon.com/
Changelog
- Use mark_page_dirty_in_slot api without kvm argument (KVM: x86: Fix
recording of guest steal time / preempted status)
- Avoid checking for xen_msr and SEV-ES conditions (KVM: x86:
do not set st->preempted when going back to user space)
- Use VCPU_STAT macro to expose preemption_reported and
preemption_other fields (KVM: x86: do not report a vCPU as preempted
outside instruction boundaries)
David Woodhouse (2):
KVM: x86: Fix recording of guest steal time / preempted status
KVM: Fix steal time asm constraints
Lai Jiangshan (1):
KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
Paolo Bonzini (5):
KVM: x86: do not set st->preempted when going back to user space
KVM: x86: do not report a vCPU as preempted outside instruction
boundaries
KVM: x86: revalidate steal time cache if MSR value changes
KVM: x86: do not report preemption if the steal time cache is stale
KVM: x86: move guest_pv_has out of user_access section
Sean Christopherson (1):
KVM: x86: Remove obsolete disabling of page faults in
kvm_arch_vcpu_put()
arch/x86/include/asm/kvm_host.h | 5 +-
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 164 ++++++++++++++++++++++----------
4 files changed, 122 insertions(+), 50 deletions(-)
--
2.39.2
^ permalink raw reply [flat|nested] 8+ messages in thread

* [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes
  2023-05-10 18:15 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
@ 2023-05-10 18:15 ` Rishabh Bhatnagar
  0 siblings, 0 replies; 8+ messages in thread
From: Rishabh Bhatnagar @ 2023-05-10 18:15 UTC (permalink / raw)
To: gregkh, stable
Cc: lee, seanjc, kvm, bp, mingo, tglx, pbonzini, vkuznets, wanpengli,
    jmattson, joro, Dave Young, Xiaoying Yan, Dr . David Alan Gilbert,
    David Woodhouse, Rishabh Bhatnagar, Allen Pais

From: Paolo Bonzini <pbonzini@redhat.com>

commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream.

Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the
cached guest physical address and the one in the MSR. This causes an
incorrect cache hit if the guest modifies the steal time address while
the memslots remain the same. This can happen with kexec, in which case
the steal time data is written at the address used by the old kernel
instead of the new one.

While at it, rename the variable from gfn to gpa since it is a plain
physical address and not a right-shifted one.

Reported-by: Dave Young <ruyang@redhat.com>
Reported-by: Xiaoying Yan <yiyan@redhat.com>
Analyzed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
Tested-by: Allen Pais <apais@linux.microsoft.com>
Acked-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 116a225fb26e..0503bb8b64e5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3025,6 +3025,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
     struct kvm_steal_time __user *st;
     struct kvm_memslots *slots;
+    gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
     u64 steal;
     u32 version;

@@ -3037,13 +3038,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     slots = kvm_memslots(vcpu->kvm);

     if (unlikely(slots->generation != ghc->generation ||
+             gpa != ghc->gpa ||
              kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
-        gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
         /* We rely on the fact that it fits in a single page. */
         BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);

-        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
             kvm_is_error_hva(ghc->hva) || !ghc->memslot)
             return;
     }
--
2.39.2

^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/9] KVM backports to 5.10
@ 2022-09-09 18:55 Rishabh Bhatnagar
2022-09-09 18:55 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
0 siblings, 1 reply; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-09-09 18:55 UTC (permalink / raw)
To: stable
Cc: gregkh, surajjs, mbacco, bp, mingo, tglx, pbonzini, seanjc,
vkuznets, wanpengli, jmattson, joro, Rishabh Bhatnagar
This patch series backports a few VM preemption_status, steal_time and
PV TLB flushing fixes to 5.10 stable kernel.
Most of the changes backport cleanly, except I had to work around a few
because of missing support/APIs in the 5.10 kernel. I have captured those in
the changelog as well as in the individual patches.
Changelog
- Use mark_page_dirty_in_slot api without kvm argument (KVM: x86: Fix
recording of guest steal time / preempted status)
- Avoid checking for xen_msr and SEV-ES conditions (KVM: x86:
do not set st->preempted when going back to user space)
- Use VCPU_STAT macro to expose preemption_reported and
preemption_other fields (KVM: x86: do not report a vCPU as preempted
outside instruction boundaries)
David Woodhouse (2):
KVM: x86: Fix recording of guest steal time / preempted status
KVM: Fix steal time asm constraints
Lai Jiangshan (1):
KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
Paolo Bonzini (5):
KVM: x86: do not set st->preempted when going back to user space
KVM: x86: do not report a vCPU as preempted outside instruction
boundaries
KVM: x86: revalidate steal time cache if MSR value changes
KVM: x86: do not report preemption if the steal time cache is stale
KVM: x86: move guest_pv_has out of user_access section
Sean Christopherson (1):
KVM: x86: Remove obsolete disabling of page faults in
kvm_arch_vcpu_put()
arch/x86/include/asm/kvm_host.h | 5 +-
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 164 ++++++++++++++++++++++----------
4 files changed, 122 insertions(+), 50 deletions(-)
--
2.37.1
^ permalink raw reply [flat|nested] 8+ messages in thread

* [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes
  2022-09-09 18:55 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
@ 2022-09-09 18:55 ` Rishabh Bhatnagar
  0 siblings, 0 replies; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-09-09 18:55 UTC (permalink / raw)
To: stable
Cc: gregkh, surajjs, mbacco, bp, mingo, tglx, pbonzini, seanjc,
    vkuznets, wanpengli, jmattson, joro, Dave Young, Xiaoying Yan,
    Dr . David Alan Gilbert, David Woodhouse, Rishabh Bhatnagar

From: Paolo Bonzini <pbonzini@redhat.com>

commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream.

Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the
cached guest physical address and the one in the MSR. This causes an
incorrect cache hit if the guest modifies the steal time address while
the memslots remain the same. This can happen with kexec, in which case
the steal time data is written at the address used by the old kernel
instead of the new one.

While at it, rename the variable from gfn to gpa since it is a plain
physical address and not a right-shifted one.

Reported-by: Dave Young <ruyang@redhat.com>
Reported-by: Xiaoying Yan <yiyan@redhat.com>
Analyzed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
---
 arch/x86/kvm/x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 75494b3c2d1e..111aa95f3de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3018,6 +3018,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
     struct kvm_steal_time __user *st;
     struct kvm_memslots *slots;
+    gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
     u64 steal;
     u32 version;

@@ -3030,13 +3031,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     slots = kvm_memslots(vcpu->kvm);

     if (unlikely(slots->generation != ghc->generation ||
+             gpa != ghc->gpa ||
              kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
-        gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
         /* We rely on the fact that it fits in a single page. */
         BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);

-        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
             kvm_is_error_hva(ghc->hva) || !ghc->memslot)
             return;
     }
--
2.37.1

^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/9] KVM backports to 5.10
@ 2022-09-09 18:13 Rishabh Bhatnagar
2022-09-09 18:13 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
0 siblings, 1 reply; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-09-09 18:13 UTC (permalink / raw)
To: stable; +Cc: gregkh, surajjs, mbacco, Rishabh Bhatnagar
This patch series backports a few VM preemption_status, steal_time and
PV TLB flushing fixes to 5.10 stable kernel.
Most of the changes backport cleanly, except I had to work around a few
because of missing support/APIs in the 5.10 kernel. I have captured those in
the changelog as well as in the individual patches.
Changelog
- Use mark_page_dirty_in_slot api without kvm argument (KVM: x86: Fix
recording of guest steal time / preempted status)
- Avoid checking for xen_msr and SEV-ES conditions (KVM: x86:
do not set st->preempted when going back to user space)
- Use VCPU_STAT macro to expose preemption_reported and
preemption_other fields (KVM: x86: do not report a vCPU as preempted
outside instruction boundaries)
David Woodhouse (2):
KVM: x86: Fix recording of guest steal time / preempted status
KVM: Fix steal time asm constraints
Lai Jiangshan (1):
KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
Paolo Bonzini (5):
KVM: x86: do not set st->preempted when going back to user space
KVM: x86: do not report a vCPU as preempted outside instruction
boundaries
KVM: x86: revalidate steal time cache if MSR value changes
KVM: x86: do not report preemption if the steal time cache is stale
KVM: x86: move guest_pv_has out of user_access section
Sean Christopherson (1):
KVM: x86: Remove obsolete disabling of page faults in
kvm_arch_vcpu_put()
arch/x86/include/asm/kvm_host.h | 5 +-
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 164 ++++++++++++++++++++++----------
4 files changed, 122 insertions(+), 50 deletions(-)
--
2.37.1
^ permalink raw reply [flat|nested] 8+ messages in thread

* [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes
  2022-09-09 18:13 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
@ 2022-09-09 18:13 ` Rishabh Bhatnagar
  0 siblings, 0 replies; 8+ messages in thread
From: Rishabh Bhatnagar @ 2022-09-09 18:13 UTC (permalink / raw)
To: stable
Cc: gregkh, surajjs, mbacco, Paolo Bonzini, Dave Young, Xiaoying Yan,
    Dr . David Alan Gilbert, David Woodhouse, Rishabh Bhatnagar

From: Paolo Bonzini <pbonzini@redhat.com>

commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream.

Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the
cached guest physical address and the one in the MSR. This causes an
incorrect cache hit if the guest modifies the steal time address while
the memslots remain the same. This can happen with kexec, in which case
the steal time data is written at the address used by the old kernel
instead of the new one.

While at it, rename the variable from gfn to gpa since it is a plain
physical address and not a right-shifted one.

Reported-by: Dave Young <ruyang@redhat.com>
Reported-by: Xiaoying Yan <yiyan@redhat.com>
Analyzed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rishabh Bhatnagar <risbhat@amazon.com>
---
 arch/x86/kvm/x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 75494b3c2d1e..111aa95f3de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3018,6 +3018,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
     struct kvm_steal_time __user *st;
     struct kvm_memslots *slots;
+    gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
     u64 steal;
     u32 version;

@@ -3030,13 +3031,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
     slots = kvm_memslots(vcpu->kvm);

     if (unlikely(slots->generation != ghc->generation ||
+             gpa != ghc->gpa ||
              kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
-        gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
         /* We rely on the fact that it fits in a single page. */
         BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);

-        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
             kvm_is_error_hva(ghc->hva) || !ghc->memslot)
             return;
     }
--
2.37.1

^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-05-10 18:16 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20221014222643.25815-1-risbhat@amazon.com>
2022-10-14 22:26 ` [PATCH 2/9] KVM: x86: Fix recording of guest steal time / preempted status Rishabh Bhatnagar
2022-10-15 5:45 ` Greg KH
2022-10-15 16:28 ` Bhatnagar, Rishabh
2022-10-14 22:26 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
2022-10-14 22:26 ` [PATCH 8/9] KVM: x86: do not report preemption if the steal time cache is stale Rishabh Bhatnagar
2023-05-10 18:15 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
2023-05-10 18:15 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
-- strict thread matches above, loose matches on Subject: below --
2022-09-09 18:55 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
2022-09-09 18:55 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
2022-09-09 18:13 [PATCH 0/9] KVM backports to 5.10 Rishabh Bhatnagar
2022-09-09 18:13 ` [PATCH 7/9] KVM: x86: revalidate steal time cache if MSR value changes Rishabh Bhatnagar
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).