From: Sean Christopherson <seanjc@google.com>
To: David Matlack <dmatlack@google.com>
Cc: Ben Gardon <bgardon@google.com>,
Vipin Sharma <vipinsh@google.com>,
pbonzini@redhat.com, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [Patch] KVM: x86/mmu: Make optimized __handle_changed_spte() for clear dirty log
Date: Mon, 30 Jan 2023 23:49:47 +0000 [thread overview]
Message-ID: <Y9hXmz/nDOr1hQal@google.com> (raw)
In-Reply-To: <CALzav=fkmS0U4tb4trDtbCmDxo27EcJeOWbiwb+meagj9+PFiw@mail.gmail.com>
On Mon, Jan 30, 2023, David Matlack wrote:
> On Wed, Jan 25, 2023 at 5:49 PM Sean Christopherson <seanjc@google.com> wrote:
> [...]
> > ---
> > arch/x86/kvm/mmu/tdp_mmu.c | 92 ++++++++++----------------------------
> > 1 file changed, 24 insertions(+), 68 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index bba33aea0fb0..2f78ca43a276 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> [...]
> > @@ -1289,8 +1244,7 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
> > new_spte = mark_spte_for_access_track(new_spte);
> > }
> >
> > - tdp_mmu_set_spte_no_acc_track(kvm, iter, new_spte);
> > -
> > + kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte, new_spte, iter->level);
>
> This can race with fast_page_fault() setting the W-bit and the CPU
> setting the D-bit. i.e. This call to kvm_tdp_mmu_write_spte() could
> clear the W-bit or D-bit.
Ugh, right. Hrm. Duh, I just didn't go far enough. A straight XCHG is silly.
Except for the access-tracking mess, KVM wants to clear a single bit. Writing
the entire thing and potentially clobbering bits is wasteful and unnecessarily
dangerous. And the access-tracking code already needs special handling.
We can just simplify this all by adding a helper to clear a single bit (and
maybe even use clear_bit() and __clear_bit() if we save the bit number for the
W/A/D bits and not just the mask). And if it weren't for EPT (different A/D
locations), we could even use static asserts to restrict the usage to the W/A/D
bits :-/ Oh well.
E.g. this
static inline void kvm_tdp_mmu_clear_spte_bit(struct tdp_iter *iter, u64 mask)
{
if (kvm_tdp_mmu_spte_has_volatile_bits(iter->old_spte, iter->level))
atomic64_and(~mask, (atomic64_t *)rcu_dereference(iter->sptep));
else
__kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte & ~mask);
}
yields another untested patch:
---
arch/x86/kvm/mmu/tdp_iter.h | 24 +++++++++++++++++++-----
arch/x86/kvm/mmu/tdp_mmu.c | 33 +++++++++++++--------------------
2 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index f0af385c56e0..09c8f2640ccc 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -29,11 +29,10 @@ static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
WRITE_ONCE(*rcu_dereference(sptep), new_spte);
}
-static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
- u64 new_spte, int level)
+static inline bool kvm_tdp_mmu_spte_has_volatile_bits(u64 old_spte, int level)
{
/*
- * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
+ * Atomically write a SPTE if it is a shadow-present, leaf SPTE with
* volatile bits, i.e. has bits that can be set outside of mmu_lock.
* The Writable bit can be set by KVM's fast page fault handler, and
* Accessed and Dirty bits can be set by the CPU.
@@ -44,8 +43,15 @@ static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
* logic needs to be reassessed if KVM were to use non-leaf Accessed
* bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
*/
- if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
- spte_has_volatile_bits(old_spte))
+ return is_shadow_present_pte(old_spte) &&
+ is_last_spte(old_spte, level) &&
+ spte_has_volatile_bits(old_spte);
+}
+
+static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
+ u64 new_spte, int level)
+{
+ if (kvm_tdp_mmu_spte_has_volatile_bits(old_spte, level))
return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
__kvm_tdp_mmu_write_spte(sptep, new_spte);
@@ -115,4 +121,12 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
void tdp_iter_next(struct tdp_iter *iter);
void tdp_iter_restart(struct tdp_iter *iter);
+static inline void kvm_tdp_mmu_clear_spte_bit(struct tdp_iter *iter, u64 mask)
+{
+ if (kvm_tdp_mmu_spte_has_volatile_bits(iter->old_spte, iter->level))
+ atomic64_and(~mask, (atomic64_t *)rcu_dereference(iter->sptep));
+ else
+ __kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte & ~mask);
+}
+
#endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 2f78ca43a276..32a7209a522d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1223,28 +1223,26 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_gfn_range *range)
{
- u64 new_spte = 0;
+ u64 new_spte;
/* If we have a non-accessed entry we don't need to change the pte. */
if (!is_accessed_spte(iter->old_spte))
return false;
- new_spte = iter->old_spte;
-
- if (spte_ad_enabled(new_spte)) {
- new_spte &= ~shadow_accessed_mask;
+ if (spte_ad_enabled(iter->old_spte)) {
+ kvm_tdp_mmu_clear_spte_bit(iter, shadow_accessed_mask);
} else {
+ new_spte = mark_spte_for_access_track(iter->old_spte);
+ iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte,
+ new_spte, iter->level);
/*
* Capture the dirty status of the page, so that it doesn't get
* lost when the SPTE is marked for access tracking.
*/
- if (is_writable_pte(new_spte))
- kvm_set_pfn_dirty(spte_to_pfn(new_spte));
-
- new_spte = mark_spte_for_access_track(new_spte);
+ if (is_writable_pte(iter->old_spte))
+ kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
}
- kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte, new_spte, iter->level);
return true;
}
@@ -1632,7 +1630,6 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
struct tdp_iter iter;
- u64 new_spte;
rcu_read_lock();
@@ -1648,20 +1645,16 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
mask &= ~(1UL << (iter.gfn - gfn));
if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
- if (is_writable_pte(iter.old_spte))
- new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
- else
+ if (!is_writable_pte(iter.old_spte))
continue;
+
+ kvm_tdp_mmu_clear_spte_bit(&iter, PT_WRITABLE_MASK);
} else {
- if (iter.old_spte & shadow_dirty_mask)
- new_spte = iter.old_spte & ~shadow_dirty_mask;
- else
+ if (!(iter.old_spte & shadow_dirty_mask))
continue;
- kvm_set_pfn_dirty(spte_to_pfn(iter.old_spte));
+ kvm_tdp_mmu_clear_spte_bit(&iter, shadow_dirty_mask);
}
-
- kvm_tdp_mmu_write_spte(iter.sptep, iter.old_spte, new_spte, iter.level);
}
rcu_read_unlock();
base-commit: 7bb67c88a2d77cc95524912f3b1ca51daf5c0224
--
next prev parent reply other threads:[~2023-01-30 23:49 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-25 21:38 [Patch] KVM: x86/mmu: Make optimized __handle_changed_spte() for clear dirty log Vipin Sharma
2023-01-25 22:00 ` David Matlack
2023-01-25 22:25 ` Ben Gardon
2023-01-26 0:40 ` Vipin Sharma
2023-01-26 1:49 ` Sean Christopherson
2023-01-28 17:20 ` Vipin Sharma
2023-01-30 18:09 ` Sean Christopherson
2023-01-30 20:17 ` Vipin Sharma
2023-01-30 22:49 ` Sean Christopherson
2023-01-30 22:12 ` David Matlack
2023-01-30 23:49 ` Sean Christopherson [this message]
2023-01-31 17:41 ` David Matlack
2023-01-31 18:01 ` Vipin Sharma
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y9hXmz/nDOr1hQal@google.com \
--to=seanjc@google.com \
--cc=bgardon@google.com \
--cc=dmatlack@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=vipinsh@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox