From: James Houghton <jthoughton@google.com>
To: Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: David Matlack <dmatlack@google.com>,
David Rientjes <rientjes@google.com>,
James Houghton <jthoughton@google.com>,
Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Wei Xu <weixugc@google.com>, Yu Zhao <yuzhao@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v9 11/11] KVM: x86/mmu: Support rmap walks without holding mmu_lock when aging gfns
Date: Tue, 4 Feb 2025 00:40:38 +0000 [thread overview]
Message-ID: <20250204004038.1680123-12-jthoughton@google.com> (raw)
In-Reply-To: <20250204004038.1680123-1-jthoughton@google.com>
From: Sean Christopherson <seanjc@google.com>
When A/D bits are supported on sptes, it is safe to simply clear the
Accessed bits.
The less obvious case is marking sptes for access tracking in the
non-A/D case (for EPT only). In this case, we have to be sure that it is
okay for TLB entries to exist for non-present sptes. For example, when
doing dirty tracking, if we come across a non-present SPTE, we need to
know that we need to do a TLB invalidation.
This case is already supported today (as we already support *not* doing
TLBIs for clear_young(); there is a separate notifier for clearing *and*
flushing, clear_flush_young()). This works today because GET_DIRTY_LOG
flushes the TLB before returning to userspace.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: James Houghton <jthoughton@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
---
arch/x86/kvm/mmu/mmu.c | 72 +++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 33 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a0f735eeaaeb..57b99daa8614 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -970,7 +970,6 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
* actual locking is the same, but the caller is disallowed from modifying the
* rmap, and so the unlock flow is a nop if the rmap is/was empty.
*/
-__maybe_unused
static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
{
unsigned long rmap_val;
@@ -984,7 +983,6 @@ static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
return rmap_val;
}
-__maybe_unused
static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
unsigned long old_val)
{
@@ -1705,37 +1703,48 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
}
static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
- struct kvm_gfn_range *range, bool test_only)
+ struct kvm_gfn_range *range,
+ bool test_only)
{
- struct slot_rmap_walk_iterator iterator;
+ struct kvm_rmap_head *rmap_head;
struct rmap_iterator iter;
+ unsigned long rmap_val;
bool young = false;
u64 *sptep;
+ gfn_t gfn;
+ int level;
+ u64 spte;
- for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- range->start, range->end - 1, &iterator) {
- for_each_rmap_spte(iterator.rmap, &iter, sptep) {
- u64 spte = *sptep;
+ for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ for (gfn = range->start; gfn < range->end;
+ gfn += KVM_PAGES_PER_HPAGE(level)) {
+ rmap_head = gfn_to_rmap(gfn, level, range->slot);
+ rmap_val = kvm_rmap_lock_readonly(rmap_head);
- if (!is_accessed_spte(spte))
- continue;
+ for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) {
+ if (!is_accessed_spte(spte))
+ continue;
+
+ if (test_only) {
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+ return true;
+ }
- if (test_only)
- return true;
-
- if (spte_ad_enabled(spte)) {
- clear_bit((ffs(shadow_accessed_mask) - 1),
- (unsigned long *)sptep);
- } else {
- /*
- * WARN if mmu_spte_update() signals the need
- * for a TLB flush, as Access tracking a SPTE
- * should never trigger an _immediate_ flush.
- */
- spte = mark_spte_for_access_track(spte);
- WARN_ON_ONCE(mmu_spte_update(sptep, spte));
+ if (spte_ad_enabled(spte))
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)sptep);
+ else
+ /*
+ * If the following cmpxchg fails, the
+ * spte is being concurrently modified
+ * and should most likely stay young.
+ */
+ cmpxchg64(sptep, spte,
+ mark_spte_for_access_track(spte));
+ young = true;
}
- young = true;
+
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
}
}
return young;
@@ -1753,11 +1762,8 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (tdp_mmu_enabled)
young = kvm_tdp_mmu_age_gfn_range(kvm, range);
- if (kvm_may_have_shadow_mmu_sptes(kvm)) {
- write_lock(&kvm->mmu_lock);
+ if (kvm_may_have_shadow_mmu_sptes(kvm))
young |= kvm_rmap_age_gfn_range(kvm, range, false);
- write_unlock(&kvm->mmu_lock);
- }
return young;
}
@@ -1769,11 +1775,11 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (tdp_mmu_enabled)
young = kvm_tdp_mmu_test_age_gfn(kvm, range);
- if (!young && kvm_may_have_shadow_mmu_sptes(kvm)) {
- write_lock(&kvm->mmu_lock);
+ if (young)
+ return young;
+
+ if (kvm_may_have_shadow_mmu_sptes(kvm))
young |= kvm_rmap_age_gfn_range(kvm, range, true);
- write_unlock(&kvm->mmu_lock);
- }
return young;
}
--
2.48.1.362.g079036d154-goog
next prev parent reply other threads:[~2025-02-04 0:41 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-04 0:40 [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly James Houghton
2025-02-04 0:40 ` [PATCH v9 01/11] KVM: Rename kvm_handle_hva_range() James Houghton
2025-02-04 0:40 ` [PATCH v9 02/11] KVM: Add lockless memslot walk to KVM James Houghton
2025-02-14 15:26 ` Sean Christopherson
2025-02-14 19:27 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 03/11] KVM: x86/mmu: Factor out spte atomic bit clearing routine James Houghton
2025-02-04 0:40 ` [PATCH v9 04/11] KVM: x86/mmu: Relax locking for kvm_test_age_gfn() and kvm_age_gfn() James Houghton
2025-02-12 22:07 ` Sean Christopherson
2025-02-13 0:25 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 05/11] KVM: x86/mmu: Rename spte_has_volatile_bits() to spte_needs_atomic_write() James Houghton
2025-02-12 22:09 ` Sean Christopherson
2025-02-13 0:26 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 06/11] KVM: x86/mmu: Skip shadow MMU test_young if TDP MMU reports page as young James Houghton
2025-02-04 0:40 ` [PATCH v9 07/11] KVM: x86/mmu: Only check gfn age in shadow MMU if indirect_shadow_pages > 0 James Houghton
2025-02-04 0:40 ` [PATCH v9 08/11] KVM: x86/mmu: Refactor low level rmap helpers to prep for walking w/o mmu_lock James Houghton
2025-02-04 0:40 ` [PATCH v9 09/11] KVM: x86/mmu: Add infrastructure to allow walking rmaps outside of mmu_lock James Houghton
2025-02-04 0:40 ` [PATCH v9 10/11] KVM: x86/mmu: Add support for lockless walks of rmap SPTEs James Houghton
2025-02-04 0:40 ` James Houghton [this message]
2025-02-15 0:50 ` [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly Sean Christopherson
2025-02-18 19:29 ` Maxim Levitsky
2025-02-19 1:13 ` Sean Christopherson
2025-02-19 18:56 ` James Houghton
2025-02-25 22:00 ` Maxim Levitsky
2025-02-26 0:50 ` Sean Christopherson
2025-02-26 18:39 ` Maxim Levitsky
2025-02-27 0:51 ` Sean Christopherson
2025-02-27 1:54 ` Maxim Levitsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250204004038.1680123-12-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=axelrasmussen@google.com \
--cc=dmatlack@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=oliver.upton@linux.dev \
--cc=pbonzini@redhat.com \
--cc=rientjes@google.com \
--cc=seanjc@google.com \
--cc=weixugc@google.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox