public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: James Houghton <jthoughton@google.com>
To: Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: David Matlack <dmatlack@google.com>,
	David Rientjes <rientjes@google.com>,
	 James Houghton <jthoughton@google.com>,
	Marc Zyngier <maz@kernel.org>,
	 Oliver Upton <oliver.upton@linux.dev>,
	Wei Xu <weixugc@google.com>, Yu Zhao <yuzhao@google.com>,
	 Axel Rasmussen <axelrasmussen@google.com>,
	kvm@vger.kernel.org,  linux-kernel@vger.kernel.org
Subject: [PATCH v9 11/11] KVM: x86/mmu: Support rmap walks without holding mmu_lock when aging gfns
Date: Tue,  4 Feb 2025 00:40:38 +0000	[thread overview]
Message-ID: <20250204004038.1680123-12-jthoughton@google.com> (raw)
In-Reply-To: <20250204004038.1680123-1-jthoughton@google.com>

From: Sean Christopherson <seanjc@google.com>

When A/D bits are supported on sptes, it is safe to simply clear the
Accessed bits.

The less obvious case is marking sptes for access tracking in the
non-A/D case (for EPT only). In this case, we have to be sure that it is
okay for TLB entries to exist for non-present sptes. For example, when
doing dirty tracking, if we come across a non-present SPTE, we must
still know that a TLB invalidation is needed.

This case is already supported today: clear_young() already does *not*
do TLBIs (there is a separate notifier for clearing *and* flushing,
clear_flush_young()). This works because GET_DIRTY_LOG flushes the TLB
before returning to userspace.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: James Houghton <jthoughton@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
---
 arch/x86/kvm/mmu/mmu.c | 72 +++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a0f735eeaaeb..57b99daa8614 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -970,7 +970,6 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
  * actual locking is the same, but the caller is disallowed from modifying the
  * rmap, and so the unlock flow is a nop if the rmap is/was empty.
  */
-__maybe_unused
 static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
 {
 	unsigned long rmap_val;
@@ -984,7 +983,6 @@ static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
 	return rmap_val;
 }
 
-__maybe_unused
 static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
 				     unsigned long old_val)
 {
@@ -1705,37 +1703,48 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
 }
 
 static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
-				   struct kvm_gfn_range *range, bool test_only)
+				   struct kvm_gfn_range *range,
+				   bool test_only)
 {
-	struct slot_rmap_walk_iterator iterator;
+	struct kvm_rmap_head *rmap_head;
 	struct rmap_iterator iter;
+	unsigned long rmap_val;
 	bool young = false;
 	u64 *sptep;
+	gfn_t gfn;
+	int level;
+	u64 spte;
 
-	for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
-				 range->start, range->end - 1, &iterator) {
-		for_each_rmap_spte(iterator.rmap, &iter, sptep) {
-			u64 spte = *sptep;
+	for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+		for (gfn = range->start; gfn < range->end;
+		     gfn += KVM_PAGES_PER_HPAGE(level)) {
+			rmap_head = gfn_to_rmap(gfn, level, range->slot);
+			rmap_val = kvm_rmap_lock_readonly(rmap_head);
 
-			if (!is_accessed_spte(spte))
-				continue;
+			for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) {
+				if (!is_accessed_spte(spte))
+					continue;
+
+				if (test_only) {
+					kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+					return true;
+				}
 
-			if (test_only)
-				return true;
-
-			if (spte_ad_enabled(spte)) {
-				clear_bit((ffs(shadow_accessed_mask) - 1),
-					(unsigned long *)sptep);
-			} else {
-				/*
-				 * WARN if mmu_spte_update() signals the need
-				 * for a TLB flush, as Access tracking a SPTE
-				 * should never trigger an _immediate_ flush.
-				 */
-				spte = mark_spte_for_access_track(spte);
-				WARN_ON_ONCE(mmu_spte_update(sptep, spte));
+				if (spte_ad_enabled(spte))
+					clear_bit((ffs(shadow_accessed_mask) - 1),
+						  (unsigned long *)sptep);
+				else
+					/*
+					 * If the following cmpxchg fails, the
+					 * spte is being concurrently modified
+					 * and should most likely stay young.
+					 */
+					cmpxchg64(sptep, spte,
+					      mark_spte_for_access_track(spte));
+				young = true;
 			}
-			young = true;
+
+			kvm_rmap_unlock_readonly(rmap_head, rmap_val);
 		}
 	}
 	return young;
@@ -1753,11 +1762,8 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	if (tdp_mmu_enabled)
 		young = kvm_tdp_mmu_age_gfn_range(kvm, range);
 
-	if (kvm_may_have_shadow_mmu_sptes(kvm)) {
-		write_lock(&kvm->mmu_lock);
+	if (kvm_may_have_shadow_mmu_sptes(kvm))
 		young |= kvm_rmap_age_gfn_range(kvm, range, false);
-		write_unlock(&kvm->mmu_lock);
-	}
 
 	return young;
 }
@@ -1769,11 +1775,11 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	if (tdp_mmu_enabled)
 		young = kvm_tdp_mmu_test_age_gfn(kvm, range);
 
-	if (!young && kvm_may_have_shadow_mmu_sptes(kvm)) {
-		write_lock(&kvm->mmu_lock);
+	if (young)
+		return young;
+
+	if (kvm_may_have_shadow_mmu_sptes(kvm))
 		young |= kvm_rmap_age_gfn_range(kvm, range, true);
-		write_unlock(&kvm->mmu_lock);
-	}
 
 	return young;
 }
-- 
2.48.1.362.g079036d154-goog


  parent reply	other threads:[~2025-02-04  0:41 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-04  0:40 [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly James Houghton
2025-02-04  0:40 ` [PATCH v9 01/11] KVM: Rename kvm_handle_hva_range() James Houghton
2025-02-04  0:40 ` [PATCH v9 02/11] KVM: Add lockless memslot walk to KVM James Houghton
2025-02-14 15:26   ` Sean Christopherson
2025-02-14 19:27     ` James Houghton
2025-02-04  0:40 ` [PATCH v9 03/11] KVM: x86/mmu: Factor out spte atomic bit clearing routine James Houghton
2025-02-04  0:40 ` [PATCH v9 04/11] KVM: x86/mmu: Relax locking for kvm_test_age_gfn() and kvm_age_gfn() James Houghton
2025-02-12 22:07   ` Sean Christopherson
2025-02-13  0:25     ` James Houghton
2025-02-04  0:40 ` [PATCH v9 05/11] KVM: x86/mmu: Rename spte_has_volatile_bits() to spte_needs_atomic_write() James Houghton
2025-02-12 22:09   ` Sean Christopherson
2025-02-13  0:26     ` James Houghton
2025-02-04  0:40 ` [PATCH v9 06/11] KVM: x86/mmu: Skip shadow MMU test_young if TDP MMU reports page as young James Houghton
2025-02-04  0:40 ` [PATCH v9 07/11] KVM: x86/mmu: Only check gfn age in shadow MMU if indirect_shadow_pages > 0 James Houghton
2025-02-04  0:40 ` [PATCH v9 08/11] KVM: x86/mmu: Refactor low level rmap helpers to prep for walking w/o mmu_lock James Houghton
2025-02-04  0:40 ` [PATCH v9 09/11] KVM: x86/mmu: Add infrastructure to allow walking rmaps outside of mmu_lock James Houghton
2025-02-04  0:40 ` [PATCH v9 10/11] KVM: x86/mmu: Add support for lockless walks of rmap SPTEs James Houghton
2025-02-04  0:40 ` James Houghton [this message]
2025-02-15  0:50 ` [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly Sean Christopherson
2025-02-18 19:29 ` Maxim Levitsky
2025-02-19  1:13   ` Sean Christopherson
2025-02-19 18:56     ` James Houghton
2025-02-25 22:00     ` Maxim Levitsky
2025-02-26  0:50       ` Sean Christopherson
2025-02-26 18:39         ` Maxim Levitsky
2025-02-27  0:51           ` Sean Christopherson
2025-02-27  1:54             ` Maxim Levitsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250204004038.1680123-12-jthoughton@google.com \
    --to=jthoughton@google.com \
    --cc=axelrasmussen@google.com \
    --cc=dmatlack@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=pbonzini@redhat.com \
    --cc=rientjes@google.com \
    --cc=seanjc@google.com \
    --cc=weixugc@google.com \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox