public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Takahiro Itazuri <itazur@amazon.com>
To: <kvm@vger.kernel.org>, Sean Christopherson <seanjc@google.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>,
	Fuad Tabba <tabba@google.com>,
	Brendan Jackman <jackmanb@google.com>,
	David Hildenbrand <david@kernel.org>,
	David Woodhouse <dwmw2@infradead.org>,
	Paul Durrant <pdurrant@amazon.com>,
	Nikita Kalyazin <kalyazin@amazon.com>,
	Patrick Roy <patrick.roy@campus.lmu.de>,
	Takahiro Itazuri <zulinx86@gmail.com>
Subject: [RFC PATCH v2 7/7] KVM: pfncache: Invalidate on gmem invalidation and memattr updates
Date: Thu, 26 Feb 2026 13:53:08 +0000	[thread overview]
Message-ID: <20260226135309.29493-8-itazur@amazon.com> (raw)
In-Reply-To: <20260226135309.29493-1-itazur@amazon.com>

Invalidate pfncaches when guest_memfd invalidation or memory attribute
updates render cached PFN resolutions stale.

Reuse active_invalidate_count to synchronize with the existing retry
logic and preserve ordering against mmu_invalidate_seq.

Invalidation needs to be performed using HVA ranges so that both
GPA-based and HVA-based pfncaches are covered.  Internally GPA-based
ones translate GPA to memslot/UHVA first and then resolve PFN, while
HVA-based ones only resolve PFN and do not store memslot/GPA context.
Technically, it is possible to make HVA-based pfncaches search the
corresponding memslot/GPA when activated / refreshed, but it would add
overhead to a greater or lesser extent, regardless of guest_memfd-backed
or not.  At the time of writing, only Xen uses HVA-based pfncaches.

Signed-off-by: Takahiro Itazuri <itazur@amazon.com>
Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
---
 virt/kvm/guest_memfd.c | 50 ++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c    | 45 +++++++++++++++++++++++++++++++++++++
 virt/kvm/pfncache.c    |  4 ++--
 3 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 79f34dad0c2f..eb2f1a7e54dc 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -215,6 +215,33 @@ static void __kvm_gmem_invalidate_start(struct gmem_file *f, pgoff_t start,
 	struct kvm *kvm = f->kvm;
 	unsigned long index;
 
+	/*
+	 * Prevent pfncaches from being activated / refreshed using stale PFN
+	 * resolutions.  To invalidate pfncaches _before_ invalidating the
+	 * secondary MMUs (i.e. without acquiring mmu_lock), pfncaches must use
+	 * active_invalidate_count instead of mmu_invalidate_in_progress.
+	 */
+	spin_lock(&kvm->invalidate_lock);
+	kvm->active_invalidate_count++;
+	spin_unlock(&kvm->invalidate_lock);
+
+	/*
+	 * Invalidation of pfncaches must be done using a HVA range.  pfncaches
+	 * can be either GPA-based or HVA-based, and all pfncaches store uhva
+	 * while HVA-based pfncaches do not have gpa/memslot info.  Thus,
+	 * using GFN ranges would miss invalidating HVA-based ones.
+	 */
+	xa_for_each_range(&f->bindings, index, slot, start, end - 1) {
+		pgoff_t pgoff = slot->gmem.pgoff;
+		gfn_t gfn_start = slot->base_gfn + max(pgoff, start) - pgoff;
+		gfn_t gfn_end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff;
+
+		unsigned long hva_start = gfn_to_hva_memslot(slot, gfn_start);
+		unsigned long hva_end = gfn_to_hva_memslot(slot, gfn_end);
+
+		gpc_invalidate_hva_range_start(kvm, hva_start, hva_end);
+	}
+
 	xa_for_each_range(&f->bindings, index, slot, start, end - 1) {
 		pgoff_t pgoff = slot->gmem.pgoff;
 
@@ -259,12 +286,35 @@ static void __kvm_gmem_invalidate_end(struct gmem_file *f, pgoff_t start,
 				      pgoff_t end)
 {
 	struct kvm *kvm = f->kvm;
+	bool wake;
 
 	if (xa_find(&f->bindings, &start, end - 1, XA_PRESENT)) {
 		KVM_MMU_LOCK(kvm);
 		kvm_mmu_invalidate_end(kvm);
 		KVM_MMU_UNLOCK(kvm);
 	}
+
+	/*
+	 * This must be done after the increment of mmu_invalidate_seq and
+	 * smp_wmb() in kvm_mmu_invalidate_end() to guarantee that
+	 * gpc_invalidate_retry() observes either the old (non-zero)
+	 * active_invalidate_count or the new (incremented) mmu_invalidate_seq.
+	 */
+	spin_lock(&kvm->invalidate_lock);
+	if (!WARN_ON_ONCE(!kvm->active_invalidate_count))
+		kvm->active_invalidate_count--;
+	wake = !kvm->active_invalidate_count;
+	spin_unlock(&kvm->invalidate_lock);
+
+	/*
+	 * guest_memfd invalidation itself doesn't need to block active memslots
+	 * swap as bindings updates are serialized by filemap_invalidate_lock().
+	 * However, active_invalidate_count is shared with the MMU notifier
+	 * path, so the waiter must be woken when active_invalidate_count drops
+	 * to zero.
+	 */
+	if (wake)
+		rcuwait_wake_up(&kvm->memslots_update_rcuwait);
 }
 
 static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f51056e971d0..f56b98c85175 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2583,6 +2583,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 		.on_lock = kvm_mmu_invalidate_end,
 		.may_block = true,
 	};
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *slot;
 	unsigned long i;
 	void *entry;
 	int r = 0;
@@ -2609,6 +2611,34 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 		cond_resched();
 	}
 
+	/*
+	 * Prevent pfncaches from being activated / refreshed using stale PFN
+	 * resolutions.  To invalidate pfncaches _before_ invalidating the
+	 * secondary MMUs (i.e. without acquiring mmu_lock), pfncaches must use
+	 * active_invalidate_count instead of mmu_invalidate_in_progress.
+	 */
+	spin_lock(&kvm->invalidate_lock);
+	kvm->active_invalidate_count++;
+	spin_unlock(&kvm->invalidate_lock);
+
+	/*
+	 * Invalidation of pfncaches must be done using a HVA range.  pfncaches
+	 * can be either GPA-based or HVA-based, and all pfncaches store uhva
+	 * while HVA-based pfncaches do not have gpa/memslot info.  Thus,
+	 * using GFN ranges would miss invalidating HVA-based ones.
+	 */
+	kvm_for_each_memslot(slot, slots) {
+		gfn_t gfn_start = max(start, slot->base_gfn);
+		gfn_t gfn_end = min(end, slot->base_gfn + slot->npages);
+
+		if (gfn_start < gfn_end) {
+			unsigned long hva_start = gfn_to_hva_memslot(slot, gfn_start);
+			unsigned long hva_end = gfn_to_hva_memslot(slot, gfn_end);
+
+			gpc_invalidate_hva_range_start(kvm, hva_start, hva_end);
+		}
+	}
+
 	kvm_handle_gfn_range(kvm, &pre_set_range);
 
 	for (i = start; i < end; i++) {
@@ -2620,6 +2650,21 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 
 	kvm_handle_gfn_range(kvm, &post_set_range);
 
+	/*
+	 * This must be done after the increment of mmu_invalidate_seq and
+	 * smp_wmb() in kvm_mmu_invalidate_end() to guarantee that
+	 * gpc_invalidate_retry() observes either the old (non-zero)
+	 * active_invalidate_count or the new (incremented) mmu_invalidate_seq.
+	 *
+	 * memslots_update_rcuwait does not need to be woken when
+	 * active_invalidate_count drops to zero because active memslots swap is
+	 * also done while holding slots_lock.
+	 */
+	spin_lock(&kvm->invalidate_lock);
+	if (!WARN_ON_ONCE(!kvm->active_invalidate_count))
+		kvm->active_invalidate_count--;
+	spin_unlock(&kvm->invalidate_lock);
+
 out_unlock:
 	mutex_unlock(&kvm->slots_lock);
 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 2880a36257c2..2b44da46d2ab 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -144,7 +144,7 @@ static void gpc_unmap(kvm_pfn_t pfn, void *khva)
 #endif
 }
 
-static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
+static inline bool gpc_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
 {
 	/*
 	 * active_invalidate_count acts for all intents and purposes like
@@ -274,7 +274,7 @@ static kvm_pfn_t gpc_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		 * attempting to refresh.
 		 */
 		WARN_ON_ONCE(gpc->valid);
-	} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
+	} while (gpc_invalidate_retry(gpc->kvm, mmu_seq));
 
 	gpc->valid = true;
 	gpc->pfn = new_pfn;
-- 
2.50.1


      parent reply	other threads:[~2026-02-26 13:54 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-26 13:53 [RFC PATCH v2 0/7] KVM: pfncache: Add guest_memfd support to pfncache Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 1/7] KVM: x86: Avoid silent kvm-clock activation failures Takahiro Itazuri
2026-03-05 17:50   ` Sean Christopherson
2026-03-10  5:58     ` Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 2/7] KVM: pfncache: Resolve PFNs via kvm_gmem_get_pfn() for gmem-backed GPAs Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 3/7] KVM: pfncache: Obtain KHVA via vmap() for gmem with NO_DIRECT_MAP Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 4/7] KVM: Rename invalidate_begin to invalidate_start for consistency Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 5/7] KVM: pfncache: Rename invalidate_start() helper Takahiro Itazuri
2026-02-26 13:53 ` [RFC PATCH v2 6/7] KVM: Rename mn_* invalidate-related fields to generic ones Takahiro Itazuri
2026-02-26 13:53 ` Takahiro Itazuri [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260226135309.29493-8-itazur@amazon.com \
    --to=itazur@amazon.com \
    --cc=david@kernel.org \
    --cc=dwmw2@infradead.org \
    --cc=jackmanb@google.com \
    --cc=kalyazin@amazon.com \
    --cc=kvm@vger.kernel.org \
    --cc=patrick.roy@campus.lmu.de \
    --cc=pbonzini@redhat.com \
    --cc=pdurrant@amazon.com \
    --cc=seanjc@google.com \
    --cc=tabba@google.com \
    --cc=vkuznets@redhat.com \
    --cc=zulinx86@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox