public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Takahiro Itazuri <itazur@amazon.com>
To: <kvm@vger.kernel.org>, Sean Christopherson <seanjc@google.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>,
	Fuad Tabba <tabba@google.com>,
	Brendan Jackman <jackmanb@google.com>,
	David Hildenbrand <david@kernel.org>,
	David Woodhouse <dwmw2@infradead.org>,
	Paul Durrant <pdurrant@amazon.com>,
	Nikita Kalyazin <nikita.kalyazin@linux.dev>,
	Patrick Roy <patrick.roy@campus.lmu.de>,
	Patrick Roy <patrick.roy@linux.dev>,
	"Derek Manwaring" <derekmn@amazon.com>,
	Alina Cernea <acernea@amazon.com>,
	"Michael Zoumboulakis" <zoumboul@amazon.com>,
	Takahiro Itazuri <zulinx86@gmail.com>,
	Takahiro Itazuri <itazur@amazon.com>
Subject: [RFC PATCH v4 1/7] KVM: pfncache: Resolve PFNs via kvm_gmem_get_pfn() for gmem-backed GPAs
Date: Mon, 20 Apr 2026 15:46:02 +0000	[thread overview]
Message-ID: <20260420154720.29012-2-itazur@amazon.com> (raw)
In-Reply-To: <20260420154720.29012-1-itazur@amazon.com>

Currently, pfncaches always resolve PFNs via hva_to_pfn(), which
requires a userspace mapping and relies on GUP.  This does not work for
guest_memfd in the following two cases:

  * guest_memfd created without MMAP flag does not have a userspace
    mapping.

  * guest_memfd created with NO_DIRECT_MAP flag uses an AS_NO_DIRECT_MAP
    mapping, which is rejected by GUP.

Resolve PFNs via kvm_gmem_get_pfn() for guest_memfd-backed and GPA-based
pfncaches.  Otherwise, fall back to the existing hva_to_pfn().

The current implementation does not support HVA-based pfncaches for
NO_DIRECT_MAP guest_memfd.  HVA-based pfncaches do not store
memslot/GPA context, so they cannot determine whether the target is
guest_memfd-backed and always fall back to hva_to_pfn().  Adding a
memslot/GPA lookup is possible but would add overhead to all HVA-based
pfncache activations and refreshes.  At the time of writing, only Xen
uses HVA-based pfncaches.

Signed-off-by: Takahiro Itazuri <itazur@amazon.com>
---
 virt/kvm/pfncache.c | 66 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 728d2c1b488a..ad41cf3e8df4 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -152,7 +152,53 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
 	return kvm->mmu_invalidate_seq != mmu_seq;
 }
 
-static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
+/*
+ * Determine whether a GPA-based pfncache is backed by guest_memfd, i.e. needs
+ * to be resolved via kvm_gmem_get_pfn() rather than GUP.
+ *
+ * The caller holds gpc->refresh_lock, but does not hold gpc->lock nor
+ * kvm->slots_lock.  Reading slot->flags (via kvm_slot_has_gmem() and
+ * kvm_memslot_is_gmem_only()) is safe because memslot changes bump
+ * slots->generation, which is detected in kvm_gpc_check(), forcing callers
+ * to invoke kvm_gpc_refresh().
+ *
+ * Looking up memory attributes (via kvm_mem_is_private()) can race with
+ * KVM_SET_MEMORY_ATTRIBUTES, which takes kvm->slots_lock to serialize
+ * writers but doesn't exclude lockless readers.  Handling that race is deferred
+ * to a subsequent commit that wires up pfncache invalidation for gmem events.
+ */
+static inline bool gpc_is_gmem_backed(struct gfn_to_pfn_cache *gpc)
+{
+	lockdep_assert_held(&gpc->refresh_lock);
+
+	/* For HVA-based pfncaches, memslot is NULL */
+	return gpc->memslot && kvm_slot_has_gmem(gpc->memslot) &&
+	       (kvm_memslot_is_gmem_only(gpc->memslot) ||
+		kvm_mem_is_private(gpc->kvm, gpa_to_gfn(gpc->gpa)));
+}
+
+static kvm_pfn_t gpc_to_pfn(struct gfn_to_pfn_cache *gpc, struct page **page)
+{
+	if (gpc_is_gmem_backed(gpc)) {
+		kvm_pfn_t pfn;
+
+		if (kvm_gmem_get_pfn(gpc->kvm, gpc->memslot,
+				     gpa_to_gfn(gpc->gpa), &pfn, page, NULL))
+			return KVM_PFN_ERR_FAULT;
+
+		return pfn;
+	}
+
+	return hva_to_pfn(&(struct kvm_follow_pfn) {
+		.slot = gpc->memslot,
+		.gfn = gpa_to_gfn(gpc->gpa),
+		.flags = FOLL_WRITE,
+		.hva = gpc->uhva,
+		.refcounted_page = page,
+	});
+}
+
+static kvm_pfn_t gpc_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
 	/* Note, the new page offset may be different than the old! */
 	void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
@@ -161,14 +207,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 	unsigned long mmu_seq;
 	struct page *page;
 
-	struct kvm_follow_pfn kfp = {
-		.slot = gpc->memslot,
-		.gfn = gpa_to_gfn(gpc->gpa),
-		.flags = FOLL_WRITE,
-		.hva = gpc->uhva,
-		.refcounted_page = &page,
-	};
-
 	lockdep_assert_held(&gpc->refresh_lock);
 
 	lockdep_assert_held_write(&gpc->lock);
@@ -206,7 +244,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 			cond_resched();
 		}
 
-		new_pfn = hva_to_pfn(&kfp);
+		new_pfn = gpc_to_pfn(gpc, &page);
 		if (is_error_noslot_pfn(new_pfn))
 			goto out_error;
 
@@ -319,7 +357,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 		}
 	}
 
-	/* Note: the offset must be correct before calling hva_to_pfn_retry() */
+	/* Note: the offset must be correct before calling gpc_to_pfn_retry() */
 	gpc->uhva += page_offset;
 
 	/*
@@ -327,7 +365,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 	 * drop the lock and do the HVA to PFN lookup again.
 	 */
 	if (!gpc->valid || hva_change) {
-		ret = hva_to_pfn_retry(gpc);
+		ret = gpc_to_pfn_retry(gpc);
 	} else {
 		/*
 		 * If the HVA→PFN mapping was already valid, don't unmap it.
@@ -441,6 +479,10 @@ int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsig
 	if (!access_ok((void __user *)uhva, len))
 		return -EINVAL;
 
+	/*
+	 * HVA-based caches always resolve PFNs via GUP (hva_to_pfn()), which
+	 * does not work for NO_DIRECT_MAP guest_memfd.
+	 */
 	return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
 }
 
-- 
2.50.1


  reply	other threads:[~2026-04-20 15:47 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-20 15:46 [RFC PATCH v4 0/7] KVM: pfncache: Add guest_memfd support to pfncache Takahiro Itazuri
2026-04-20 15:46 ` Takahiro Itazuri [this message]
2026-04-20 15:46 ` [RFC PATCH v4 2/7] KVM: pfncache: Obtain KHVA via vmap() for gmem with NO_DIRECT_MAP Takahiro Itazuri
2026-04-20 15:46 ` [RFC PATCH v4 3/7] KVM: Rename invalidate_begin to invalidate_start for consistency Takahiro Itazuri
2026-04-20 15:46 ` [RFC PATCH v4 4/7] KVM: pfncache: Rename invalidate_start() helper Takahiro Itazuri
2026-04-20 15:46 ` [RFC PATCH v4 5/7] KVM: pfncache: Invalidate on gmem invalidation and memattr updates Takahiro Itazuri
2026-04-20 15:46 ` [RFC PATCH v4 6/7] KVM: selftests: Test pfncache with gmem-backed memory Takahiro Itazuri
2026-04-20 15:46 ` [RFC PATCH v4 7/7] KVM: selftests: Test pfncache invalidation for " Takahiro Itazuri

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260420154720.29012-2-itazur@amazon.com \
    --to=itazur@amazon.com \
    --cc=acernea@amazon.com \
    --cc=david@kernel.org \
    --cc=derekmn@amazon.com \
    --cc=dwmw2@infradead.org \
    --cc=jackmanb@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=nikita.kalyazin@linux.dev \
    --cc=patrick.roy@campus.lmu.de \
    --cc=patrick.roy@linux.dev \
    --cc=pbonzini@redhat.com \
    --cc=pdurrant@amazon.com \
    --cc=seanjc@google.com \
    --cc=tabba@google.com \
    --cc=vkuznets@redhat.com \
    --cc=zoumboul@amazon.com \
    --cc=zulinx86@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox