public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH kernel] iommufd: Allow mapping from KVM's guest_memfd
@ 2026-02-25  7:52 Alexey Kardashevskiy
  2026-02-25 13:55 ` Sean Christopherson
  0 siblings, 1 reply; 15+ messages in thread
From: Alexey Kardashevskiy @ 2026-02-25  7:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Jason Gunthorpe, Kevin Tian, Joerg Roedel, Will Deacon,
	Robin Murphy, Paolo Bonzini, Steve Sistare, Nicolin Chen, iommu,
	Alexey Kardashevskiy, linux-coco, Dan Williams, Santosh Shukla,
	Pratik R . Sampat, Ackerley Tng, Sean Christopherson, Fuad Tabba,
	Xu Yilun, Aneesh Kumar K . V

CoCo VMs get their private memory allocated from guest_memfd
("gmemfd") which is a KVM facility similar to memfd.
A gmemfd does not allow mapping private memory to userspace,
so the IOMMU_IOAS_MAP ioctl does not work.

Use the existing IOMMU_IOAS_MAP_FILE ioctl to allow mapping from
fd + offset. Detect the gmemfd case in pfn_reader_user_pin().

For the new guest_memfd type, no additional reference is taken as
pinning is guaranteed by the KVM guest_memfd library.

There is no KVM-GMEMFD->IOMMUFD direct notification mechanism as
the assumption is that:
1) page state change events will be handled by the VMM which is going
to call IOMMUFD to remap pages;
2) shrinking the GMEMFD equates to VM memory unplug and the VMM is going
to handle it.

Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
---

This is for Trusted IO == TEE-IO == PCIe TDISP, etc.

Previously posted here:
https://lore.kernel.org/r/20250218111017.491719-13-aik@amd.com
The main comment was "what is the lifetime of those folios?" and
GMEMFD + QEMU should take care of it.

And horrendous stuff like this is not really useful:
https://github.com/AMDESE/linux-kvm/commit/7d73fd2cccb8489b1

---
 include/linux/kvm_host.h      |  4 +
 drivers/iommu/iommufd/pages.c | 80 ++++++++++++++++++--
 virt/kvm/guest_memfd.c        | 36 +++++++++
 3 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 995db7a7ba57..9369cf22b24e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2673,4 +2673,8 @@ unsigned long kvm_get_vm_memory_attributes(struct kvm *kvm, gfn_t gfn);
 int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
 					   struct kvm_memory_attributes2 *attrs);
 
+bool kvm_is_gmemfd(struct file *file);
+struct folio *kvm_gmemfd_get_pfn(struct file *file, unsigned long index,
+				 unsigned long *pfn, int *max_order);
+
 #endif
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index dbe51ecb9a20..4c07e39e17d0 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -56,6 +56,9 @@
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include <linux/vfio_pci_core.h>
+#include <linux/pagemap.h>
+#include <linux/memcontrol.h>
+#include <linux/kvm_host.h>
 
 #include "double_span.h"
 #include "io_pagetable.h"
@@ -660,7 +663,8 @@ static void batch_from_pages(struct pfn_batch *batch, struct page **pages,
 }
 
 static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p,
-			     unsigned long *offset_p, unsigned long npages)
+			     unsigned long *offset_p, unsigned long npages,
+			     bool do_pin)
 {
 	int rc = 0;
 	struct folio **folios = *folios_p;
@@ -676,7 +680,7 @@ static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p,
 
 		if (!batch_add_pfn_num(batch, pfn, nr, BATCH_CPU_MEMORY))
 			break;
-		if (nr > 1) {
+		if (nr > 1 && do_pin) {
 			rc = folio_add_pins(folio, nr - 1);
 			if (rc) {
 				batch_remove_pfn_num(batch, nr);
@@ -697,6 +701,7 @@ static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p,
 static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages,
 			unsigned int first_page_off, size_t npages)
 {
+	bool do_unpin = !kvm_is_gmemfd(pages->file);
 	unsigned int cur = 0;
 
 	while (first_page_off) {
@@ -710,9 +715,12 @@ static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages,
 		size_t to_unpin = min_t(size_t, npages,
 					batch->npfns[cur] - first_page_off);
 
-		unpin_user_page_range_dirty_lock(
-			pfn_to_page(batch->pfns[cur] + first_page_off),
-			to_unpin, pages->writable);
+		/* Do nothing for guest_memfd */
+		if (do_unpin)
+			unpin_user_page_range_dirty_lock(
+				pfn_to_page(batch->pfns[cur] + first_page_off),
+				to_unpin, pages->writable);
+
 		iopt_pages_sub_npinned(pages, to_unpin);
 		cur++;
 		first_page_off = 0;
@@ -872,6 +880,57 @@ static long pin_memfd_pages(struct pfn_reader_user *user, unsigned long start,
 	return npages_out;
 }
 
+static long pin_guest_memfd_pages(struct pfn_reader_user *user, loff_t start, unsigned long npages)
+{
+	struct page **upages = user->upages;
+	unsigned long offset = 0;
+	loff_t uptr = start;
+	long rc = 0;
+
+	for (unsigned long i = 0; (uptr - start) < (npages << PAGE_SHIFT); ++i) {
+		unsigned long gfn = 0, pfn = 0;
+		int max_order = 0;
+		struct folio *folio;
+
+		folio = kvm_gmemfd_get_pfn(user->file, uptr >> PAGE_SHIFT, &pfn, &max_order);
+		if (IS_ERR(folio))
+			rc = PTR_ERR(folio);
+
+		if (rc == -EINVAL && i == 0) {
+			pr_err_once("Must be vfio mmio at gfn=%lx pfn=%lx, skipping\n", gfn, pfn);
+			return rc;
+		}
+
+		if (rc) {
+			pr_err("%s: %ld %ld %lx -> %lx\n", __func__,
+			       rc, i, (unsigned long) uptr, (unsigned long) pfn);
+			break;
+		}
+
+		if (i == 0)
+			offset = offset_in_folio(folio, start) >> PAGE_SHIFT;
+
+		user->ufolios[i] = folio;
+
+		if (upages) {
+			unsigned long np = (1UL << (max_order + PAGE_SHIFT)) - offset_in_folio(folio, uptr);
+
+			for (unsigned long j = 0; j < np; ++j)
+				*upages++ = folio_page(folio, offset + j);
+		}
+
+		uptr += (1UL << (max_order + PAGE_SHIFT)) - offset_in_folio(folio, uptr);
+	}
+
+	if (!rc) {
+		rc = npages;
+		user->ufolios_next = user->ufolios;
+		user->ufolios_offset = offset;
+	}
+
+	return rc;
+}
+
 static int pfn_reader_user_pin(struct pfn_reader_user *user,
 			       struct iopt_pages *pages,
 			       unsigned long start_index,
@@ -925,7 +984,13 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
 
 	if (user->file) {
 		start = pages->start + (start_index * PAGE_SIZE);
-		rc = pin_memfd_pages(user, start, npages);
+		if (kvm_is_gmemfd(pages->file)) {
+			rc = pin_guest_memfd_pages(user, start, npages);
+		} else {
+			pr_err("UNEXP PINFD start=%lx sz=%lx file=%lx",
+				start, npages << PAGE_SHIFT, (ulong) pages->file);
+			rc = pin_memfd_pages(user, start, npages);
+		}
 	} else if (!remote_mm) {
 		uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
 		rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
@@ -1221,7 +1286,8 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
 				 npages);
 	else
 		rc = batch_from_folios(&pfns->batch, &user->ufolios_next,
-				       &user->ufolios_offset, npages);
+				       &user->ufolios_offset, npages,
+				       !kvm_is_gmemfd(pfns->pages->file));
 	return rc;
 }
 
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index e4e21068cf2a..2a313888c21b 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1794,3 +1794,39 @@ void kvm_gmem_exit(void)
 	rcu_barrier();
 	kmem_cache_destroy(kvm_gmem_inode_cachep);
 }
+
+/*
+ * Return true when @file is a KVM guest_memfd file.  A gmemfd is
+ * identified by its file_operations table; a NULL @file is not one.
+ */
+bool kvm_is_gmemfd(struct file *file)
+{
+	return file && file->f_op == &kvm_gmem_fops;
+}
+EXPORT_SYMBOL_GPL(kvm_is_gmemfd);
+
+/*
+ * Look up the folio backing guest_memfd page @index; report its pfn via
+ * @pfn and its order via @max_order.
+ *
+ * Both the folio lock and the reference from kvm_gmem_get_folio() are
+ * dropped before returning: per the gmemfd design the folio stays pinned
+ * by the guest_memfd library, so the returned pointer is used by the
+ * caller only to compute page pointers — TODO confirm lifetime guarantee.
+ * Returns an ERR_PTR() on failure (callers test with IS_ERR()).
+ */
+struct folio *kvm_gmemfd_get_pfn(struct file *file, unsigned long index,
+				 unsigned long *pfn, int *max_order)
+{
+	struct inode *inode = file_inode(file);
+	struct folio *folio;
+
+	if (!inode || !kvm_is_gmemfd(file))
+		return ERR_PTR(-EINVAL);
+
+	folio = kvm_gmem_get_folio(inode, index);
+	if (!folio)
+		return ERR_PTR(-ENOENT);
+
+	*pfn = folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
+	*max_order = folio_order(folio);
+
+	/*
+	 * Unlock before dropping the reference: if folio_put() releases the
+	 * last reference, unlocking afterwards would touch freed memory.
+	 */
+	folio_unlock(folio);
+	folio_put(folio);
+
+	return folio;
+}
+EXPORT_SYMBOL_GPL(kvm_gmemfd_get_pfn);
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2026-02-28 18:29 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-25  7:52 [RFC PATCH kernel] iommufd: Allow mapping from KVM's guest_memfd Alexey Kardashevskiy
2026-02-25 13:55 ` Sean Christopherson
2026-02-26  6:47   ` Alexey Kardashevskiy
2026-02-26 19:27     ` Jason Gunthorpe
2026-02-27 11:03       ` Xu Yilun
2026-02-26  8:19   ` Ackerley Tng
2026-02-26 19:07     ` Jason Gunthorpe
2026-02-26 22:40       ` Sean Christopherson
2026-02-27  0:21         ` Jason Gunthorpe
2026-02-27  0:28           ` Sean Christopherson
2026-02-27  1:09             ` Jason Gunthorpe
2026-02-27 10:35               ` Xu Yilun
2026-02-27 13:18                 ` Jason Gunthorpe
2026-02-28  4:14                   ` Xu Yilun
2026-02-28 18:29                     ` Jason Gunthorpe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox