The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Narayana Murty N <nnmlinux@linux.ibm.com>
To: alex@shazbot.org, dmatlack@google.com, shuah@kernel.org
Cc: amastro@fb.com, rananta@google.com, nnmlinux@linux.ibm.com,
	kvm@vger.kernel.org, linux-kselftest@vger.kernel.org,
	linux-kernel@vger.kernel.org, vaibhav@linux.ibm.com,
	sbhat@linux.ibm.com, harshpb@linux.ibm.com
Subject: [RFC PATCH 3/6] selftests/vfio: add sPAPR TCE v2 DMA window helpers
Date: Thu,  2 Jul 2026 23:28:03 -0400	[thread overview]
Message-ID: <20260703032806.40946-4-nnmlinux@linux.ibm.com> (raw)
In-Reply-To: <20260703032806.40946-1-nnmlinux@linux.ibm.com>

Add helper support for sPAPR TCE v2 DMA windows in the VFIO selftest
library.

Track the platform default DMA window separately from selftest-created
dynamic DMA windows (DDWs). The default window is discovered with
VFIO_IOMMU_SPAPR_TCE_GET_INFO and is not removed during cleanup.

Add helpers to create and remove DDWs, return the active IOVA range, and
register/unregister memory around DMA map/unmap operations. Window
selection is done before IOVA allocation; the map path only validates,
registers memory, and calls VFIO_IOMMU_MAP_DMA.

Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
---
 .../vfio/lib/include/libvfio/iommu.h          |  18 ++
 tools/testing/selftests/vfio/lib/iommu.c      | 273 +++++++++++++++++-
 tools/testing/selftests/vfio/lib/libvfio.c    |   2 -
 3 files changed, 288 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
index ba027a7270d0..c9b9ab929f1f 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -24,12 +24,26 @@ struct dma_region {
 	u64 size;
 };
 
+struct spapr_tce_window {
+	u64 start;		/* IOVA at which the window begins */
+	u64 size;		/* window length */
+	u32 page_shift;		/* trailing-zero count of the window page size */
+	bool valid;		/* set once populated, cleared on removal */
+	bool dynamic;		/* true for a DDW created by the library */
+	bool remove_on_cleanup;	/* spapr_tce_remove_window() only acts if set */
+};
+
 struct iommu {
 	const struct iommu_mode *mode;
 	int container_fd;
 	int iommufd;
 	u32 ioas_id;
 	struct list_head dma_regions;
+#ifdef __powerpc__
+	struct spapr_tce_window default_window;	/* platform default, from GET_INFO */
+	struct spapr_tce_window ddw_window;	/* DDW created by the library */
+	struct spapr_tce_window *active_window;	/* window selected for mappings */
+#endif
 };
 
 struct iommu *iommu_init(const char *iommu_mode);
@@ -61,6 +75,10 @@ iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
 
 struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
 
+int iommu_prepare_dma_window(struct iommu *iommu, u64 min_size,
+			     u64 page_size, bool force_dynamic);
+bool iommu_supports_unmap_all(struct iommu *iommu);
+
 #define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu"
 #define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu"
 #define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1"
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
index e86457a715bf..19d19bb45a14 100644
--- a/tools/testing/selftests/vfio/lib/iommu.c
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -16,9 +16,11 @@
 #include <linux/types.h>
 #include <linux/vfio.h>
 #include <linux/iommufd.h>
+#include <linux/sizes.h>
 
 #include "../../../kselftest.h"
 #include <libvfio.h>
+#include <limits.h>
 
 const char *default_iommu_mode = MODE_IOMMUFD;
 
@@ -93,6 +95,233 @@ int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
 	return -ENOENT;
 }
 
+#ifdef __powerpc__
+static bool iommu_is_spapr_tce_v2(struct iommu *iommu)
+{
+	return iommu->mode->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
+}
+
+static u32 page_size_to_shift(u64 page_size)
+{
+	return __builtin_ctzll(page_size);	/* undefined for 0; callers pass non-zero */
+}
+
+/* Cache the platform default DMA window; a no-op once it has been read. */
+static int spapr_tce_read_default_window(struct iommu *iommu)
+{
+	struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+	struct spapr_tce_window *win = &iommu->default_window;
+
+	if (win->valid)
+		return 0;
+
+	if (ioctl(iommu->container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info))
+		return -errno;
+
+	/* Flagged non-dynamic and not removable, so cleanup leaves it alone. */
+	win->start = info.dma32_window_start;
+	win->size = info.dma32_window_size;
+	win->page_shift = page_size_to_shift(getpagesize());
+	win->valid = true;
+	win->dynamic = false;
+	win->remove_on_cleanup = false;
+	return 0;
+}
+
+/* Request a single-level DDW of @min_size and record the result in ddw_window. */
+static int spapr_tce_create_ddw(struct iommu *iommu, u64 min_size, u32 page_shift)
+{
+	struct spapr_tce_window *win = &iommu->ddw_window;
+	struct vfio_iommu_spapr_tce_create req = {
+		.argsz = sizeof(req),
+		.page_shift = page_shift,
+		.levels = 1,
+		.window_size = min_size,
+	};
+
+	if (ioctl(iommu->container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &req))
+		return -errno;
+	win->start = req.start_addr;	/* read back after the ioctl */
+	win->size = req.window_size;
+	win->page_shift = page_shift;
+	win->valid = true;
+	win->dynamic = true;
+	win->remove_on_cleanup = true;
+	return 0;
+}
+
+/* Remove @window when it is valid and flagged remove_on_cleanup; else no-op. */
+static int spapr_tce_remove_window(struct iommu *iommu, struct spapr_tce_window *window)
+{
+	struct vfio_iommu_spapr_tce_remove req = {
+		.argsz = sizeof(req),
+		.start_addr = window->start,
+	};
+
+	if (!(window->valid && window->remove_on_cleanup))
+		return 0;
+
+	if (ioctl(iommu->container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &req))
+		return -errno;
+	window->valid = false;
+	return 0;
+}
+
+/*
+ * Check whether @window can serve a request: it must be valid, at least
+ * @min_size large, use @page_shift, and be dynamic when @require_dynamic.
+ */
+static bool spapr_tce_window_satisfies(struct spapr_tce_window *window,
+				       u64 min_size, u32 page_shift,
+				       bool require_dynamic)
+{
+	if (!window || !window->valid)
+		return false;
+
+	if (require_dynamic && !window->dynamic)
+		return false;
+
+	/* Large enough, and built with the requested page shift? */
+	return window->size >= min_size &&
+	       window->page_shift == page_shift;
+}
+
+/* Return the active window as a one-entry heap-allocated range array, or NULL. */
+static struct iommu_iova_range *spapr_tce_iova_ranges(struct iommu *iommu, u32 *nranges)
+{
+	struct iommu_iova_range *ranges;
+
+	/*
+	 * No usable selection (none yet, or the selected DDW was removed and
+	 * is no longer valid): pick a window of at least 1G, system page size.
+	 */
+	if ((!iommu->active_window || !iommu->active_window->valid) &&
+	    iommu_prepare_dma_window(iommu, SZ_1G, getpagesize(), false))
+		return NULL;
+
+	ranges = calloc(1, sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+	ranges[0].start = iommu->active_window->start;
+	ranges[0].last = iommu->active_window->start + iommu->active_window->size - 1;
+	*nranges = 1;
+	return ranges;
+}
+
+/*
+ * True when @region lies entirely within the valid active window.
+ * NOTE(review): iova + size is not checked for u64 wrap-around — confirm
+ * whether overflowing requests are meant to reach the kernel.
+ */
+static bool spapr_tce_iova_inside_window(struct iommu *iommu, struct dma_region *region)
+{
+	struct spapr_tce_window *win = iommu->active_window;
+
+	if (!win || !win->valid)
+		return false;
+
+	return region->iova >= win->start &&
+	       region->iova + region->size <= win->start + win->size;
+}
+
+/* Register @region's vaddr/size with the container; returns 0 or -errno. */
+static int spapr_register_memory(struct iommu *iommu, struct dma_region *region)
+{
+	struct vfio_iommu_spapr_register_memory req = {
+		.argsz = sizeof(req),
+		.vaddr = (u64)region->vaddr,
+		.size = region->size,
+	};
+	int rc;
+
+	rc = ioctl(iommu->container_fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &req);
+	return rc ? -errno : 0;
+}
+
+/* Unregister @region's memory; returns 0 or -errno and restores the caller's errno. */
+static int spapr_unregister_memory(struct iommu *iommu, struct dma_region *region)
+{
+	struct vfio_iommu_spapr_register_memory args = {
+		.argsz = sizeof(args),
+		.vaddr = (u64)region->vaddr, .size = region->size,
+	};
+	int saved_errno = errno, ret;
+
+	ret = ioctl(iommu->container_fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &args) ? -errno : 0;
+	errno = saved_errno;	/* vfio_iommu_map() reads errno after its cleanup call */
+	return ret;
+}
+
+/*
+ * Select the DMA window used for subsequent IOVA allocation and mapping.
+ * Returns 0 without doing anything unless the IOMMU mode is sPAPR TCE v2.
+ * @min_size: smallest acceptable window size
+ * @page_size: page size for the window; 0 selects getpagesize()
+ * @force_dynamic: skip the default window and always use a DDW
+ *
+ * Returns 0 on success or the negative errno reported by a helper.
+ */
+int iommu_prepare_dma_window(struct iommu *iommu, u64 min_size,
+			     u64 page_size, bool force_dynamic)
+{
+	u32 page_shift;
+	int ret;
+
+	if (!iommu_is_spapr_tce_v2(iommu))
+		return 0;
+
+	if (!page_size)
+		page_size = getpagesize();
+
+	page_shift = page_size_to_shift(page_size);
+
+	ret = spapr_tce_read_default_window(iommu);
+	if (ret)
+		return ret;
+
+	/* Normal mapping path: use the default window when it satisfies the request. */
+	if (!force_dynamic &&
+	    spapr_tce_window_satisfies(&iommu->default_window,
+				       min_size, page_shift, false)) {
+		iommu->active_window = &iommu->default_window;
+		return 0;
+	}
+
+	/* Dynamic path: reuse the existing DDW if it already satisfies the request. */
+	if (spapr_tce_window_satisfies(&iommu->ddw_window,
+				       min_size, page_shift, true)) {
+		iommu->active_window = &iommu->ddw_window;
+		return 0;
+	}
+
+	/* Neither fits: remove only the selftest-created DDW, then create a new one. */
+	ret = spapr_tce_remove_window(iommu, &iommu->ddw_window);
+	if (ret)
+		return ret;
+
+	ret = spapr_tce_create_ddw(iommu, min_size, page_shift);
+	if (ret)
+		return ret;
+
+	iommu->active_window = &iommu->ddw_window;
+	return 0;
+}
+#else
+int iommu_prepare_dma_window(struct iommu *iommu, u64 min_size,
+			     u64 page_size, bool force_dynamic)
+{
+	return 0;	/* stub for builds without __powerpc__: nothing to prepare */
+}
+#endif
+
+bool iommu_supports_unmap_all(struct iommu *iommu)
+{
+#ifdef __powerpc__
+	if (iommu_is_spapr_tce_v2(iommu))
+		return false;	/* sPAPR TCE v2 mode is reported as unsupported */
+#endif
+	return true;	/* every other mode */
+}
+
 iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
 {
 	iova_t iova;
@@ -113,9 +342,27 @@ static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
 		.iova = region->iova,
 		.size = region->size,
 	};
+	int ret;
 
-	if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
+#ifdef __powerpc__
+	if (iommu_is_spapr_tce_v2(iommu)) {
+		if (!spapr_tce_iova_inside_window(iommu, region))
+			return -EINVAL;
+
+		ret = spapr_register_memory(iommu, region);
+		if (ret)
+			return ret;
+	}
+#endif
+
+	ret = ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args);
+	if (ret) {
+#ifdef __powerpc__
+		if (iommu_is_spapr_tce_v2(iommu))
+			spapr_unregister_memory(iommu, region);
+#endif
 		return -errno;
+	}
 
 	return 0;
 }
@@ -177,8 +424,18 @@ static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapp
 static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
 			    u64 *unmapped)
 {
-	return __vfio_iommu_unmap(iommu->container_fd, region->iova,
-				  region->size, 0, unmapped);
+	int ret;
+
+	ret = __vfio_iommu_unmap(iommu->container_fd, region->iova,
+				 region->size, 0, unmapped);
+	if (ret)
+		return ret;
+#ifdef __powerpc__
+	if (iommu_is_spapr_tce_v2(iommu))
+		ret = spapr_unregister_memory(iommu, region);
+#endif
+
+	return ret;
 }
 
 static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
@@ -324,6 +581,11 @@ static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
 	struct vfio_info_cap_header *hdr;
 	struct iommu_iova_range *ranges = NULL;
 
+#ifdef __powerpc__
+	if (iommu_is_spapr_tce_v2(iommu))
+		return spapr_tce_iova_ranges(iommu, nranges);
+#endif
+
 	info = vfio_iommu_get_info(iommu->container_fd);
 	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
 	VFIO_ASSERT_NOT_NULL(hdr);
@@ -462,6 +724,11 @@ struct iommu *iommu_init(const char *iommu_mode)
 
 void iommu_cleanup(struct iommu *iommu)
 {
+#ifdef __powerpc__
+	if (iommu_is_spapr_tce_v2(iommu))
+		spapr_tce_remove_window(iommu, &iommu->ddw_window);
+#endif
+
 	if (iommu->iommufd)
 		VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
 	else
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
index 617c8dc7288c..12f731963b05 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.c
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -138,5 +138,3 @@ const char *vfio_selftests_get_iommu_mode(void)
 	return get_iommu_mode_env();
 }
 
-	return map_align;
-}
-- 
2.51.1


  parent reply	other threads:[~2026-07-03  7:53 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-07-03  3:28 [RFC PATCH 0/6] selftests/vfio: Add sPAPR TCE v2 coverage Narayana Murty N
2026-07-03  3:28 ` [RFC PATCH 1/6] selftests/vfio: allow selecting IOMMU backend from environment Narayana Murty N
2026-07-03  3:28 ` [RFC PATCH 2/6] selftests/vfio: add sPAPR TCE v2 IOMMU mode Narayana Murty N
2026-07-03  3:28 ` Narayana Murty N [this message]
2026-07-03  3:28 ` [RFC PATCH 4/6] selftests/vfio: Exercise sPAPR DDW path for hugepage DMA mappings Narayana Murty N
2026-07-03  3:28 ` [RFC PATCH 5/6] selftests/vfio: Accept sPAPR errno for DMA range overflow Narayana Murty N
2026-07-03  3:28 ` [RFC PATCH 6/6] selftests/vfio: Enable VFIO selftests on ppc64 and ppc64le Narayana Murty N
2026-07-03  8:28 ` [RFC PATCH 0/6] selftests/vfio: Add sPAPR TCE v2 coverage Harsh Prateek Bora

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260703032806.40946-4-nnmlinux@linux.ibm.com \
    --to=nnmlinux@linux.ibm.com \
    --cc=alex@shazbot.org \
    --cc=amastro@fb.com \
    --cc=dmatlack@google.com \
    --cc=harshpb@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=rananta@google.com \
    --cc=sbhat@linux.ibm.com \
    --cc=shuah@kernel.org \
    --cc=vaibhav@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox