public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vfio: selftests: Incorporate IOVA range info
@ 2025-11-08 21:29 Alex Williamson
  2025-11-08 22:58 ` Jason Gunthorpe
  0 siblings, 1 reply; 3+ messages in thread
From: Alex Williamson @ 2025-11-08 21:29 UTC (permalink / raw)
  To: dmatlack, alex; +Cc: Alex Williamson, jgg, amastro, kvm

From: Alex Williamson <alex.williamson@nvidia.com>

Not all IOMMUs support the same virtual address width as the processor,
for instance older Intel consumer platforms only support 39-bits of
IOMMU address space.  On such platforms, using the virtual address as
the IOVA and mappings at the top of the address space both fail.

VFIO and IOMMUFD have facilities for retrieving valid IOVA ranges,
VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE and IOMMU_IOAS_IOVA_RANGES,
respectively.  These provide compatible arrays of ranges from which
we can construct a simple allocator and record the maximum supported
IOVA address.

Use this new allocator in place of reusing the virtual address, and
incorporate the maximum supported IOVA into the limit testing.  This
latter change doesn't test quite the same absolute end-of-address space
behavior but still seems to have some value.  Testing for overflow is
skipped when a reduced address space is supported as the desired errno
is not generated.

Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
---

This happened upon another interesting vfio-compat difference for
IOMMUFD, native type1 returns the correct set of IOVA ranges after
VFIO_SET_IOMMU, vfio-compat requires the next step of calling
VFIO_GROUP_GET_DEVICE_FD to attach the device to the IOAS.  If
checked prior to this, the IOVA range is reported as the full
64-bit address space.  ISTR this is known, but it's sufficiently
subtle to make note of again.

 .../selftests/vfio/lib/include/vfio_util.h    |  10 ++
 .../selftests/vfio/lib/vfio_pci_device.c      | 134 ++++++++++++++++++
 .../selftests/vfio/vfio_dma_mapping_test.c    |   7 +-
 .../selftests/vfio/vfio_pci_driver_test.c     |   2 +-
 4 files changed, 150 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index 240409bf5f8a..6ee7748c2a06 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -10,6 +10,8 @@
 
 #include "../../../kselftest.h"
 
+#define ALIGN(x, a)	(((x) + (a - 1)) & (~((a) - 1)))
+
 #define VFIO_LOG_AND_EXIT(...) do {		\
 	fprintf(stderr, "  " __VA_ARGS__);	\
 	fprintf(stderr, "\n");			\
@@ -183,6 +185,12 @@ struct vfio_pci_device {
 	int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
 
 	struct vfio_pci_driver driver;
+
+	int nr_iova_ranges;
+	struct vfio_iova_range *iova_ranges;
+	int iova_range_idx;
+	iova_t iova_next;
+	iova_t iova_max;
 };
 
 /*
@@ -206,6 +214,8 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
 void vfio_pci_device_cleanup(struct vfio_pci_device *device);
 void vfio_pci_device_reset(struct vfio_pci_device *device);
 
+iova_t vfio_pci_get_next_iova(struct vfio_pci_device *device, size_t size);
+
 int __vfio_pci_dma_map(struct vfio_pci_device *device,
 		       struct vfio_dma_region *region);
 int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index a381fd253aa7..295a00084880 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -14,6 +14,7 @@
 #include <uapi/linux/types.h>
 #include <linux/limits.h>
 #include <linux/mman.h>
+#include <linux/overflow.h>
 #include <linux/types.h>
 #include <linux/vfio.h>
 #include <linux/iommufd.h>
@@ -386,10 +387,66 @@ static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf
 	ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
 }
 
+iova_t vfio_pci_get_next_iova(struct vfio_pci_device *device, size_t size)
+{
+	int idx = device->iova_range_idx;
+	struct vfio_iova_range *range = &device->iova_ranges[idx];
+
+	VFIO_ASSERT_LT(idx, device->nr_iova_ranges, "IOVA allocate out of space\n");
+	VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
+	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");
+
+	for (;;) {
+		iova_t iova, end;
+
+		iova = ALIGN(device->iova_next, size);
+
+		if (iova < device->iova_next || iova > range->end ||
+		    check_add_overflow(iova, size - 1, &end) ||
+		    end > range->end) {
+			device->iova_range_idx = ++idx;
+			VFIO_ASSERT_LT(idx, device->nr_iova_ranges,
+				       "Out of ranges for allocation\n");
+			device->iova_next = (++range)->start;
+			continue;
+		}
+
+		if (check_add_overflow(end, (iova_t)1, &device->iova_next) ||
+		    device->iova_next > range->end) {
+			device->iova_range_idx = ++idx;
+			if (idx < device->nr_iova_ranges)
+				device->iova_next = (++range)->start;
+		}
+
+		return iova;
+	}
+}
+
+static void vfio_pci_fill_iova_ranges(struct vfio_pci_device *device,
+				      struct vfio_iova_range *ranges, int nr)
+{
+	int i;
+
+	VFIO_ASSERT_GT(nr, 0, "Empty IOVA ranges\n");
+	device->nr_iova_ranges = nr;
+
+	device->iova_ranges = calloc(nr, sizeof(struct vfio_iova_range));
+	VFIO_ASSERT_NOT_NULL(device->iova_ranges);
+	memcpy(device->iova_ranges, ranges, nr * sizeof(struct vfio_iova_range));
+
+	device->iova_next = device->iova_ranges[0].start;
+
+	for (i = 0; i < device->nr_iova_ranges; i++) {
+		if (device->iova_ranges[i].end > device->iova_max)
+			device->iova_max = device->iova_ranges[i].end;
+	}
+}
+
 static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
 {
 	unsigned long iommu_type = device->iommu_mode->iommu_type;
 	const char *path = device->iommu_mode->container_path;
+	struct vfio_iommu_type1_info *iommu_info;
 	int version;
 	int ret;
 
@@ -408,6 +465,51 @@ static void vfio_pci_container_setup(struct vfio_pci_device *device, const char
 
 	device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
 	VFIO_ASSERT_GE(device->fd, 0);
+
+	iommu_info = calloc(1, sizeof(*iommu_info));
+	VFIO_ASSERT_NOT_NULL(iommu_info);
+	iommu_info->argsz = sizeof(*iommu_info);
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, (void *)iommu_info);
+
+	if ((iommu_info->flags & VFIO_IOMMU_INFO_CAPS) &&
+	    iommu_info->argsz != sizeof(*iommu_info)) {
+		u32 next, info_size = iommu_info->argsz;
+		struct vfio_info_cap_header *hdr;
+		char *ptr;
+
+		iommu_info = realloc(iommu_info, info_size);
+		VFIO_ASSERT_NOT_NULL(iommu_info);
+
+		ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO,
+			     (void *)iommu_info);
+		VFIO_ASSERT_EQ(iommu_info->argsz, info_size);
+		VFIO_ASSERT_GT(iommu_info->flags & VFIO_IOMMU_INFO_CAPS, 0);
+		VFIO_ASSERT_GT(iommu_info->cap_offset, 0);
+
+		next = iommu_info->cap_offset;
+		ptr = (char *)iommu_info;
+
+		while (next) {
+			hdr =  (struct vfio_info_cap_header *)(ptr + next);
+			if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+				VFIO_ASSERT_EQ(hdr->version, 1);
+				break;
+			}
+
+			next = hdr->next;
+		}
+
+		if (next) {
+			struct vfio_iommu_type1_info_cap_iova_range *ranges;
+
+			ranges = (struct vfio_iommu_type1_info_cap_iova_range *)hdr;
+			vfio_pci_fill_iova_ranges(device, ranges->iova_ranges,
+						  ranges->nr_iovas);
+		}
+	}
+
+	free(iommu_info);
 }
 
 static void vfio_pci_device_setup(struct vfio_pci_device *device)
@@ -547,6 +649,10 @@ static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
 static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
 {
 	const char *cdev_path = vfio_pci_get_cdev_path(bdf);
+	struct iommu_ioas_iova_ranges *ioas_ranges;
+	struct vfio_iova_range *iova_ranges;
+	size_t size;
+	int ret, i;
 
 	device->fd = open(cdev_path, O_RDWR);
 	VFIO_ASSERT_GE(device->fd, 0);
@@ -563,6 +669,34 @@ static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *b
 	vfio_device_bind_iommufd(device->fd, device->iommufd);
 	device->ioas_id = iommufd_ioas_alloc(device->iommufd);
 	vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
+
+	ioas_ranges = calloc(1, sizeof(*ioas_ranges));
+	VFIO_ASSERT_NOT_NULL(ioas_ranges);
+	ioas_ranges->size = sizeof(*ioas_ranges);
+	ioas_ranges->ioas_id = device->ioas_id;
+
+	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, ioas_ranges);
+
+	VFIO_ASSERT_NE(ret, 0);
+	VFIO_ASSERT_EQ(errno, EMSGSIZE);
+	VFIO_ASSERT_NE(ioas_ranges->num_iovas, 0);
+
+	size = sizeof(*ioas_ranges) + (ioas_ranges->num_iovas *
+				       sizeof(struct iommu_iova_range));
+	ioas_ranges = realloc(ioas_ranges, size);
+	VFIO_ASSERT_NOT_NULL(ioas_ranges);
+
+	ioas_ranges->allowed_iovas = (uintptr_t)(ioas_ranges + 1);
+
+	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, ioas_ranges);
+
+	VFIO_ASSERT_EQ(sizeof(struct vfio_iova_range),
+		       sizeof(struct iommu_iova_range));
+
+	iova_ranges = (void *)ioas_ranges->allowed_iovas;
+
+	vfio_pci_fill_iova_ranges(device, iova_ranges, ioas_ranges->num_iovas);
+	free(ioas_ranges);
 }
 
 struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 4f1ea79a200c..d5ab9f84e675 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -142,7 +142,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
 	else
 		ASSERT_NE(region.vaddr, MAP_FAILED);
 
-	region.iova = (u64)region.vaddr;
+	region.iova = vfio_pci_get_next_iova(self->device, size);
 	region.size = size;
 
 	vfio_pci_dma_map(self->device, &region);
@@ -233,7 +233,7 @@ FIXTURE_SETUP(vfio_dma_map_limit_test)
 	ASSERT_NE(region->vaddr, MAP_FAILED);
 
 	/* One page prior to the end of address space */
-	region->iova = ~(iova_t)0 & ~(region_size - 1);
+	region->iova = self->device->iova_max & ~(region_size - 1);
 	region->size = region_size;
 }
 
@@ -276,6 +276,9 @@ TEST_F(vfio_dma_map_limit_test, overflow)
 	struct vfio_dma_region *region = &self->region;
 	int rc;
 
+	if (self->device->iova_max != UINT64_MAX)
+		SKIP(return, "IOMMU address space insufficient for overflow test\n");
+
 	region->size = self->mmap_size;
 
 	rc = __vfio_pci_dma_map(self->device, region);
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
index 2dbd70b7db62..b8ff04bf6c86 100644
--- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -29,7 +29,7 @@ static void region_setup(struct vfio_pci_device *device,
 	VFIO_ASSERT_NE(vaddr, MAP_FAILED);
 
 	region->vaddr = vaddr;
-	region->iova = (u64)vaddr;
+	region->iova = vfio_pci_get_next_iova(self->device, size);
 	region->size = size;
 
 	vfio_pci_dma_map(device, region);
-- 
2.51.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] vfio: selftests: Incorporate IOVA range info
  2025-11-08 21:29 [PATCH] vfio: selftests: Incorporate IOVA range info Alex Williamson
@ 2025-11-08 22:58 ` Jason Gunthorpe
  2025-11-10 14:59   ` Alex Williamson
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Gunthorpe @ 2025-11-08 22:58 UTC (permalink / raw)
  To: Alex Williamson; +Cc: dmatlack, Alex Williamson, amastro, kvm

On Sat, Nov 08, 2025 at 02:29:49PM -0700, Alex Williamson wrote:
> From: Alex Williamson <alex.williamson@nvidia.com>
> 
> Not all IOMMUs support the same virtual address width as the processor,
> for instance older Intel consumer platforms only support 39-bits of
> IOMMU address space.  On such platforms, using the virtual address as
> the IOVA and mappings at the top of the address space both fail.
> 
> VFIO and IOMMUFD have facilities for retrieving valid IOVA ranges,
> VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE and IOMMU_IOAS_IOVA_RANGES,
> respectively.  These provide compatible arrays of ranges from which
> we can construct a simple allocator and record the maximum supported
> IOVA address.
> 
> Use this new allocator in place of reusing the virtual address, and
> incorporate the maximum supported IOVA into the limit testing.  This
> latter change doesn't test quite the same absolute end-of-address space
> behavior but still seems to have some value.  Testing for overflow is
> skipped when a reduced address space is supported as the desired errno
> is not generated.
> 
> Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
> ---
> 
> This happened upon another interesting vfio-compat difference for
> IOMMUFD, native type1 returns the correct set of IOVA ranges after
> VFIO_SET_IOMMU, vfio-compat requires the next step of calling
> VFIO_GROUP_GET_DEVICE_FD to attach the device to the IOAS.  If
> checked prior to this, the IOVA range is reported as the full
> 64-bit address space.  ISTR this is known, but it's sufficiently
> subtle to make note of again.

Maybe we should fail in this in-between state rather than give wrong
information?

Jason

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] vfio: selftests: Incorporate IOVA range info
  2025-11-08 22:58 ` Jason Gunthorpe
@ 2025-11-10 14:59   ` Alex Williamson
  0 siblings, 0 replies; 3+ messages in thread
From: Alex Williamson @ 2025-11-10 14:59 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: dmatlack, Alex Williamson, amastro, kvm

On Sat, 8 Nov 2025 18:58:42 -0400
Jason Gunthorpe <jgg@nvidia.com> wrote:
> On Sat, Nov 08, 2025 at 02:29:49PM -0700, Alex Williamson wrote:
> > ---
> > 
> > This happened upon another interesting vfio-compat difference for
> > IOMMUFD, native type1 returns the correct set of IOVA ranges after
> > VFIO_SET_IOMMU, vfio-compat requires the next step of calling
> > VFIO_GROUP_GET_DEVICE_FD to attach the device to the IOAS.  If
> > checked prior to this, the IOVA range is reported as the full
> > 64-bit address space.  ISTR this is known, but it's sufficiently
> > subtle to make note of again.  
> 
> Maybe we should fail in this in between state rather than give wrong
> information?

That's probably an improvement, maybe with a WARN_ONCE indicating the
ordering requirement for vfio-compat.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-11-10 14:59 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-08 21:29 [PATCH] vfio: selftests: Incorporate IOVA range info Alex Williamson
2025-11-08 22:58 ` Jason Gunthorpe
2025-11-10 14:59   ` Alex Williamson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox