public inbox for linux-media@vger.kernel.org
 help / color / mirror / Atom feed
From: Matt Evans <mattev@meta.com>
To: Alex Williamson <alex@shazbot.org>,
	Leon Romanovsky <leon@kernel.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Alex Mastro <amastro@fb.com>,
	Mahmoud Adam <mngyadam@amazon.de>,
	David Matlack <dmatlack@google.com>
Cc: "Björn Töpel" <bjorn@kernel.org>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Christian König" <christian.koenig@amd.com>,
	"Kevin Tian" <kevin.tian@intel.com>,
	"Ankit Agrawal" <ankita@nvidia.com>,
	"Pranjal Shrivastava" <praan@google.com>,
	"Alistair Popple" <apopple@nvidia.com>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	linux-kernel@vger.kernel.org, linux-media@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linaro-mm-sig@lists.linaro.org,
	kvm@vger.kernel.org
Subject: [RFC v2 PATCH 10/10] [RFC ONLY] selftests: vfio: Add standalone vfio_dmabuf_mmap_test
Date: Thu, 12 Mar 2026 11:46:08 -0700	[thread overview]
Message-ID: <20260312184613.3710705-11-mattev@meta.com> (raw)
In-Reply-To: <20260312184613.3710705-1-mattev@meta.com>

This test exercises VFIO DMABUF mmap() to userspace, including various
revocation/shutdown cases (which make the VMA inaccessible).

This is a TEMPORARY test, just to illustrate a new UAPI and
DMABUF/mmap() usage.  Since it originates from out-of-tree code, it
duplicates some of the VFIO device setup code in
.../selftests/vfio/lib.  Instead, the tests should be folded into the
existing VFIO tests.

Signed-off-by: Matt Evans <mattev@meta.com>
---
 tools/testing/selftests/vfio/Makefile         |   1 +
 .../vfio/standalone/vfio_dmabuf_mmap_test.c   | 837 ++++++++++++++++++
 2 files changed, 838 insertions(+)
 create mode 100644 tools/testing/selftests/vfio/standalone/vfio_dmabuf_mmap_test.c

diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 8e90e409e91d..8679d96e5b92 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += vfio_iommufd_setup_test
 TEST_GEN_PROGS += vfio_pci_device_test
 TEST_GEN_PROGS += vfio_pci_device_init_perf_test
 TEST_GEN_PROGS += vfio_pci_driver_test
+TEST_GEN_PROGS += standalone/vfio_dmabuf_mmap_test
 
 TEST_FILES += scripts/cleanup.sh
 TEST_FILES += scripts/lib.sh
diff --git a/tools/testing/selftests/vfio/standalone/vfio_dmabuf_mmap_test.c b/tools/testing/selftests/vfio/standalone/vfio_dmabuf_mmap_test.c
new file mode 100644
index 000000000000..0c087497b777
--- /dev/null
+++ b/tools/testing/selftests/vfio/standalone/vfio_dmabuf_mmap_test.c
@@ -0,0 +1,837 @@
+/*
+ * Tests for VFIO DMABUF userspace mmap()
+ *
+ * As well as the basics (mmap() a BAR resource to userspace), test
+ * shutdown/unmapping, aliasing, and DMABUF revocation scenarios.
+ *
+ * This test relies on being attached to a QEMU EDU device (for a
+ * simple known MMIO layout).  Example invocation, assuming function
+ * 0000:00:03.0 is the target:
+ *
+ *  # lspci -n -s 00:03.0
+ *  00:03.0 00ff: 1234:11e8 (rev 10)
+ *
+ *  # readlink /sys/bus/pci/devices/0000\:00\:03.0/iommu_group
+ *  ../../../../../kernel/iommu_groups/3
+ *
+ *  (if there's a driver already attached)
+ *  # echo 0000:00:03.0 > /sys/bus/pci/devices/0000:00:03.0/driver/unbind
+ *
+ *  (and, might need)
+ *  # echo 1 > /sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts
+ *
+ *  Attach to VFIO:
+ *  # echo 1234 11e8 > /sys/bus/pci/drivers/vfio-pci/new_id
+ *
+ *  There should be only one thing in the group:
+ *  # ls /sys/bus/pci/devices/0000:00:03.0/iommu_group/devices
+ *
+ *  Given the above, an invocation would be:
+ *  # this_test -r 0000:00:03.0 -g 3
+ *
+ * However, note the QEMU EDU device has a very small address span of
+ * useful things in BAR0, which makes testing a non-zero BAR offset
+ * impossible.  An "extended EDU" device is supported, which just
+ * presents a large chunk of memory as a second BAR resource: this
+ * allows non-zero BAR offsets to be tested.  See below for a QEMU
+ * diff...
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This software may be used and distributed according to the terms of the
+ * GNU General Public License version 2.
+ */
+
+/*
+diff --git a/hw/misc/edu.c b/hw/misc/edu.c
+index cece633e11..5f119e0642 100644
+--- a/hw/misc/edu.c
++++ b/hw/misc/edu.c
+@@ -47,6 +47,7 @@ DECLARE_INSTANCE_CHECKER(EduState, EDU,
+ struct EduState {
+     PCIDevice pdev;
+     MemoryRegion mmio;
++    MemoryRegion ram;
+ 
+     QemuThread thread;
+     QemuMutex thr_mutex;
+@@ -386,7 +387,12 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
+ 
+     memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
+                     "edu-mmio", 1 * MiB);
++    memory_region_init_ram(&edu->ram, OBJECT(edu), "edu-ram", 64 * MiB, &error_fatal);
+     pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
++    pci_register_bar(pdev, 1,
++                     PCI_BASE_ADDRESS_SPACE_MEMORY |
++                    PCI_BASE_ADDRESS_MEM_PREFETCH |
++                    PCI_BASE_ADDRESS_MEM_TYPE_64, &edu->ram);
+ }
+ 
+ static void pci_edu_uninit(PCIDevice *pdev)
+*/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/dma-buf.h>
+#include <linux/vfio.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Round x up to a multiple of 'to'; only valid when 'to' is a power of two. */
+#define ROUND_UP(x, to)	(((x) + ((to) - 1)) & ~((to) - 1))
+/* x mebibytes, as an unsigned long long byte count. */
+#define MiB(x)		((x) * 1024ULL * 1024)
+
+/* Byte offsets of the EDU BAR0 registers touched by check_mmio(). */
+#define EDU_REG_MAGIC	0x00
+#define EDU_MAGIC_VAL	0x010000edu
+#define EDU_REG_INVERT	0x04
+
+/*
+ * If cond is true, print "FAIL:" followed by the printf-style message
+ * given in the remaining arguments, then exit with status 1.
+ */
+#define FAIL_IF(cond, ...)                     \
+	do {                                   \
+		if (cond) {                    \
+			printf("\n\nFAIL:\t"); \
+			printf(__VA_ARGS__);   \
+			exit(1);               \
+		}                              \
+	} while (0)
+
+/*
+ * Open a container/group based VFIO session for one PCI function.
+ *
+ * Creates a new container, adds IOMMU group @groupnr to it, selects the
+ * Type1 IOMMU model, opens the device named by @rid_str, enumerates its
+ * regions and resets it.  Based on the example code in
+ * Documentation/driver-api/vfio.rst
+ *
+ * Region info for at most @nr_regions mmap()able regions is copied to
+ * @out_mappable_regions in region-index order; the number stored is
+ * written to *@out_nr_regions.
+ *
+ * Returns 0 on success, with the container and device fds stored in
+ * *@out_vfio_cfd and *@out_vfio_devfd.  Returns 1 on failure, after
+ * printing a diagnostic and closing every fd opened here.
+ */
+static int vfio_setup(int groupnr, char *rid_str,
+		      struct vfio_region_info *out_mappable_regions,
+		      int nr_regions, int *out_nr_regions, int *out_vfio_cfd,
+		      int *out_vfio_devfd)
+{
+	int container = -1;
+	int group = -1;
+	int device = -1;
+	char devpath[PATH_MAX];
+	int r;
+
+	container = open("/dev/vfio/vfio", O_RDWR);
+	if (container < 0) {
+		perror("-E- Can't open /dev/vfio/vfio");
+		goto err;
+	}
+
+	r = ioctl(container, VFIO_GET_API_VERSION);
+	if (r != VFIO_API_VERSION) {
+		/* Unknown API version */
+		printf("-E- Unknown API ver %d\n", r);
+		goto err;
+	}
+
+	if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) != 1) {
+		printf("-E- Doesn't support type 1\n");
+		goto err;
+	}
+
+	/* Open the group */
+	snprintf(devpath, sizeof(devpath), "/dev/vfio/%d", groupnr);
+	group = open(devpath, O_RDWR);
+	if (group < 0) {
+		printf("-E- Can't open VFIO device (group %d)\n", groupnr);
+		goto err;
+	}
+
+	/* Test the group is viable and available */
+	struct vfio_group_status group_status = {
+		.argsz = sizeof(group_status),
+	};
+
+	if (ioctl(group, VFIO_GROUP_GET_STATUS, &group_status)) {
+		perror("-E- Can't get group status");
+		goto err;
+	}
+
+	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+		/* Group is not viable (ie, not all devices bound for vfio) */
+		printf("-E- Group %d is not viable!\n", groupnr);
+		goto err;
+	}
+
+	/* Add the group to the container */
+	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container)) {
+		perror("-E- Can't add group to container");
+		goto err;
+	}
+
+	/* Enable the IOMMU model we want */
+	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
+		perror("-E- Can't select T1");
+		goto err;
+	}
+
+	/* Get additional IOMMU info (only checked for success) */
+	struct vfio_iommu_type1_info iommu_info = {
+		.argsz = sizeof(iommu_info),
+	};
+
+	if (ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
+		perror("-E- Can't get VFIO info");
+		goto err;
+	}
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, rid_str);
+	if (device < 0) {
+		perror("-E- Can't get device fd");
+		goto err;
+	}
+	/* The group fd is not needed once the device fd exists */
+	close(group);
+	group = -1;
+
+	/* Test and setup the device */
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+
+	if (ioctl(device, VFIO_DEVICE_GET_INFO, &device_info)) {
+		perror("-E- Can't get device info");
+		goto err;
+	}
+	printf("-i- %d device regions, flags 0x%x\n", device_info.num_regions,
+	       device_info.flags);
+
+	/* Regions are BAR0-5 then ROM, config, VGA */
+	int out_region = 0;
+
+	for (unsigned int i = 0; i < device_info.num_regions; i++) {
+		struct vfio_region_info reg = { .argsz = sizeof(reg) };
+
+		reg.index = i;
+
+		if (ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &reg)) {
+			/* We expect EINVAL if there's no VGA region */
+			printf("-W- Region %u: ERROR %d\n", i, errno);
+			continue;
+		}
+
+		printf("-i- Region %u: flags 0x%08x (%c%c%c), cap_offs %d, size 0x%llx, offs 0x%llx\n",
+		       i, reg.flags,
+		       (reg.flags & VFIO_REGION_INFO_FLAG_READ) ? 'R' : '-',
+		       (reg.flags & VFIO_REGION_INFO_FLAG_WRITE) ? 'W' : '-',
+		       (reg.flags & VFIO_REGION_INFO_FLAG_MMAP) ? 'M' : '-',
+		       reg.cap_offset, reg.size, reg.offset);
+
+		/* Record mmap()able regions until the caller's array is full */
+		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) &&
+		    (out_region < nr_regions))
+			out_mappable_regions[out_region++] = reg;
+	}
+	*out_nr_regions = out_region;
+
+#ifdef THERE_ARE_NO_IRQS_YET
+	for (unsigned int i = 0; i < device_info.num_irqs; i++) {
+		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+
+		irq.index = i;
+
+		ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+
+		/* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
+	}
+#endif
+	/* Gratuitous device reset and go... (failure is only a warning) */
+	if (ioctl(device, VFIO_DEVICE_RESET))
+		perror("-W- Can't reset device (continuing)");
+
+	*out_vfio_cfd = container;
+	*out_vfio_devfd = device;
+
+	return 0;
+
+err:
+	/* Release whatever was opened before the failure */
+	if (device >= 0)
+		close(device);
+	if (group >= 0)
+		close(group);
+	if (container >= 0)
+		close(container);
+	return 1;
+}
+
+/*
+ * Probe whether the device behind @dev_fd accepts a GET of VFIO device
+ * feature @feature.  Returns 1 if the probe ioctl succeeds, else 0.
+ */
+static int vfio_feature_present(int dev_fd, uint32_t feature)
+{
+	struct vfio_device_feature probe;
+
+	memset(&probe, 0, sizeof(probe));
+	probe.argsz = sizeof(probe);
+	probe.flags = feature | VFIO_DEVICE_FEATURE_GET |
+		      VFIO_DEVICE_FEATURE_PROBE;
+
+	if (ioctl(dev_fd, VFIO_DEVICE_FEATURE, &probe) != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Request a DMABUF covering a single range of device region @region,
+ * starting at byte @offset and @length bytes long, opened with
+ * O_RDWR | O_CLOEXEC.
+ *
+ * Returns the result of the VFIO_DEVICE_FEATURE ioctl(); callers in
+ * this file treat a non-negative value as the new DMABUF fd and a
+ * negative one as failure (with errno set).
+ */
+static int vfio_create_dmabuf(int dev_fd, uint32_t region, uint64_t offset,
+			      uint64_t length)
+{
+	/* Feature header + DMABUF descriptor + one range, 8-byte aligned */
+	enum {
+		FTR_BYTES = sizeof(struct vfio_device_feature) +
+			    sizeof(struct vfio_device_feature_dma_buf) +
+			    sizeof(struct vfio_region_dma_range),
+		FTR_WORDS = ROUND_UP(FTR_BYTES, 8) / 8,
+	};
+	uint64_t ftrbuf[FTR_WORDS];
+
+	/* The three structures are laid out back to back in ftrbuf */
+	struct vfio_device_feature *hdr = (void *)ftrbuf;
+	struct vfio_device_feature_dma_buf *dmabuf = (void *)hdr->data;
+	struct vfio_region_dma_range *rng = (void *)dmabuf->dma_ranges;
+
+	hdr->argsz = sizeof(ftrbuf);
+	hdr->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_BUF;
+
+	dmabuf->region_index = region;
+	dmabuf->open_flags = O_RDWR | O_CLOEXEC;
+	dmabuf->flags = 0;
+	dmabuf->nr_ranges = 1;
+
+	rng->offset = offset;
+	rng->length = length;
+
+	return ioctl(dev_fd, VFIO_DEVICE_FEATURE, hdr);
+}
+
+/*
+ * Request one DMABUF made of two ranges of device region @region:
+ * (@offset0, @length0) followed by (@offset1, @length1).  Otherwise
+ * identical to the single-range variant: same open flags, and the
+ * VFIO_DEVICE_FEATURE ioctl() result is returned unchanged.
+ */
+static int vfio_create_dmabuf_dual(int dev_fd, uint32_t region,
+				   uint64_t offset0, uint64_t length0,
+				   uint64_t offset1, uint64_t length1)
+{
+	/* Feature header + DMABUF descriptor + two ranges, 8-byte aligned */
+	enum {
+		FTR_BYTES = sizeof(struct vfio_device_feature) +
+			    sizeof(struct vfio_device_feature_dma_buf) +
+			    (sizeof(struct vfio_region_dma_range) * 2),
+		FTR_WORDS = ROUND_UP(FTR_BYTES, 8) / 8,
+	};
+	uint64_t ftrbuf[FTR_WORDS];
+
+	/* The structures are laid out back to back in ftrbuf */
+	struct vfio_device_feature *hdr = (void *)ftrbuf;
+	struct vfio_device_feature_dma_buf *dmabuf = (void *)hdr->data;
+	struct vfio_region_dma_range *rng = (void *)dmabuf->dma_ranges;
+
+	hdr->argsz = sizeof(ftrbuf);
+	hdr->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_BUF;
+
+	dmabuf->region_index = region;
+	dmabuf->open_flags = O_RDWR | O_CLOEXEC;
+	dmabuf->flags = 0;
+	dmabuf->nr_ranges = 2;
+
+	rng[0].offset = offset0;
+	rng[0].length = length0;
+	rng[1].offset = offset1;
+	rng[1].length = length1;
+
+	return ioctl(dev_fd, VFIO_DEVICE_FEATURE, hdr);
+}
+
+/*
+ * mmap() @size bytes of @fd at file offset @offset, read/write and
+ * MAP_SHARED, at a virtual address aligned to @align bytes.
+ *
+ * If @align is no larger than the page size, a plain mmap() is used.
+ * Otherwise an anonymous PROT_NONE reservation of @size + @align bytes
+ * is made, the real mapping is placed with MAP_FIXED at the first
+ * @align-aligned address inside it, MADV_HUGEPAGE is requested, and
+ * the unused head/tail of the reservation is unmapped.  The alignment
+ * arithmetic requires @align to be a power of two.
+ *
+ * Exits via FAIL_IF() if an mmap() fails, so MAP_FAILED is never
+ * returned.
+ */
+static volatile uint32_t *mmap_resource_aligned(size_t size,
+						unsigned long align, int fd,
+						unsigned long offset)
+{
+	void *v;
+
+	if (align <= (unsigned long)getpagesize()) {
+		v = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+			 offset);
+		FAIL_IF(v == MAP_FAILED,
+			"Can't mmap fd %d (size 0x%zx, offset 0x%lx), %d\n", fd,
+			size, offset, errno);
+	} else {
+		/* Over-reserve so an aligned window of 'size' must fit */
+		size_t resv_size = size + align;
+		void *resv = mmap(0, resv_size, PROT_NONE,
+				  MAP_PRIVATE | MAP_ANON, -1, 0);
+		FAIL_IF(resv == MAP_FAILED,
+			"Can't mmap reservation, size 0x%zx, %d\n", resv_size,
+			errno);
+
+		uintptr_t resv_start = (uintptr_t)resv;
+		uintptr_t resv_end = resv_start + resv_size;
+		uintptr_t pos = (resv_start + (align - 1)) & ~(align - 1);
+
+		v = mmap((void *)pos, size, PROT_READ | PROT_WRITE,
+			 MAP_SHARED | MAP_FIXED, fd, offset);
+		FAIL_IF(v == MAP_FAILED,
+			"Can't mmap-fixed fd %d (size 0x%zx, offset 0x%lx), %d\n",
+			fd, size, offset, errno);
+		/* Best effort: a failed hint is not an error for the test */
+		(void)madvise(v, size, MADV_HUGEPAGE);
+
+		/* Tidy: drop the parts of the reservation not overlaid */
+		if (pos > resv_start)
+			munmap(resv, pos - resv_start);
+		if (pos + size < resv_end)
+			munmap((void *)(pos + size), resv_end - (pos + size));
+	}
+
+	return (volatile uint32_t *)v;
+}
+
+/*
+ * Map @size bytes of @fd at @offset, asking only for page-size
+ * alignment of the resulting address.
+ */
+static volatile uint32_t *mmap_resource(size_t size, int fd,
+					unsigned long offset)
+{
+	unsigned long page_align = getpagesize();
+
+	return mmap_resource_aligned(size, page_align, fd, offset);
+}
+
+/*
+ * Sanity-check an EDU BAR0 mapping at @base: the magic register must
+ * read EDU_MAGIC_VAL, and a value written to the invert register must
+ * read back bitwise-inverted.  Exits via FAIL_IF() on mismatch.  The
+ * written pattern is changed after every successful call.
+ */
+static void check_mmio(volatile uint32_t *base)
+{
+	static uint32_t pattern = 0xdeadbeef;
+	volatile uint32_t *magic_reg = &base[EDU_REG_MAGIC / 4];
+	volatile uint32_t *invert_reg = &base[EDU_REG_INVERT / 4];
+	uint32_t got;
+
+	printf("-i- MMIO check: ");
+
+	/* Trivial MMIO */
+	got = *magic_reg;
+	FAIL_IF(got != EDU_MAGIC_VAL,
+		"Magic value %08x incorrect, BAR map bad?\n", got);
+
+	*invert_reg = pattern;
+	got = *invert_reg;
+	FAIL_IF(got != ~pattern,
+		"Inverterizer value %08x bad (should be %08x)\n", got,
+		~pattern);
+	printf("OK\n");
+
+	/* Scramble the pattern so the next call writes a new value */
+	pattern = (pattern << 1) ^ (pattern >> 1) ^ (pattern << 7);
+}
+
+/*
+ * Ask the VFIO device @dev_fd to revoke the DMABUF referred to by
+ * @dmabuf_fd.  Returns the VFIO_DEVICE_PCI_DMABUF_REVOKE ioctl()
+ * result unchanged.
+ */
+static int revoke_dmabuf(int dev_fd, int dmabuf_fd)
+{
+	struct vfio_pci_dmabuf_revoke req;
+
+	memset(&req, 0, sizeof(req));
+	req.argsz = sizeof(req);
+	req.dmabuf_fd = dmabuf_fd;
+
+	return ioctl(dev_fd, VFIO_DEVICE_PCI_DMABUF_REVOKE, &req);
+}
+
+/*
+ * Jump target for recovering from an expected fault.  It is filled in
+ * by sigsetjmp() and consumed by siglongjmp(), so it must be a
+ * sigjmp_buf rather than a plain jmp_buf.
+ */
+static sigjmp_buf jmpbuf;
+
+/*
+ * Signal handler: report the signal, then unwind to the most recent
+ * sigsetjmp(jmpbuf, ...) call, which then returns @sig.
+ *
+ * NOTE(review): printf() is not async-signal-safe; this is presumably
+ * tolerated because the faults of interest are raised by the test's
+ * own MMIO accesses rather than inside stdio.
+ */
+static void sighandler(int sig)
+{
+	printf("*** Signal %d ***\n", sig);
+	siglongjmp(jmpbuf, sig);
+}
+
+/* Route SIGBUS to sighandler() so that faulting accesses can be caught. */
+static void setup_signals(void)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sighandler;
+	sa.sa_flags = 0;
+
+	sigaction(SIGBUS, &sa, NULL);
+}
+
+/*
+ * Run the whole DMABUF mmap() test sequence against the device @rid_str
+ * in IOMMU group @groupnr (see the "Test plan" comment below).
+ *
+ * Returns 0 once every step has passed; any failed check terminates
+ * the process through FAIL_IF().
+ */
+static int vfio_dmabuf_test(int groupnr, char *rid_str)
+{
+	/* Only expecting one or two regions */
+	struct vfio_region_info bar_region[2];
+	int num_regions = 0;
+	int container_fd, dev_fd;
+	int r = vfio_setup(groupnr, rid_str, &bar_region[0], 2, &num_regions,
+			   &container_fd, &dev_fd);
+
+	FAIL_IF(r, "VFIO setup failed\n");
+	/* bar_region[0] is only filled in if a mappable region was found */
+	FAIL_IF(num_regions < 1, "No mmap()able regions found\n");
+	FAIL_IF(!vfio_feature_present(dev_fd, VFIO_DEVICE_FEATURE_DMA_BUF),
+		"VFIO DMABUF support not available\n");
+
+	printf("-i- Container fd %d, device fd %d, and got DMA_BUF\n",
+	       container_fd, dev_fd);
+
+	setup_signals();
+
+	////////////////////////////////////////////////////////////////////////////////
+
+	/* Real basics:	 create DMABUF, and mmap it, and access MMIO through it.
+	 * Do this for 2nd BAR if present, too (just plain memory).
+	 */
+	printf("\nTEST: Create DMABUF, map it\n");
+	int bar_db_fd = vfio_create_dmabuf(dev_fd, /* region */ 0,
+					   /* offset */ 0, bar_region[0].size);
+	FAIL_IF(bar_db_fd < 0, "Can't create DMABUF, %d\n", errno);
+
+	volatile uint32_t *dbbar0 =
+		mmap_resource(bar_region[0].size, bar_db_fd, 0);
+
+	printf("-i- Mapped DMABUF BAR0 at %p+0x%llx\n", dbbar0,
+	       bar_region[0].size);
+	check_mmio(dbbar0);
+
+	/* TEST: Map the traditional VFIO one _second_; it should still work. */
+	printf("\nTEST: Map the regular VFIO BAR\n");
+	volatile uint32_t *vfiobar =
+		mmap_resource(bar_region[0].size, dev_fd, bar_region[0].offset);
+
+	printf("-i- Mapped VFIO BAR0 at %p+0x%llx\n", vfiobar,
+	       bar_region[0].size);
+	check_mmio(vfiobar);
+
+	/* Test plan:
+	 *
+	 * - Revoke the first DMABUF, check for fault
+	 * - Check VFIO BAR access still works
+	 * - Revoke first DMABUF fd again: -EBADFD
+	 * - create new DMABUF for same (previously-revoked) region: accessible
+	 *
+	 * - Create overlapping DMABUFs: map success, maps alias OK
+	 * - Create a second mapping of the second DMABUF, maps alias OK
+	 * - Destroy one by revoking through a dup()ed fd: check mapping revoked
+	 * - Check original is still accessible
+	 *
+	 * If we have a larger (>4K of accessible stuff!) second BAR resource:
+	 * - Map it, create an overlapping alias with offset != 0
+	 * - Check alias/offset is sane
+	 *
+	 * Last:
+	 * - close container_fd and dev_fd: check DMABUF mapping revoked
+	 * - try revoking a non-DMABUF fd: -EINVAL
+	 */
+
+	printf("\nTEST: Revocation of first DMABUF\n");
+	r = revoke_dmabuf(dev_fd, bar_db_fd);
+	FAIL_IF(r != 0, "Can't revoke: %d\n", errno);
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		// Try an access: expect BOOM
+		check_mmio(dbbar0);
+		FAIL_IF(true, "Expecting fault after revoke!\n");
+	}
+	printf("-i- Revoked OK\n");
+
+	printf("\nTEST: Access through VFIO-mapped region still works\n");
+	if (sigsetjmp(jmpbuf, 1) == 0)
+		check_mmio(vfiobar);
+	else
+		FAIL_IF(true, "Expecting VFIO-mapped BAR to still work!\n");
+
+	printf("\nTEST: Double-revoke\n");
+	r = revoke_dmabuf(dev_fd, bar_db_fd);
+	FAIL_IF(r != -1 || errno != EBADFD,
+		"Expecting 2nd revoke to give EBADFD, got %d errno %d\n", r,
+		errno);
+	printf("-i- Correctly failed second revoke\n");
+
+	printf("\nTEST: Can't mmap() revoked DMABUF\n");
+	/* Use the BAR0 size: bar_db_fd covers BAR0, and bar_region[1] is
+	 * only initialised when a second mappable region exists.
+	 */
+	void *dbfail = mmap(0, bar_region[0].size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, bar_db_fd, 0);
+	FAIL_IF(dbfail != MAP_FAILED, "mmap() should fail\n");
+	printf("-i- OK\n");
+
+	printf("\nTEST: Recreate new DMABUF for previously-revoked region\n");
+	int bar_db_fd_2 = vfio_create_dmabuf(
+		dev_fd, /* region */ 0, /* offset */ 0, bar_region[0].size);
+	FAIL_IF(bar_db_fd_2 < 0, "Can't create DMABUF, %d\n", errno);
+
+	volatile uint32_t *dbbar0_2 =
+		mmap_resource(bar_region[0].size, bar_db_fd_2, 0);
+
+	printf("-i- Mapped 2nd DMABUF BAR0 at %p+0x%llx\n", dbbar0_2,
+	       bar_region[0].size);
+	check_mmio(dbbar0_2);
+
+	munmap((void *)dbbar0, bar_region[0].size);
+	close(bar_db_fd);
+
+	printf("\nTEST: Create aliasing/overlapping DMABUF\n");
+	int bar_db_fd_3 = vfio_create_dmabuf(
+		dev_fd, /* region */ 0, /* offset */ 0, bar_region[0].size);
+	FAIL_IF(bar_db_fd_3 < 0, "Can't create DMABUF, %d\n", errno);
+
+	volatile uint32_t *dbbar0_3 =
+		mmap_resource(bar_region[0].size, bar_db_fd_3, 0);
+
+	printf("-i- Mapped 3rd DMABUF BAR0 at %p+0x%llx\n", dbbar0_3,
+	       bar_region[0].size);
+	check_mmio(dbbar0_3);
+
+	/* Basic aliasing check: Write value through 2nd, read back through 3rd */
+	uint32_t v;
+
+	dbbar0_2[EDU_REG_INVERT / 4] = 0xfacecace;
+	v = dbbar0_3[EDU_REG_INVERT / 4];
+	FAIL_IF(v != ~0xfacecace,
+		"Alias inverted MMIO value %08x bad (should be %08x)\n", v,
+		~0xfacecace);
+	printf("-i- Aliasing DMABUF OK\n");
+
+	printf("\nTEST: Create a double-mapping of DMABUF\n");
+	/* Create another mmap of the existing aliasing DMABUF fd */
+	volatile uint32_t *dbbar0_3_2 =
+		mmap_resource(bar_region[0].size, bar_db_fd_3, 0);
+
+	printf("-i- Mapped 3rd DMABUF BAR0 _again_ at %p+0x%llx\n", dbbar0_3_2,
+	       bar_region[0].size);
+	/* Can we see the value we wrote before? */
+	v = dbbar0_3_2[EDU_REG_INVERT / 4];
+	FAIL_IF(v != ~0xfacecace,
+		"Alias alias inverted MMIO value %08x bad (should be %08x)\n",
+		v, ~0xfacecace);
+	check_mmio(dbbar0_3_2);
+
+	printf("\nTEST: revoke aliasing DMABUF through dup()ed fd\n");
+	int dup_dbfd3 = dup(bar_db_fd_3);
+
+	r = revoke_dmabuf(dev_fd, dup_dbfd3);
+	FAIL_IF(r != 0, "Can't revoke: %d\n", errno);
+
+	/* Both of the mmap()s made should now be gone */
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		check_mmio(dbbar0_3);
+		FAIL_IF(true, "Expecting fault on 1st mmap after revoke!\n");
+	}
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		check_mmio(dbbar0_3_2);
+		FAIL_IF(true, "Expecting fault on 2nd mmap after revoke!\n");
+	}
+	printf("-i- Both aliasing DMABUF mappings revoked OK\n");
+
+	close(dup_dbfd3);
+	close(bar_db_fd_3);
+	munmap((void *)dbbar0_3, bar_region[0].size);
+	munmap((void *)dbbar0_3_2, bar_region[0].size);
+
+	/* And finally, although the aliasing DMABUF is gone, access
+	 * through the original one should still work:
+	 */
+	if (sigsetjmp(jmpbuf, 1) == 0)
+		check_mmio(dbbar0_2);
+	else
+		FAIL_IF(true,
+			"Expecting original DMABUF mapping to still work!\n");
+	printf("-i- Aliasing DMABUF removal OK, original still accessible\n");
+
+	/* If we're attached to a hacked/extended QEMU EDU device with
+	 * a large memory region 1 then we can test things like
+	 * offsets/aliasing.
+	 */
+	if (num_regions >= 2) {
+		printf("\nTEST: Second BAR: test overlapping+offset DMABUF\n");
+
+		printf("-i- Region 1 DMABUF: offset %llx, size %llx\n",
+		       bar_region[1].offset, bar_region[1].size);
+		int bar1_db_fd =
+			vfio_create_dmabuf(dev_fd, 1, 0, bar_region[1].size);
+
+		FAIL_IF(bar1_db_fd < 0, "Can't create DMABUF, %d\n", errno);
+
+		volatile uint32_t *dbbar1 = mmap_resource_aligned(
+			bar_region[1].size, MiB(32), bar1_db_fd, 0);
+		printf("-i- Mapped DMABUF Region 1 at %p+0x%llx\n", dbbar1,
+		       bar_region[1].size);
+
+		/* Init with known values */
+		for (unsigned long i = 0; i < (bar_region[1].size);
+		     i += getpagesize())
+			dbbar1[i / 4] = 0xca77face ^ i;
+
+		v = dbbar1[0];
+		FAIL_IF(v != 0xca77face,
+			"DB Region 1 read: Magic value %08x incorrect\n", v);
+		printf("-i- DB Region 1 read: Magic: 0x%08x\n", v);
+
+		/* TEST: Overlap/aliasing; map same BAR with a range
+		 * offset > 0.  Also test disjoint/multi-range DMABUFs
+		 * by creating a second range.  This appears as one
+		 * contiguous VA range mapped to a first BAR range
+		 * (starting from range0_offset), then skipping a few
+		 * physical pages, then a second range (starting at
+		 * range1_offset).
+		 */
+		unsigned long range0_offset = getpagesize() * 3;
+		unsigned long range1_skip_pages = 5;
+		unsigned long range1_skip = getpagesize() * range1_skip_pages;
+		unsigned long range_size =
+			(bar_region[1].size - range0_offset - range1_skip) / 2;
+		unsigned long range1_offset =
+			range0_offset + range_size + range1_skip;
+		unsigned long map_size = range_size * 2;
+
+		printf("\nTEST: Second BAR aliasing mapping, two ranges size 0x%lx:\n\t\t0x%lx-0x%lx, 0x%lx-0x%lx\n",
+		       range_size, range0_offset, range0_offset + range_size,
+		       range1_offset, range1_offset + range_size);
+
+		int bar1_2_db_fd = vfio_create_dmabuf_dual(
+			dev_fd, 1, range0_offset, range_size, range1_offset,
+			range_size);
+		FAIL_IF(bar1_2_db_fd < 0, "Can't create DMABUF, %d\n", errno);
+
+		volatile uint32_t *dbbar1_2 =
+			mmap_resource(map_size, bar1_2_db_fd, 0);
+
+		printf("-i- Mapped DMABUF Region 1 alias at %p+0x%lx\n",
+		       dbbar1_2, map_size);
+		FAIL_IF(dbbar1_2[0] != dbbar1[range0_offset / 4],
+			"slice2 value mismatch\n");
+
+		dbbar1[(range0_offset + 4) / 4] = 0xfacef00d;
+		/* Check we can see the value written above at +offset
+		 * from offset 0 of this mapping (since the DMABUF
+		 * itself is offsetted):
+		 */
+		v = dbbar1_2[4 / 4];
+		FAIL_IF(v != 0xfacef00d,
+			"DB Region 1 alias read: Magic value %08x incorrect\n",
+			v);
+		printf("-i- DB Region 1 alias read: Magic 0x%08x, OK\n", v);
+
+		/* Read back the known values across the two
+		 * sub-ranges of the dbbar1_2 mapping, accounting for
+		 * the physical pages skipped between them
+		 */
+		for (unsigned long i = 0; i < range_size; i += getpagesize()) {
+			unsigned long t = i + range0_offset;
+			uint32_t want = (0xca77face ^ t);
+
+			v = dbbar1_2[i / 4];
+			FAIL_IF(v != want,
+				"Expected %08x (got %08x) from range0 +%08lx (real %08lx)\n",
+				want, v, i, t);
+		}
+		for (unsigned long i = range_size; i < (range_size * 2);
+		     i += getpagesize()) {
+			unsigned long t = i + range1_offset - range_size;
+			uint32_t want = (0xca77face ^ t);
+
+			v = dbbar1_2[i / 4];
+			FAIL_IF(v != want,
+				"Expected %08x (got %08x) from range1 +%08lx (real %08lx)\n",
+				want, v, i, t);
+		}
+
+		printf("\nTEST: Third BAR aliasing mapping, testing mmap() non-zero offset:\n");
+
+		unsigned long smaller = range_size - 0x1000;
+		volatile uint32_t *dbbar1_3 = mmap_resource_aligned(
+			smaller, MiB(32), bar1_2_db_fd, range_size);
+		printf("-i- Mapped DMABUF Region 1 range 1 alias at %p+0x%lx\n",
+		       dbbar1_3, smaller);
+
+		for (unsigned long i = 0; i < smaller; i += getpagesize()) {
+			unsigned long t = i + range1_offset;
+			uint32_t want = (0xca77face ^ t);
+
+			v = dbbar1_3[i / 4];
+			FAIL_IF(v != want,
+				"Expected %08x (got %08x) from 3rd range1 +%08lx (real %08lx)\n",
+				want, v, i, t);
+		}
+		printf("-i- mmap offset OK\n");
+
+		/* TODO: If we can observe hugepages (mechanically,
+		 * rather than human reading debug), we can test
+		 * interesting alignment cases for the PFN search:
+		 *
+		 * - Deny hugepages at start/end of an mmap() that
+		 *   starts/ends at non-HP-aligned addresses
+		 *   (e.g. first pages are small, middle is fully
+		 *   aligned in VA and PFN so 2M, and buffer finishes
+		 *   before 2M boundary, so last pages are small).
+		 *
+		 * - Everything aligned nicely except the mmap() size
+		 *   is <2MB, so hugepage denied due to straddling
+		 *   end.
+		 *
+		 * - Buffer offsets into BAR not aligned, so no huge
+		 *   mappings even if mmap() is perfectly aligned.
+		 */
+
+		/* Check that access after DMABUF fd close still works
+		 * (VMA still holds refcount, obvs!)
+		 */
+		close(bar1_2_db_fd);
+		if (sigsetjmp(jmpbuf, 1) == 0)
+			v = dbbar1_2[0x4 / 4];
+		else
+			FAIL_IF(true,
+				"Expecting original DMABUF mapping to still work!\n");
+		printf("-i- DB Region 1 alias read 2: Magic 0x%08x, OK\n", v);
+		printf("-i- Offset check OK\n");
+	}
+
+	printf("\nTEST: Shutdown: close VFIO container/device fds, check DMABUF gone\n");
+
+	/* Final use of dev_fd: use it to try to revoke a non-DMABUF fd: */
+	r = revoke_dmabuf(dev_fd, 1);
+	FAIL_IF(r != -1 || errno != EINVAL,
+		"Expecting revoke of stdout to give EINVAL, got %d errno %d\n",
+		r, errno);
+	printf("-i- Correctly failed final revoke\n");
+
+	/* Closing all uses of dev_fd (including the VFIO BAR mmap()!)
+	 * will revoke the DMABUF; even though the DMABUF fd might
+	 * remain open, the mapping itself is zapped. Start with a
+	 * plain close (before unmapping the VFIO BAR mapping):
+	 */
+	close(dev_fd);
+	close(container_fd);
+	printf("-i- VFIO fds closed\n");
+
+	if (sigsetjmp(jmpbuf, 1) == 0)
+		check_mmio(dbbar0_2);
+	else
+		FAIL_IF(true,
+			"Expecting DMABUF mapping to still work if VFIO mapping still live!\n");
+
+	if (sigsetjmp(jmpbuf, 1) == 0)
+		check_mmio(vfiobar);
+	else
+		FAIL_IF(true,
+			"Expecting VFIO BAR mapping to still work after fd close!\n");
+
+	munmap((void *)vfiobar, bar_region[0].size);
+	printf("-i- VFIO BAR unmapped\n");
+
+	/* The final reference via VFIO should now be gone, and the
+	 * DMABUF should now be destroyed.  The mapping of it should
+	 * be inaccessible:
+	 */
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		check_mmio(dbbar0_2);
+		FAIL_IF(true,
+			"Expecting DMABUF mapping to fault after VFIO fd shutdown!\n");
+	}
+	printf("-i- DMABUF mappings inaccessible\n");
+
+	/* Ensure we can't mmap() DMABUF for closed device (BAR0 size,
+	 * since bar_db_fd_2 was created over BAR0)
+	 */
+	void *dbfail2 = mmap(0, bar_region[0].size, PROT_READ | PROT_WRITE,
+			     MAP_SHARED, bar_db_fd_2, 0);
+	FAIL_IF(dbfail2 != MAP_FAILED, "mmap() should fail\n");
+	printf("-i- Can't mmap DMABUF for closed device, OK\n");
+
+	munmap((void *)dbbar0_2, bar_region[0].size);
+	close(bar_db_fd_2);
+
+	printf("\nPASS\n");
+
+	return 0;
+}
+
+/* Print the command-line synopsis; @me is the program name to show. */
+static void usage(char *me)
+{
+	static const char help[] =
+		"\n"
+		"\t\tGroup is found via device path, e.g. cat /sys/bus/pci/devices/0000:03:1d.0/iommu_group\n"
+		"\t\tRID is of the form 0000:03:1d.0\n"
+		"\n";
+
+	printf("Usage:\t%s -g <group_number> -r <RID/BDF>\n%s", me, help);
+}
+
+/*
+ * Parse -g <group_number> and -r <RID/BDF>, then run the test.
+ *
+ * Returns the result of vfio_dmabuf_test(), or 1 (after printing the
+ * usage text) if -h is given, an option is unknown, the group number
+ * is not a non-negative decimal integer, or either option is missing.
+ */
+int main(int argc, char *argv[])
+{
+	/* Get args: IOMMU group and BDF/path */
+	int groupnr = -1;
+	char *rid_str = NULL;
+	int arg;
+
+	while ((arg = getopt(argc, argv, "g:r:h")) != -1) {
+		switch (arg) {
+		case 'g': {
+			char *end;
+			long val;
+
+			/* Reject non-numeric input instead of reading it as 0 */
+			errno = 0;
+			val = strtol(optarg, &end, 10);
+			if (errno || end == optarg || *end != '\0' ||
+			    val < 0 || val > INT_MAX) {
+				printf("-E- Bad group number '%s'\n", optarg);
+				usage(argv[0]);
+				return 1;
+			}
+			groupnr = (int)val;
+			break;
+		}
+
+		case 'r':
+			/* optarg points into argv, so no copy is needed */
+			rid_str = optarg;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (rid_str == NULL || groupnr == -1) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	printf("-i- Using group number %d, RID '%s'\n", groupnr, rid_str);
+
+	return vfio_dmabuf_test(groupnr, rid_str);
+}
-- 
2.47.3


  parent reply	other threads:[~2026-03-12 18:47 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-12 18:45 [RFC v2 PATCH 00/10] vfio/pci: Add mmap() for DMABUFs Matt Evans
2026-03-12 18:45 ` [RFC v2 PATCH 01/10] vfio/pci: Set up VFIO barmap before creating a DMABUF Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 02/10] vfio/pci: Clean up DMABUFs before disabling function Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 03/10] vfio/pci: Add helper to look up PFNs for DMABUFs Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 04/10] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA Matt Evans
2026-03-18 20:04   ` Alex Williamson
2026-03-23 13:25     ` Jason Gunthorpe
2026-03-23 14:55     ` Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 05/10] vfio/pci: Convert BAR mmap() to use a DMABUF Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 06/10] vfio/pci: Remove vfio_pci_zap_bars() Matt Evans
2026-03-13  9:12   ` Christian König
2026-03-12 18:46 ` [RFC v2 PATCH 07/10] vfio/pci: Support mmap() of a VFIO DMABUF Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 08/10] vfio/pci: Permanently revoke a DMABUF on request Matt Evans
2026-03-12 18:46 ` [RFC v2 PATCH 09/10] vfio/pci: Add mmap() attributes to DMABUF feature Matt Evans
2026-03-12 18:46 ` Matt Evans [this message]
2026-03-13  9:21 ` [RFC v2 PATCH 00/10] vfio/pci: Add mmap() for DMABUFs Christian König
2026-03-13 13:28   ` Matt Evans

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260312184613.3710705-11-mattev@meta.com \
    --to=mattev@meta.com \
    --cc=alex@shazbot.org \
    --cc=amastro@fb.com \
    --cc=ankita@nvidia.com \
    --cc=apopple@nvidia.com \
    --cc=bjorn@kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dmatlack@google.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=jgg@nvidia.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leon@kernel.org \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=mngyadam@amazon.de \
    --cc=praan@google.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox