All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matt Evans <mattev@meta.com>
To: "Alex Williamson" <alex@shazbot.org>,
	"Leon Romanovsky" <leon@kernel.org>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Alex Mastro" <amastro@fb.com>,
	"Christian König" <christian.koenig@amd.com>
Cc: "Mahmoud Adam" <mngyadam@amazon.de>,
	"David Matlack" <dmatlack@google.com>,
	"Björn Töpel" <bjorn@kernel.org>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Kevin Tian" <kevin.tian@intel.com>,
	"Ankit Agrawal" <ankita@nvidia.com>,
	"Pranjal Shrivastava" <praan@google.com>,
	"Alistair Popple" <apopple@nvidia.com>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	linux-kernel@vger.kernel.org, linux-media@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linaro-mm-sig@lists.linaro.org,
	kvm@vger.kernel.org
Subject: [PATCH 6/9] vfio/pci: Clean up BAR zap and revocation
Date: Thu, 16 Apr 2026 06:17:49 -0700	[thread overview]
Message-ID: <20260416131815.2729131-7-mattev@meta.com> (raw)
In-Reply-To: <20260416131815.2729131-1-mattev@meta.com>

Previously, vfio_pci_zap_bars() (and the wrapper
vfio_pci_zap_and_down_write_memory_lock()) calls were paired with
calls of vfio_pci_dma_buf_move().

This commit replaces them with a unified new function,
vfio_pci_zap_revoke_bars(), containing both the vfio_pci_dma_buf_move()
and the unmap_mapping_range(), making it harder for callers to omit
one.  It adds a wrapper, vfio_pci_lock_zap_revoke_bars(), which takes
the write memory_lock before zapping, and adds a new
vfio_pci_unrevoke_bars() for the re-enable path.

However, as of "vfio/pci: Convert BAR mmap() to use a DMABUF" the
unmap_mapping_range() to zap is entirely redundant for plain vfio-pci,
since the DMABUFs used for BAR mappings already zap PTEs when the
vfio_pci_dma_buf_move() occurs.

One exception remains as a FIXME: in nvgrace-gpu, some BAR VMAs
conditionally use custom vm_ops, which have not moved to be backed by
DMABUFs.  If these BARs are mmap()ed, the vdev enables the existing
behaviour of unmap_mapping_range() for the device fd address space.

Signed-off-by: Matt Evans <mattev@meta.com>
---
 drivers/vfio/pci/nvgrace-gpu/main.c |  5 +++
 drivers/vfio/pci/vfio_pci_config.c  | 30 ++++++--------
 drivers/vfio/pci/vfio_pci_core.c    | 62 +++++++++++++++++++----------
 drivers/vfio/pci/vfio_pci_priv.h    |  3 +-
 include/linux/vfio_pci_core.h       |  1 +
 5 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index c1df437754f9..5304d15b9a2b 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -358,6 +358,8 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 	struct nvgrace_gpu_pci_core_device *nvdev =
 		container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
 			     core_device.vdev);
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
 	struct mem_region *memregion;
 	u64 req_len, pgoff, end;
 	unsigned int index;
@@ -368,6 +370,9 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 	if (!memregion)
 		return vfio_pci_core_mmap(core_vdev, vma);
 
+	/* Non-DMABUF BAR mappings need an extra zap */
+	vdev->bar_needs_zap = true;
+
 	/*
 	 * Request to mmap the BAR. Map to the CPU accessible memory on the
 	 * GPU using the memory information gathered from the system ACPI
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index a10ed733f0e3..8bfab0da481c 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -590,12 +590,10 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
 		virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
 		new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
 
-		if (!new_mem) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
-			vfio_pci_dma_buf_move(vdev, true);
-		} else {
+		if (!new_mem)
+			vfio_pci_lock_zap_revoke_bars(vdev);
+		else
 			down_write(&vdev->memory_lock);
-		}
 
 		/*
 		 * If the user is writing mem/io enable (new_mem/io) and we
@@ -631,7 +629,7 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
 		*virt_cmd |= cpu_to_le16(new_cmd & mask);
 
 		if (__vfio_pci_memory_enabled(vdev))
-			vfio_pci_dma_buf_move(vdev, false);
+			vfio_pci_unrevoke_bars(vdev);
 		up_write(&vdev->memory_lock);
 	}
 
@@ -712,16 +710,14 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
 					  pci_power_t state)
 {
-	if (state >= PCI_D3hot) {
-		vfio_pci_zap_and_down_write_memory_lock(vdev);
-		vfio_pci_dma_buf_move(vdev, true);
-	} else {
+	if (state >= PCI_D3hot)
+		vfio_pci_lock_zap_revoke_bars(vdev);
+	else
 		down_write(&vdev->memory_lock);
-	}
 
 	vfio_pci_set_power_state(vdev, state);
 	if (__vfio_pci_memory_enabled(vdev))
-		vfio_pci_dma_buf_move(vdev, false);
+		vfio_pci_unrevoke_bars(vdev);
 	up_write(&vdev->memory_lock);
 }
 
@@ -908,11 +904,10 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
 						 &cap);
 
 		if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
-			vfio_pci_dma_buf_move(vdev, true);
+			vfio_pci_lock_zap_revoke_bars(vdev);
 			pci_try_reset_function(vdev->pdev);
 			if (__vfio_pci_memory_enabled(vdev))
-				vfio_pci_dma_buf_move(vdev, false);
+				vfio_pci_unrevoke_bars(vdev);
 			up_write(&vdev->memory_lock);
 		}
 	}
@@ -993,11 +988,10 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
 						&cap);
 
 		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
-			vfio_pci_dma_buf_move(vdev, true);
+			vfio_pci_lock_zap_revoke_bars(vdev);
 			pci_try_reset_function(vdev->pdev);
 			if (__vfio_pci_memory_enabled(vdev))
-				vfio_pci_dma_buf_move(vdev, false);
+				vfio_pci_unrevoke_bars(vdev);
 			up_write(&vdev->memory_lock);
 		}
 	}
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index c00a61d61250..464b63585bef 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -319,8 +319,7 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
 	 * The vdev power related flags are protected with 'memory_lock'
 	 * semaphore.
 	 */
-	vfio_pci_zap_and_down_write_memory_lock(vdev);
-	vfio_pci_dma_buf_move(vdev, true);
+	vfio_pci_lock_zap_revoke_bars(vdev);
 
 	if (vdev->pm_runtime_engaged) {
 		up_write(&vdev->memory_lock);
@@ -406,7 +405,7 @@ static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
 	down_write(&vdev->memory_lock);
 	__vfio_pci_runtime_pm_exit(vdev);
 	if (__vfio_pci_memory_enabled(vdev))
-		vfio_pci_dma_buf_move(vdev, false);
+		vfio_pci_unrevoke_bars(vdev);
 	up_write(&vdev->memory_lock);
 }
 
@@ -1229,7 +1228,7 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
 	if (!vdev->reset_works)
 		return -EINVAL;
 
-	vfio_pci_zap_and_down_write_memory_lock(vdev);
+	vfio_pci_lock_zap_revoke_bars(vdev);
 
 	/*
 	 * This function can be invoked while the power state is non-D0. If
@@ -1242,10 +1241,9 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
 	 */
 	vfio_pci_set_power_state(vdev, PCI_D0);
 
-	vfio_pci_dma_buf_move(vdev, true);
 	ret = pci_try_reset_function(vdev->pdev);
 	if (__vfio_pci_memory_enabled(vdev))
-		vfio_pci_dma_buf_move(vdev, false);
+		vfio_pci_unrevoke_bars(vdev);
 	up_write(&vdev->memory_lock);
 
 	return ret;
@@ -1613,20 +1611,44 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_write);
 
-static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
+static void vfio_pci_zap_revoke_bars(struct vfio_pci_core_device *vdev)
 {
-	struct vfio_device *core_vdev = &vdev->vdev;
-	loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
-	loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
-	loff_t len = end - start;
+	lockdep_assert_held_write(&vdev->memory_lock);
+	vfio_pci_dma_buf_move(vdev, true);
 
-	unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
+	/*
+	 * All VFIO PCI BARs are backed by DMABUFs, with the current
+	 * exception of the nvgrace-gpu device which uses its own
+	 * vm_ops for a subset of BARs.  For this, BAR mappings are
+	 * still made in the vdev's address_space, and a zap is
+	 * required.  The tracking is crude, and will (harmlessly)
+	 * continue to zap if the special BAR is unmapped, but that
+	 * behaviour isn't the common case.
+	 *
+	 * FIXME: This can go away if the special nvgrace-gpu mapping
+	 * is converted to use DMABUF.
+	 */
+	if (vdev->bar_needs_zap) {
+		struct vfio_device *core_vdev = &vdev->vdev;
+		loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
+		loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
+		loff_t len = end - start;
+
+		unmap_mapping_range(core_vdev->inode->i_mapping,
+				    start, len, true);
+	}
 }
 
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
+void vfio_pci_lock_zap_revoke_bars(struct vfio_pci_core_device *vdev)
 {
 	down_write(&vdev->memory_lock);
-	vfio_pci_zap_bars(vdev);
+	vfio_pci_zap_revoke_bars(vdev);
+}
+
+void vfio_pci_unrevoke_bars(struct vfio_pci_core_device *vdev)
+{
+	lockdep_assert_held_write(&vdev->memory_lock);
+	vfio_pci_dma_buf_move(vdev, false);
 }
 
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
@@ -2480,9 +2502,10 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 		}
 
 		/*
-		 * Take the memory write lock for each device and zap BAR
-		 * mappings to prevent the user accessing the device while in
-		 * reset.  Locking multiple devices is prone to deadlock,
+		 * Take the memory write lock for each device and
+		 * zap/revoke BAR mappings to prevent the user (or
+		 * peers) accessing the device while in reset.
+		 * Locking multiple devices is prone to deadlock,
 		 * runaway and unwind if we hit contention.
 		 */
 		if (!down_write_trylock(&vdev->memory_lock)) {
@@ -2490,8 +2513,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 			break;
 		}
 
-		vfio_pci_dma_buf_move(vdev, true);
-		vfio_pci_zap_bars(vdev);
+		vfio_pci_zap_revoke_bars(vdev);
 	}
 
 	if (!list_entry_is_head(vdev,
@@ -2521,7 +2543,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 	list_for_each_entry_from_reverse(vdev, &dev_set->device_list,
 					 vdev.dev_set_list) {
 		if (vdev->vdev.open_count && __vfio_pci_memory_enabled(vdev))
-			vfio_pci_dma_buf_move(vdev, false);
+			vfio_pci_unrevoke_bars(vdev);
 		up_write(&vdev->memory_lock);
 	}
 
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 868a54ba482c..a8edbee6ce56 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -82,7 +82,8 @@ void vfio_config_free(struct vfio_pci_core_device *vdev);
 int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
 			     pci_power_t state);
 
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
+void vfio_pci_lock_zap_revoke_bars(struct vfio_pci_core_device *vdev);
+void vfio_pci_unrevoke_bars(struct vfio_pci_core_device *vdev);
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
 void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
 					u16 cmd);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 2ea4e773c121..c1cd67741125 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -127,6 +127,7 @@ struct vfio_pci_core_device {
 	bool			needs_pm_restore:1;
 	bool			pm_intx_masked:1;
 	bool			pm_runtime_engaged:1;
+	bool			bar_needs_zap:1;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
 	int			ioeventfds_nr;
-- 
2.47.3


  parent reply	other threads:[~2026-04-17  7:06 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-16 13:17 [PATCH 0/9] vfio/pci: Add mmap() for DMABUFs Matt Evans
2026-04-16 13:17 ` [PATCH 1/9] vfio/pci: Fix vfio_pci_dma_buf_cleanup() double-put Matt Evans
2026-04-24 18:05   ` Jason Gunthorpe
2026-05-01 19:12   ` Alex Williamson
2026-05-06 13:53     ` Matt Evans
2026-05-06 15:29       ` Leon Romanovsky
2026-05-06 15:55         ` Matt Evans
2026-05-06 16:14           ` Leon Romanovsky
2026-05-06 16:42             ` Matt Evans
2026-04-16 13:17 ` [PATCH 2/9] vfio/pci: Add a helper to look up PFNs for DMABUFs Matt Evans
2026-04-24 18:15   ` Jason Gunthorpe
2026-05-07 15:48     ` Matt Evans
2026-04-16 13:17 ` [PATCH 3/9] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA Matt Evans
2026-04-24 18:24   ` Jason Gunthorpe
2026-04-30 16:47     ` Matt Evans
2026-04-30 17:11       ` Jason Gunthorpe
2026-05-05 18:13         ` Matt Evans
2026-05-06 19:03           ` Matt Evans
2026-04-16 13:17 ` [PATCH 4/9] vfio/pci: Convert BAR mmap() to use a DMABUF Matt Evans
2026-05-01 22:19   ` Alex Williamson
2026-05-04  7:40     ` Jason Gunthorpe
2026-05-05 10:49       ` Leon Romanovsky
2026-05-05 14:50         ` Alex Williamson
2026-05-05 14:59           ` Jason Gunthorpe
2026-05-06  5:35           ` Leon Romanovsky
2026-05-14 17:52             ` Matt Evans
2026-04-16 13:17 ` [PATCH 5/9] vfio/pci: Provide a user-facing name for BAR mappings Matt Evans
2026-04-24 18:26   ` Jason Gunthorpe
2026-05-01 22:44   ` Alex Williamson
2026-05-07 16:56     ` Matt Evans
2026-05-07 17:17       ` Matt Evans
2026-04-16 13:17 ` Matt Evans [this message]
2026-05-01 23:19   ` [PATCH 6/9] vfio/pci: Clean up BAR zap and revocation Alex Williamson
2026-05-05 10:58     ` Leon Romanovsky
2026-04-16 13:17 ` [PATCH 7/9] vfio/pci: Support mmap() of a VFIO DMABUF Matt Evans
2026-04-24 18:30   ` Jason Gunthorpe
2026-05-07 16:09     ` Matt Evans
2026-04-16 13:17 ` [PATCH 8/9] vfio/pci: Permanently revoke a DMABUF on request Matt Evans
2026-04-16 13:17 ` [PATCH 9/9] vfio/pci: Add mmap() attributes to DMABUF feature Matt Evans
2026-04-24 18:31   ` Jason Gunthorpe
2026-04-26 10:52     ` Leon Romanovsky
2026-04-27 14:36       ` Alex Williamson
2026-05-11 15:30         ` Matt Evans
2026-05-11 17:51           ` Leon Romanovsky
2026-05-11 20:09           ` Alex Williamson
2026-05-12 17:51             ` Matt Evans
2026-05-13 18:27               ` Alex Williamson
2026-05-14 13:55                 ` Matt Evans

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260416131815.2729131-7-mattev@meta.com \
    --to=mattev@meta.com \
    --cc=alex@shazbot.org \
    --cc=amastro@fb.com \
    --cc=ankita@nvidia.com \
    --cc=apopple@nvidia.com \
    --cc=bjorn@kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dmatlack@google.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=jgg@nvidia.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leon@kernel.org \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=mngyadam@amazon.de \
    --cc=praan@google.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.