* [PATCH 2/2] vfio/pci: Serialise vfio_pci_core_setup_barmap()
2026-04-15 18:14 [PATCH 1/2] vfio/pci: Set up VFIO barmap before creating a DMABUF Matt Evans
@ 2026-04-15 18:14 ` Matt Evans
2026-04-16 22:44 ` [PATCH 1/2] vfio/pci: Set up VFIO barmap before creating a DMABUF Alex Williamson
1 sibling, 0 replies; 5+ messages in thread
From: Matt Evans @ 2026-04-15 18:14 UTC (permalink / raw)
To: Alex Williamson, Ankit Agrawal, Jason Gunthorpe, Yishai Hadas,
Shameer Kolothum, Kevin Tian
Cc: Alistair Popple, Leon Romanovsky, Vivek Kasireddy, Kees Cook,
Zhi Wang, Peter Xu, Alexey Kardashevskiy, Eric Auger, kvm,
linux-kernel, virtualization
vfio_pci_core_setup_barmap() is used in a couple of paths
(vfio_pci_bar_rw(), mmap()) to ensure BARs are mapped before access,
and these paths could execute concurrently. Concurrent execution of
vfio_pci_core_setup_barmap() could lead to some callers getting
-EBUSY, which would be treated as fatal.
Introduce a new vfio_pci_core_lock_setup_barmap() function, which
takes the vdev->memory_lock for write across BAR initialization.
Current in-kernel use moves to this.
The existing (exported!) vfio_pci_core_setup_barmap() keeps its
'unlocked' behaviour.
Fixes: 7f5764e179c6 ("vfio: use vfio_pci_core_setup_barmap to map bar in mmap")
Fixes: 0d77ed3589ac0 ("vfio/pci: Pull BAR mapping setup from read-write path")
Signed-off-by: Matt Evans <mattev@meta.com>
---
drivers/vfio/pci/nvgrace-gpu/main.c | 2 +-
drivers/vfio/pci/vfio_pci_core.c | 2 +-
drivers/vfio/pci/vfio_pci_dmabuf.c | 2 +-
drivers/vfio/pci/vfio_pci_rdwr.c | 43 +++++++++++++++++++++++++----
drivers/vfio/pci/virtio/legacy_io.c | 2 +-
include/linux/vfio_pci_core.h | 1 +
6 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index fa056b69f899..c1df437754f9 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -189,7 +189,7 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
* register reads on first fault before establishing any GPU
* memory mapping.
*/
- ret = vfio_pci_core_setup_barmap(vdev, 0);
+ ret = vfio_pci_core_lock_setup_barmap(vdev, 0);
if (ret)
goto error_exit;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3f8d093aacf8..4e9091e5fcc2 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1764,7 +1764,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
* Even though we don't make use of the barmap for the mmap,
* we need to request the region and the barmap tracks that.
*/
- ret = vfio_pci_core_setup_barmap(vdev, index);
+ ret = vfio_pci_core_lock_setup_barmap(vdev, index);
if (ret)
return ret;
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index fefe7cf4256b..281ba7d69567 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -277,7 +277,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
* were requested before returning DMABUFs that reference
* them. Barmap setup does this:
*/
- ret = vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index);
+ ret = vfio_pci_core_lock_setup_barmap(vdev, get_dma_buf.region_index);
if (ret)
goto err_free_phys;
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4251ee03e146..11e155acf8ef 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,15 +198,12 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
-int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+static int __vfio_pci_core_iomap_barmap(struct vfio_pci_core_device *vdev, int bar)
{
struct pci_dev *pdev = vdev->pdev;
int ret;
void __iomem *io;
- if (vdev->barmap[bar])
- return 0;
-
ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
if (ret)
return ret;
@@ -221,6 +218,40 @@ int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
return 0;
}
+
+int vfio_pci_core_lock_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+{
+ int ret;
+
+ lockdep_assert_not_held(&vdev->memory_lock);
+
+ if (likely(READ_ONCE(vdev->barmap[bar])))
+ return 0;
+
+ down_write(&vdev->memory_lock);
+ if (unlikely(READ_ONCE(vdev->barmap[bar]))) {
+ up_write(&vdev->memory_lock);
+ return 0;
+ }
+
+ ret = __vfio_pci_core_iomap_barmap(vdev, bar);
+ up_write(&vdev->memory_lock);
+
+ return ret;
+}
+
+int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+{
+ /*
+ * An external caller must prevent concurrent calls of this,
+ * including via other VFIO-internal paths (for example, by
+ * holding vdev->memory_lock).
+ */
+ if (vdev->barmap[bar])
+ return 0;
+
+ return __vfio_pci_core_iomap_barmap(vdev, bar);
+}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
@@ -274,7 +305,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
*/
max_width = VFIO_PCI_IO_WIDTH_4;
} else {
- int ret = vfio_pci_core_setup_barmap(vdev, bar);
+ int ret = vfio_pci_core_lock_setup_barmap(vdev, bar);
if (ret) {
done = ret;
goto out;
@@ -452,7 +483,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
if (count == 8)
return -EINVAL;
- ret = vfio_pci_core_setup_barmap(vdev, bar);
+ ret = vfio_pci_core_lock_setup_barmap(vdev, bar);
if (ret)
return ret;
diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c
index 1ed349a55629..c77064e3f5c4 100644
--- a/drivers/vfio/pci/virtio/legacy_io.c
+++ b/drivers/vfio/pci/virtio/legacy_io.c
@@ -305,7 +305,7 @@ static int virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev)
* Setup the BAR where the 'notify' exists to be used by vfio as well
* This will let us mmap it only once and use it when needed.
*/
- ret = vfio_pci_core_setup_barmap(core_device,
+ ret = vfio_pci_core_lock_setup_barmap(core_device,
virtvdev->notify_bar);
if (ret)
return ret;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 2ebba746c18f..2ea4e773c121 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -189,6 +189,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
+int vfio_pci_core_lock_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH 1/2] vfio/pci: Set up VFIO barmap before creating a DMABUF
2026-04-15 18:14 [PATCH 1/2] vfio/pci: Set up VFIO barmap before creating a DMABUF Matt Evans
2026-04-15 18:14 ` [PATCH 2/2] vfio/pci: Serialise vfio_pci_core_setup_barmap() Matt Evans
@ 2026-04-16 22:44 ` Alex Williamson
2026-04-17 5:16 ` Tian, Kevin
1 sibling, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2026-04-16 22:44 UTC (permalink / raw)
To: Matt Evans
Cc: Ankit Agrawal, Jason Gunthorpe, Yishai Hadas, Shameer Kolothum,
Kevin Tian, Alistair Popple, Leon Romanovsky, Vivek Kasireddy,
Kees Cook, Zhi Wang, Peter Xu, Alexey Kardashevskiy, Eric Auger,
kvm, linux-kernel, virtualization, alex
On Wed, 15 Apr 2026 11:14:22 -0700
Matt Evans <mattev@meta.com> wrote:
> A DMABUF exports access to BAR resources which need to be requested
> before the DMABUF is handed out. Usually the resources are requested
> when setting up the barmap when the VFIO device fd is mmap()ed, but
> there's no guarantee that's done before a DMABUF is created.
>
> Set up the barmap (and so request resources) in the DMABUF-creation
> path.
>
> Fixes: 5d74781ebc86c ("vfio/pci: Add dma-buf export support for MMIO regions")
> Signed-off-by: Matt Evans <mattev@meta.com>
> ---
> drivers/vfio/pci/vfio_pci_dmabuf.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 4ccaf3531e02..fefe7cf4256b 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -272,6 +272,15 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> goto err_free_priv;
> }
>
> + /*
> + * See comment in vfio_pci_core_mmap(); ensure PCI regions
> + * were requested before returning DMABUFs that reference
> + * them. Barmap setup does this:
> + */
> + ret = vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index);
> + if (ret)
> + goto err_free_phys;
> +
> priv->vdev = vdev;
> priv->nr_ranges = get_dma_buf.nr_ranges;
> priv->size = length;
Wouldn't this get a lot easier if we just setup all the barmaps in
vfio_pci_core_enable(), conditional on pci_resource_len() just like we
use to filter in REGION_INFO?
I don't recall if there's some reason we've avoid this so far, maybe
others can shout it out if they do.
We already tear them all down in vfio_pci_core_disable(). It would be
a small patch to add that, which we would mark as Fixes:, then a small
follow-up on top of that that removes any then redundant or unnecessary
callers (all of them). Thoughts? Thanks,
Alex
BTW, cover letters are appreciated for multi-part series.
^ permalink raw reply [flat|nested] 5+ messages in thread