* [PATCH v4 1/3] vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable()
2026-05-05 17:38 [PATCH v4 0/3] vfio/pci: Request resources and map BARs at enable time Matt Evans
@ 2026-05-05 17:38 ` Matt Evans
2026-05-05 17:38 ` [PATCH v4 2/3] vfio/pci: Check BAR resources before exporting a DMABUF Matt Evans
2026-05-05 17:38 ` [PATCH v4 3/3] vfio/pci: Replace vfio_pci_core_setup_barmap() with vfio_pci_core_get_iomap() Matt Evans
2 siblings, 0 replies; 4+ messages in thread
From: Matt Evans @ 2026-05-05 17:38 UTC (permalink / raw)
To: Alex Williamson, Kevin Tian, Jason Gunthorpe, Ankit Agrawal,
Alistair Popple, Leon Romanovsky, Kees Cook, Shameer Kolothum,
Yishai Hadas
Cc: Alexey Kardashevskiy, Eric Auger, Peter Xu, Vivek Kasireddy,
Zhi Wang, kvm, linux-kernel, virtualization
Previously BAR resource requests and the corresponding pci_iomap()
were performed on-demand and without synchronisation, which was racy.
Rather than add synchronisation, it's simplest to address this by
doing both activities from vfio_pci_core_enable().
The resource allocation and/or pci_iomap() can still fail; their
status is tracked and existing calls to vfio_pci_core_setup_barmap()
will fail in a similar way to before. This keeps the point of failure
as observed by userspace the same, i.e. failures to request/map unused
BARs are benign.
Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
Signed-off-by: Matt Evans <mattev@meta.com>
---
drivers/vfio/pci/vfio_pci_core.c | 36 +++++++++++++++++++++++++++++++-
drivers/vfio/pci/vfio_pci_rdwr.c | 26 +++++++----------------
2 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3f8d093aacf8..62931dc381d8 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -482,6 +482,39 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
}
#endif /* CONFIG_PM */
+/*
+ * Eager-request BAR resources, and iomap them. Soft failures are
+ * allowed, and consumers must check the barmap before use in order to
+ * give compatible user-visible behaviour with the previous on-demand
+ * allocation method.
+ */
+static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ int bar = i + PCI_STD_RESOURCES;
+
+ vdev->barmap[bar] = ERR_PTR(-ENODEV);
+
+ if (!pci_resource_len(pdev, i))
+ continue;
+
+ if (pci_request_selected_regions(pdev, 1 << bar, "vfio")) {
+ pci_dbg(vdev->pdev, "Failed to reserve region %d\n", bar);
+ vdev->barmap[bar] = ERR_PTR(-EBUSY);
+ continue;
+ }
+
+ vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
+ if (!vdev->barmap[bar]) {
+ pci_dbg(vdev->pdev, "Failed to iomap region %d\n", bar);
+ vdev->barmap[bar] = ERR_PTR(-ENOMEM);
+ }
+ }
+}
+
/*
* The pci-driver core runtime PM routines always save the device state
* before going into suspended state. If the device is going into low power
@@ -568,6 +601,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
+ vfio_pci_core_map_bars(vdev);
return 0;
@@ -648,7 +682,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
bar = i + PCI_STD_RESOURCES;
- if (!vdev->barmap[bar])
+ if (IS_ERR_OR_NULL(vdev->barmap[bar]))
continue;
pci_iounmap(pdev, vdev->barmap[bar]);
pci_release_selected_regions(pdev, 1 << bar);
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4251ee03e146..3bfbb879a005 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
+/*
+ * The barmap is set up in vfio_pci_core_enable(). Callers use this
+ * function to check that the BAR resources are requested or that the
+ * pci_iomap() was done.
+ */
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
- struct pci_dev *pdev = vdev->pdev;
- int ret;
- void __iomem *io;
-
- if (vdev->barmap[bar])
- return 0;
-
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
-
- io = pci_iomap(pdev, bar, 0);
- if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -ENOMEM;
- }
-
- vdev->barmap[bar] = io;
-
+ if (IS_ERR(vdev->barmap[bar]))
+ return PTR_ERR(vdev->barmap[bar]);
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 2/3] vfio/pci: Check BAR resources before exporting a DMABUF
2026-05-05 17:38 [PATCH v4 0/3] vfio/pci: Request resources and map BARs at enable time Matt Evans
2026-05-05 17:38 ` [PATCH v4 1/3] vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable() Matt Evans
@ 2026-05-05 17:38 ` Matt Evans
2026-05-05 17:38 ` [PATCH v4 3/3] vfio/pci: Replace vfio_pci_core_setup_barmap() with vfio_pci_core_get_iomap() Matt Evans
2 siblings, 0 replies; 4+ messages in thread
From: Matt Evans @ 2026-05-05 17:38 UTC (permalink / raw)
To: Alex Williamson, Kevin Tian, Jason Gunthorpe, Ankit Agrawal,
Alistair Popple, Leon Romanovsky, Kees Cook, Shameer Kolothum,
Yishai Hadas
Cc: Alexey Kardashevskiy, Eric Auger, Peter Xu, Vivek Kasireddy,
Zhi Wang, kvm, linux-kernel, virtualization
A DMABUF exports access to BAR resources and, although they are
requested at device-enable time, we need to ensure they really were
reserved before exporting. Otherwise, it's possible to access
unreserved resources through the export.
Add a check to the DMABUF-creation path.
Fixes: 5d74781ebc86c ("vfio/pci: Add dma-buf export support for MMIO regions")
Signed-off-by: Matt Evans <mattev@meta.com>
---
drivers/vfio/pci/vfio_pci_dmabuf.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index f87fd32e4a01..69a5c2d511e6 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
return -EINVAL;
/*
- * For PCI the region_index is the BAR number like everything else.
+ * For PCI the region_index is the BAR number like everything
+ * else. Check that PCI resources have been claimed for it.
*/
- if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
+ if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
+ vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
return -ENODEV;
dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 3/3] vfio/pci: Replace vfio_pci_core_setup_barmap() with vfio_pci_core_get_iomap()
2026-05-05 17:38 [PATCH v4 0/3] vfio/pci: Request resources and map BARs at enable time Matt Evans
2026-05-05 17:38 ` [PATCH v4 1/3] vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable() Matt Evans
2026-05-05 17:38 ` [PATCH v4 2/3] vfio/pci: Check BAR resources before exporting a DMABUF Matt Evans
@ 2026-05-05 17:38 ` Matt Evans
2 siblings, 0 replies; 4+ messages in thread
From: Matt Evans @ 2026-05-05 17:38 UTC (permalink / raw)
To: Alex Williamson, Kevin Tian, Jason Gunthorpe, Ankit Agrawal,
Alistair Popple, Leon Romanovsky, Kees Cook, Shameer Kolothum,
Yishai Hadas
Cc: Alexey Kardashevskiy, Eric Auger, Peter Xu, Vivek Kasireddy,
Zhi Wang, kvm, linux-kernel, virtualization
Since "vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable()",
the resource requests and iomaps for the BARs are performed early, and
vfio_pci_core_setup_barmap() just checks that those actions succeeded.
Move this logic to a new helper that checks success and returns the
iomap address, replacing the various bare vdev->barmap[] lookups.
This maintains the error behaviour of the previous on-demand
vfio_pci_core_setup_barmap() scheme.
Signed-off-by: Matt Evans <mattev@meta.com>
---
drivers/vfio/pci/nvgrace-gpu/main.c | 11 ++++-------
drivers/vfio/pci/vfio_pci_core.c | 11 +++++------
drivers/vfio/pci/vfio_pci_dmabuf.c | 2 +-
drivers/vfio/pci/vfio_pci_rdwr.c | 30 ++++++++---------------------
drivers/vfio/pci/virtio/legacy_io.c | 13 ++++++-------
include/linux/vfio_pci_core.h | 20 ++++++++++++++++++-
6 files changed, 43 insertions(+), 44 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index fa056b69f899..e153002258ce 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -184,13 +184,10 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
/*
* GPU readiness is checked by reading the BAR0 registers.
- *
- * ioremap BAR0 to ensure that the BAR0 mapping is present before
- * register reads on first fault before establishing any GPU
- * memory mapping.
+ * The BAR map was just set up by vfio_pci_core_enable(), so
+ * check that was successful and bail early if not:
*/
- ret = vfio_pci_core_setup_barmap(vdev, 0);
- if (ret)
+ if (IS_ERR(vfio_pci_core_get_iomap(vdev, 0)))
goto error_exit;
if (nvdev->resmem.memlength) {
@@ -275,7 +272,7 @@ nvgrace_gpu_check_device_ready(struct nvgrace_gpu_pci_core_device *nvdev)
if (!__vfio_pci_memory_enabled(vdev))
return -EIO;
- ret = nvgrace_gpu_wait_device_ready(vdev->barmap[0]);
+ ret = nvgrace_gpu_wait_device_ready(vfio_pci_core_get_iomap(vdev, 0));
if (ret)
return ret;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 62931dc381d8..5c8bd13f10d0 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1761,7 +1761,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
struct pci_dev *pdev = vdev->pdev;
unsigned int index;
u64 phys_len, req_len, pgoff, req_start;
- int ret;
+ void __iomem *bar_io;
index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
@@ -1795,12 +1795,11 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
return -EINVAL;
/*
- * Even though we don't make use of the barmap for the mmap,
- * we need to request the region and the barmap tracks that.
+ * Ensure the BAR resource region is reserved for use.
*/
- ret = vfio_pci_core_setup_barmap(vdev, index);
- if (ret)
- return ret;
+ bar_io = vfio_pci_core_get_iomap(vdev, index);
+ if (IS_ERR(bar_io))
+ return PTR_ERR(bar_io);
vma->vm_private_data = vdev;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 69a5c2d511e6..46cd44b22c9c 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -248,7 +248,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
* else. Check that PCI resources have been claimed for it.
*/
if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
- vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
+ IS_ERR(vfio_pci_core_get_iomap(vdev, get_dma_buf.region_index)))
return -ENODEV;
dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 3bfbb879a005..7f14dd46de17 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,19 +198,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
-/*
- * The barmap is set up in vfio_pci_core_enable(). Callers use this
- * function to check that the BAR resources are requested or that the
- * pci_iomap() was done.
- */
-int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
-{
- if (IS_ERR(vdev->barmap[bar]))
- return PTR_ERR(vdev->barmap[bar]);
- return 0;
-}
-EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
-
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
@@ -262,13 +249,11 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
*/
max_width = VFIO_PCI_IO_WIDTH_4;
} else {
- int ret = vfio_pci_core_setup_barmap(vdev, bar);
- if (ret) {
- done = ret;
+ io = vfio_pci_core_get_iomap(vdev, bar);
+ if (IS_ERR(io)) {
+ done = PTR_ERR(io);
goto out;
}
-
- io = vdev->barmap[bar];
}
if (bar == vdev->msix_bar) {
@@ -423,6 +408,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
struct vfio_pci_ioeventfd *ioeventfd;
+ void __iomem *io;
/* Only support ioeventfds into BARs */
if (bar > VFIO_PCI_BAR5_REGION_INDEX)
@@ -440,9 +426,9 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
if (count == 8)
return -EINVAL;
- ret = vfio_pci_core_setup_barmap(vdev, bar);
- if (ret)
- return ret;
+ io = vfio_pci_core_get_iomap(vdev, bar);
+ if (IS_ERR(io))
+ return PTR_ERR(io);
mutex_lock(&vdev->ioeventfds_lock);
@@ -479,7 +465,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
}
ioeventfd->vdev = vdev;
- ioeventfd->addr = vdev->barmap[bar] + pos;
+ ioeventfd->addr = io + pos;
ioeventfd->data = data;
ioeventfd->pos = pos;
ioeventfd->bar = bar;
diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c
index 1ed349a55629..c868b2177310 100644
--- a/drivers/vfio/pci/virtio/legacy_io.c
+++ b/drivers/vfio/pci/virtio/legacy_io.c
@@ -299,19 +299,18 @@ int virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
static int virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev)
{
struct vfio_pci_core_device *core_device = &virtvdev->core_device;
- int ret;
+ void __iomem *io;
/*
* Setup the BAR where the 'notify' exists to be used by vfio as well
* This will let us mmap it only once and use it when needed.
*/
- ret = vfio_pci_core_setup_barmap(core_device,
- virtvdev->notify_bar);
- if (ret)
- return ret;
+ io = vfio_pci_core_get_iomap(core_device,
+ virtvdev->notify_bar);
+ if (IS_ERR(io))
+ return PTR_ERR(io);
- virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] +
- virtvdev->notify_offset;
+ virtvdev->notify_addr = io + virtvdev->notify_offset;
return 0;
}
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 2ebba746c18f..ffd67e25bf3f 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -188,7 +188,6 @@ int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev,
int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
-int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
@@ -234,6 +233,25 @@ static inline bool is_aligned_for_order(struct vm_area_struct *vma,
!IS_ALIGNED(pfn, 1 << order)));
}
+/*
+ * Returns a BAR's iomap base or an ERR_PTR() if, for example, the
+ * BAR isn't valid, its resource wasn't acquired, or its iomap
+ * failed. This shall only be used after vfio_pci_core_enable()
+ * has set up the BAR maps and before vfio_pci_core_disable()
+ * tears them down.
+ */
+static inline void __iomem __must_check *
+vfio_pci_core_get_iomap(struct vfio_pci_core_device *vdev, int bar)
+{
+ if (WARN_ON_ONCE(bar < 0 || bar >= PCI_STD_NUM_BARS))
+ return ERR_PTR(-EINVAL);
+
+ if (WARN_ON_ONCE(!vdev->barmap[bar]))
+ return ERR_PTR(-ENODEV);
+
+ return vdev->barmap[bar];
+}
+
int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct phys_vec *phys);
--
2.47.3
^ permalink raw reply related [flat|nested] 4+ messages in thread