All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matt Evans <mattev@meta.com>
To: "Alex Williamson" <alex@shazbot.org>,
	"Leon Romanovsky" <leon@kernel.org>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Alex Mastro" <amastro@fb.com>,
	"Christian König" <christian.koenig@amd.com>
Cc: "Mahmoud Adam" <mngyadam@amazon.de>,
	"David Matlack" <dmatlack@google.com>,
	"Björn Töpel" <bjorn@kernel.org>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Kevin Tian" <kevin.tian@intel.com>,
	"Ankit Agrawal" <ankita@nvidia.com>,
	"Pranjal Shrivastava" <praan@google.com>,
	"Alistair Popple" <apopple@nvidia.com>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	linux-kernel@vger.kernel.org, linux-media@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linaro-mm-sig@lists.linaro.org,
	kvm@vger.kernel.org
Subject: [PATCH 3/9] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA
Date: Thu, 16 Apr 2026 06:17:46 -0700	[thread overview]
Message-ID: <20260416131815.2729131-4-mattev@meta.com> (raw)
In-Reply-To: <20260416131815.2729131-1-mattev@meta.com>

This helper, vfio_pci_core_mmap_prep_dmabuf(), creates a single-range
DMABUF for the purpose of mapping a PCI BAR.  This is used in a future
commit by VFIO's ordinary mmap() path.

This function transfers ownership of the VFIO device file reference
(the VMA's original vm_file) to the DMABUF, which fput()s it when the
DMABUF is released.

Refactor the existing vfio_pci_core_feature_dma_buf() to split out
export code common to the two paths, VFIO_DEVICE_FEATURE_DMA_BUF and
the new VFIO BAR mmap() path.

Signed-off-by: Matt Evans <mattev@meta.com>
---
 drivers/vfio/pci/vfio_pci_dmabuf.c | 143 +++++++++++++++++++++++------
 drivers/vfio/pci/vfio_pci_priv.h   |   5 +
 2 files changed, 118 insertions(+), 30 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 8b6bae56bbf2..3554afbc8ebc 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -82,6 +82,8 @@ static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
 		up_write(&priv->vdev->memory_lock);
 		vfio_device_put_registration(&priv->vdev->vdev);
 	}
+	if (priv->vfile)
+		fput(priv->vfile);
 	kfree(priv->phys_vec);
 	kfree(priv);
 }
@@ -204,6 +206,45 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
 	return -EFAULT;
 }
 
+/*
+ * Create a DMABUF corresponding to priv, add it to vdev->dmabufs list
+ * for tracking (meaning cleanup or revocation will zap it), and take
+ * a vfio_device registration.
+ */
+static int vfio_pci_dmabuf_export(struct vfio_pci_core_device *vdev,
+				  struct vfio_pci_dma_buf *priv, uint32_t flags)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	if (!vfio_device_try_get_registration(&vdev->vdev))
+		return -ENODEV;
+
+	exp_info.ops = &vfio_pci_dmabuf_ops;
+	exp_info.size = priv->size;
+	exp_info.flags = flags;
+	exp_info.priv = priv;
+
+	priv->dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(priv->dmabuf)) {
+		vfio_device_put_registration(&vdev->vdev);
+		return PTR_ERR(priv->dmabuf);
+	}
+
+	kref_init(&priv->kref);
+	init_completion(&priv->comp);
+
+	/* dma_buf_put() now frees priv */
+	INIT_LIST_HEAD(&priv->dmabufs_elm);
+	down_write(&vdev->memory_lock);
+	dma_resv_lock(priv->dmabuf->resv, NULL);
+	priv->revoked = !__vfio_pci_memory_enabled(vdev);
+	list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
+	dma_resv_unlock(priv->dmabuf->resv);
+	up_write(&vdev->memory_lock);
+
+	return 0;
+}
+
 /*
  * This is a temporary "private interconnect" between VFIO DMABUF and iommufd.
  * It allows the two co-operating drivers to exchange the physical address of
@@ -322,7 +363,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 {
 	struct vfio_device_feature_dma_buf get_dma_buf = {};
 	struct vfio_region_dma_range *dma_ranges;
-	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	struct vfio_pci_dma_buf *priv;
 	size_t length;
 	int ret;
@@ -392,34 +432,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	kfree(dma_ranges);
 	dma_ranges = NULL;
 
-	if (!vfio_device_try_get_registration(&vdev->vdev)) {
-		ret = -ENODEV;
+	ret = vfio_pci_dmabuf_export(vdev, priv, get_dma_buf.open_flags);
+	if (ret)
 		goto err_free_phys;
-	}
-
-	exp_info.ops = &vfio_pci_dmabuf_ops;
-	exp_info.size = priv->size;
-	exp_info.flags = get_dma_buf.open_flags;
-	exp_info.priv = priv;
-
-	priv->dmabuf = dma_buf_export(&exp_info);
-	if (IS_ERR(priv->dmabuf)) {
-		ret = PTR_ERR(priv->dmabuf);
-		goto err_dev_put;
-	}
-
-	kref_init(&priv->kref);
-	init_completion(&priv->comp);
-
-	/* dma_buf_put() now frees priv */
-	INIT_LIST_HEAD(&priv->dmabufs_elm);
-	down_write(&vdev->memory_lock);
-	dma_resv_lock(priv->dmabuf->resv, NULL);
-	priv->revoked = !__vfio_pci_memory_enabled(vdev);
-	list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
-	dma_resv_unlock(priv->dmabuf->resv);
-	up_write(&vdev->memory_lock);
-
 	/*
 	 * dma_buf_fd() consumes the reference, when the file closes the dmabuf
 	 * will be released.
@@ -430,8 +445,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 
 	return ret;
 
-err_dev_put:
-	vfio_device_put_registration(&vdev->vdev);
 err_free_phys:
 	kfree(priv->phys_vec);
 err_free_priv:
@@ -441,6 +454,76 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	return ret;
 }
 
+int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
+				   struct vm_area_struct *vma,
+				   u64 phys_start, u64 req_len,
+				   unsigned int res_index)
+{
+	struct vfio_pci_dma_buf *priv;
+	const unsigned int nr_ranges = 1;
+	int ret;
+
+	priv = kzalloc_obj(*priv);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->phys_vec = kzalloc_obj(*priv->phys_vec);
+	if (!priv->phys_vec) {
+		ret = -ENOMEM;
+		goto err_free_priv;
+	}
+
+	/*
+	 * The mmap() request's vma->vm_pgoff might be non-zero, but
+	 * the DMABUF is created from _offset zero_ of the BAR.  The
+	 * portion between zero and the vm_pgoff offset is inaccessible
+	 * through this VMA, but this approach keeps the
+	 * /proc/<pid>/maps offset somewhat consistent with the
+	 * pre-DMABUF code.  Size includes the offset portion.
+	 *
+	 * This differs from an mmap() of an explicitly-exported
+	 * DMABUF which is an arbitrary slice of the BAR, would be
+	 * created with the desired offset+size, and would usually be
+	 * mmap()ed with pgoff = 0.
+	 *
+	 * Both are equivalent and vfio_pci_dma_buf_find_pfn() finds
+	 * the same PFNs.
+	 */
+	priv->vdev = vdev;
+	priv->nr_ranges = nr_ranges;
+	priv->size = (vma->vm_pgoff << PAGE_SHIFT) + req_len;
+	priv->provider = pcim_p2pdma_provider(vdev->pdev, res_index);
+	if (!priv->provider) {
+		ret = -EINVAL;
+		goto err_free_phys;
+	}
+
+	priv->phys_vec[0].paddr = phys_start;
+	priv->phys_vec[0].len = priv->size;
+
+	ret = vfio_pci_dmabuf_export(vdev, priv, O_CLOEXEC | O_RDWR);
+	if (ret)
+		goto err_free_phys;
+
+	/*
+	 * The VMA gets the DMABUF file so that other users can locate
+	 * the DMABUF via a VA.  Ownership of the original VFIO device
+	 * file being mmap()ed transfers to priv, and is put when the
+	 * DMABUF is released.
+	 */
+	priv->vfile = vma->vm_file;
+	vma->vm_file = priv->dmabuf->file;
+	vma->vm_private_data = priv;
+
+	return 0;
+
+err_free_phys:
+	kfree(priv->phys_vec);
+err_free_priv:
+	kfree(priv);
+	return ret;
+}
+
 void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
 {
 	struct vfio_pci_dma_buf *priv;
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 317170a5b407..3cff1b7eb47b 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -30,6 +30,7 @@ struct vfio_pci_dma_buf {
 	size_t size;
 	struct phys_vec *phys_vec;
 	struct p2pdma_provider *provider;
+	struct file *vfile;
 	u32 nr_ranges;
 	struct kref kref;
 	struct completion comp;
@@ -132,6 +133,10 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
 			      unsigned long address,
 			      unsigned int order,
 			      unsigned long *out_pfn);
+int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
+				   struct vm_area_struct *vma,
+				   u64 phys_start, u64 req_len,
+				   unsigned int res_index);
 
 #ifdef CONFIG_VFIO_PCI_DMABUF
 int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
-- 
2.47.3


  parent reply	other threads:[~2026-04-17  7:06 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-16 13:17 [PATCH 0/9] vfio/pci: Add mmap() for DMABUFs Matt Evans
2026-04-16 13:17 ` [PATCH 1/9] vfio/pci: Fix vfio_pci_dma_buf_cleanup() double-put Matt Evans
2026-04-24 18:05   ` Jason Gunthorpe
2026-05-01 19:12   ` Alex Williamson
2026-05-06 13:53     ` Matt Evans
2026-05-06 15:29       ` Leon Romanovsky
2026-05-06 15:55         ` Matt Evans
2026-05-06 16:14           ` Leon Romanovsky
2026-05-06 16:42             ` Matt Evans
2026-04-16 13:17 ` [PATCH 2/9] vfio/pci: Add a helper to look up PFNs for DMABUFs Matt Evans
2026-04-24 18:15   ` Jason Gunthorpe
2026-05-07 15:48     ` Matt Evans
2026-04-16 13:17 ` Matt Evans [this message]
2026-04-24 18:24   ` [PATCH 3/9] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA Jason Gunthorpe
2026-04-30 16:47     ` Matt Evans
2026-04-30 17:11       ` Jason Gunthorpe
2026-05-05 18:13         ` Matt Evans
2026-05-06 19:03           ` Matt Evans
2026-04-16 13:17 ` [PATCH 4/9] vfio/pci: Convert BAR mmap() to use a DMABUF Matt Evans
2026-05-01 22:19   ` Alex Williamson
2026-05-04  7:40     ` Jason Gunthorpe
2026-05-05 10:49       ` Leon Romanovsky
2026-05-05 14:50         ` Alex Williamson
2026-05-05 14:59           ` Jason Gunthorpe
2026-05-06  5:35           ` Leon Romanovsky
2026-05-14 17:52             ` Matt Evans
2026-04-16 13:17 ` [PATCH 5/9] vfio/pci: Provide a user-facing name for BAR mappings Matt Evans
2026-04-24 18:26   ` Jason Gunthorpe
2026-05-01 22:44   ` Alex Williamson
2026-05-07 16:56     ` Matt Evans
2026-05-07 17:17       ` Matt Evans
2026-04-16 13:17 ` [PATCH 6/9] vfio/pci: Clean up BAR zap and revocation Matt Evans
2026-05-01 23:19   ` Alex Williamson
2026-05-05 10:58     ` Leon Romanovsky
2026-04-16 13:17 ` [PATCH 7/9] vfio/pci: Support mmap() of a VFIO DMABUF Matt Evans
2026-04-24 18:30   ` Jason Gunthorpe
2026-05-07 16:09     ` Matt Evans
2026-04-16 13:17 ` [PATCH 8/9] vfio/pci: Permanently revoke a DMABUF on request Matt Evans
2026-04-16 13:17 ` [PATCH 9/9] vfio/pci: Add mmap() attributes to DMABUF feature Matt Evans
2026-04-24 18:31   ` Jason Gunthorpe
2026-04-26 10:52     ` Leon Romanovsky
2026-04-27 14:36       ` Alex Williamson
2026-05-11 15:30         ` Matt Evans
2026-05-11 17:51           ` Leon Romanovsky
2026-05-11 20:09           ` Alex Williamson
2026-05-12 17:51             ` Matt Evans
2026-05-13 18:27               ` Alex Williamson
2026-05-14 13:55                 ` Matt Evans

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260416131815.2729131-4-mattev@meta.com \
    --to=mattev@meta.com \
    --cc=alex@shazbot.org \
    --cc=amastro@fb.com \
    --cc=ankita@nvidia.com \
    --cc=apopple@nvidia.com \
    --cc=bjorn@kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dmatlack@google.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=jgg@nvidia.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=leon@kernel.org \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=mngyadam@amazon.de \
    --cc=praan@google.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.