From: Alex Williamson <alex@shazbot.org>
To: Matt Evans <mattev@meta.com>
Cc: "Leon Romanovsky" <leon@kernel.org>,
"Jason Gunthorpe" <jgg@nvidia.com>,
"Alex Mastro" <amastro@fb.com>,
"Christian König" <christian.koenig@amd.com>,
"Bjorn Helgaas" <bhelgaas@google.com>,
"Logan Gunthorpe" <logang@deltatee.com>,
"Mahmoud Adam" <mngyadam@amazon.de>,
"David Matlack" <dmatlack@google.com>,
"Björn Töpel" <bjorn@kernel.org>,
"Sumit Semwal" <sumit.semwal@linaro.org>,
"Kevin Tian" <kevin.tian@intel.com>,
"Ankit Agrawal" <ankita@nvidia.com>,
"Pranjal Shrivastava" <praan@google.com>,
"Alistair Popple" <apopple@nvidia.com>,
"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
linux-kernel@vger.kernel.org, linux-media@vger.kernel.org,
dri-devel@lists.freedesktop.org, linaro-mm-sig@lists.linaro.org,
kvm@vger.kernel.org, linux-pci@vger.kernel.org, alex@shazbot.org
Subject: Re: [PATCH v2 3/9] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA
Date: Wed, 27 May 2026 16:59:22 -0600 [thread overview]
Message-ID: <20260527165922.60a79fee@shazbot.org> (raw)
In-Reply-To: <20260527102319.100128-4-mattev@meta.com>
On Wed, 27 May 2026 03:23:06 -0700
Matt Evans <mattev@meta.com> wrote:
> This helper, vfio_pci_core_mmap_prep_dmabuf(), creates a single-range
> DMABUF for the purpose of mapping a PCI BAR. This is used in a future
> commit by VFIO's ordinary mmap() path.
>
> This function transfers ownership of the VFIO device file reference to
> the DMABUF, which fput()s it when the DMABUF is released.
>
> Refactor the existing vfio_pci_core_feature_dma_buf() to split out
> export code common to the two paths, VFIO_DEVICE_FEATURE_DMA_BUF and
> this new VFIO_BAR mmap().
>
> Signed-off-by: Matt Evans <mattev@meta.com>
> ---
> drivers/vfio/pci/vfio_pci_dmabuf.c | 140 ++++++++++++++++++++++-------
> drivers/vfio/pci/vfio_pci_priv.h | 5 ++
> 2 files changed, 115 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 0d132c4ca95f..782408c08a5e 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -82,6 +82,8 @@ static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
> up_write(&priv->vdev->memory_lock);
> vfio_device_put_registration(&priv->vdev->vdev);
> }
> + if (priv->vfile)
> + fput(priv->vfile);
> kfree(priv->phys_vec);
> kfree(priv);
> }
> @@ -222,6 +224,45 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
> return -EFAULT;
> }
>
> +/*
> + * Create a DMABUF corresponding to priv, add it to vdev->dmabufs list
> + * for tracking (meaning cleanup or revocation will zap it), and take
> + * a vfio_device registration.
> + */
> +static int vfio_pci_dmabuf_export(struct vfio_pci_core_device *vdev,
> + struct vfio_pci_dma_buf *priv, uint32_t flags)
s/uint32_t/u32/?
> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +
> + if (!vfio_device_try_get_registration(&vdev->vdev))
> + return -ENODEV;
> +
> + exp_info.ops = &vfio_pci_dmabuf_ops;
> + exp_info.size = priv->size;
> + exp_info.flags = flags;
> + exp_info.priv = priv;
> +
> + priv->dmabuf = dma_buf_export(&exp_info);
> + if (IS_ERR(priv->dmabuf)) {
> + vfio_device_put_registration(&vdev->vdev);
> + return PTR_ERR(priv->dmabuf);
> + }
> +
> + kref_init(&priv->kref);
> + init_completion(&priv->comp);
> +
> + /* dma_buf_put() now frees priv */
> + INIT_LIST_HEAD(&priv->dmabufs_elm);
> + down_write(&vdev->memory_lock);
> + dma_resv_lock(priv->dmabuf->resv, NULL);
> + priv->revoked = !__vfio_pci_memory_enabled(vdev);
> + list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
> + dma_resv_unlock(priv->dmabuf->resv);
> + up_write(&vdev->memory_lock);
> +
> + return 0;
> +}
> +
> /*
> * This is a temporary "private interconnect" between VFIO DMABUF and iommufd.
> * It allows the two co-operating drivers to exchange the physical address of
> @@ -340,7 +381,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> {
> struct vfio_device_feature_dma_buf get_dma_buf = {};
> struct vfio_region_dma_range *dma_ranges;
> - DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> struct vfio_pci_dma_buf *priv;
> size_t length;
> int ret;
> @@ -400,34 +440,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> kfree(dma_ranges);
> dma_ranges = NULL;
>
> - if (!vfio_device_try_get_registration(&vdev->vdev)) {
> - ret = -ENODEV;
> + ret = vfio_pci_dmabuf_export(vdev, priv, get_dma_buf.open_flags);
> + if (ret)
> goto err_free_phys;
> - }
> -
> - exp_info.ops = &vfio_pci_dmabuf_ops;
> - exp_info.size = priv->size;
> - exp_info.flags = get_dma_buf.open_flags;
> - exp_info.priv = priv;
> -
> - priv->dmabuf = dma_buf_export(&exp_info);
> - if (IS_ERR(priv->dmabuf)) {
> - ret = PTR_ERR(priv->dmabuf);
> - goto err_dev_put;
> - }
> -
> - kref_init(&priv->kref);
> - init_completion(&priv->comp);
> -
> - /* dma_buf_put() now frees priv */
> - INIT_LIST_HEAD(&priv->dmabufs_elm);
> - down_write(&vdev->memory_lock);
> - dma_resv_lock(priv->dmabuf->resv, NULL);
> - priv->revoked = !__vfio_pci_memory_enabled(vdev);
> - list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
> - dma_resv_unlock(priv->dmabuf->resv);
> - up_write(&vdev->memory_lock);
> -
> /*
> * dma_buf_fd() consumes the reference, when the file closes the dmabuf
> * will be released.
> @@ -438,8 +453,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>
> return ret;
>
> -err_dev_put:
> - vfio_device_put_registration(&vdev->vdev);
> err_free_phys:
> kfree(priv->phys_vec);
> err_free_priv:
> @@ -449,6 +462,73 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> return ret;
> }
>
> +int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
> + struct vm_area_struct *vma,
> + u64 phys_start, u64 req_len,
> + unsigned int res_index)
> +{
> + struct vfio_pci_dma_buf *priv;
> + const unsigned int nr_ranges = 1;
Why a const local here, versus just priv->nr_ranges = 1; below? Thanks,
Alex
> + unsigned long vma_pgoff = vma->vm_pgoff & (VFIO_PCI_OFFSET_MASK >> PAGE_SHIFT);
> + int ret;
> +
> + priv = kzalloc_obj(*priv);
> + if (!priv)
> + return -ENOMEM;
> +
> + priv->phys_vec = kzalloc_obj(*priv->phys_vec);
> + if (!priv->phys_vec) {
> + ret = -ENOMEM;
> + goto err_free_priv;
> + }
> +
> + /*
> + * The DMABUF begins from the mmap()'s BAR offset, i.e. the
> + * start of the VMA corresponds to byte 0 of the DMABUF and
> + * byte (vma_pgoff << PAGE_SHIFT) of the BAR.
> + *
> + * vfio_pci_dma_buf_find_pfn() reverses this offset using
> + * vma_pgoff_adjust, so that ultimately a fault's offset from
> + * the start of the _VMA_ has a consistent usage whether the
> + * VMA originates from an mmap() of the VFIO device here or a
> + * direct DMABUF mmap().
> + */
> + priv->vdev = vdev;
> + priv->size = req_len;
> + priv->nr_ranges = nr_ranges;
> + priv->vma_pgoff_adjust = vma_pgoff;
> + priv->provider = pcim_p2pdma_provider(vdev->pdev, res_index);
> + if (!priv->provider) {
> + ret = -EINVAL;
> + goto err_free_phys;
> + }
> +
> + priv->phys_vec[0].paddr = phys_start + ((u64)vma_pgoff << PAGE_SHIFT);
> + priv->phys_vec[0].len = priv->size;
> +
> + ret = vfio_pci_dmabuf_export(vdev, priv, O_CLOEXEC | O_RDWR);
> + if (ret)
> + goto err_free_phys;
> +
> + /*
> + * The VMA gets the DMABUF file so that other users can locate
> + * the DMABUF via a VA. Ownership of the original VFIO device
> + * file being mmap()ed transfers to priv, and is put when the
> + * DMABUF is released.
> + */
> + priv->vfile = vma->vm_file;
> + vma->vm_file = priv->dmabuf->file;
> + vma->vm_private_data = priv;
> +
> + return 0;
> +
> +err_free_phys:
> + kfree(priv->phys_vec);
> +err_free_priv:
> + kfree(priv);
> + return ret;
> +}
> +
> void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
> {
> struct vfio_pci_dma_buf *priv;
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index c8f6f959056a..06dc0fd3e230 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -30,6 +30,7 @@ struct vfio_pci_dma_buf {
> size_t size;
> struct phys_vec *phys_vec;
> struct p2pdma_provider *provider;
> + struct file *vfile;
> u32 nr_ranges;
> struct kref kref;
> struct completion comp;
> @@ -133,6 +134,10 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
> unsigned long address,
> unsigned int order,
> unsigned long *out_pfn);
> +int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
> + struct vm_area_struct *vma,
> + u64 phys_start, u64 req_len,
> + unsigned int res_index);
>
> #ifdef CONFIG_VFIO_PCI_DMABUF
> int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
next prev parent reply other threads:[~2026-05-27 22:59 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-27 10:23 [PATCH v2 0/9] vfio/pci: Add mmap() for DMABUFs Matt Evans
2026-05-27 10:23 ` [PATCH v2 1/9] PCI/P2PDMA: Add CONFIG_PCI_P2PDMA_CORE Matt Evans
2026-05-27 16:07 ` Logan Gunthorpe
2026-05-27 17:13 ` Matt Evans
2026-05-27 21:09 ` Alex Williamson
2026-05-29 23:05 ` Jason Gunthorpe
2026-06-09 22:45 ` Bjorn Helgaas
2026-06-10 15:27 ` Pranjal Shrivastava
2026-06-10 16:00 ` Matt Evans
2026-05-27 10:23 ` [PATCH v2 2/9] vfio/pci: Add a helper to look up PFNs for DMABUFs Matt Evans
2026-05-27 22:38 ` Alex Williamson
2026-06-02 16:37 ` Matt Evans
2026-05-27 10:23 ` [PATCH v2 3/9] vfio/pci: Add a helper to create a DMABUF for a BAR-map VMA Matt Evans
2026-05-27 22:59 ` Alex Williamson [this message]
2026-06-02 16:39 ` Matt Evans
2026-05-27 10:23 ` [PATCH v2 4/9] vfio/pci: Convert BAR mmap() to use a DMABUF Matt Evans
2026-05-28 23:15 ` Alex Williamson
2026-06-02 18:01 ` Matt Evans
2026-05-27 10:23 ` [PATCH v2 5/9] vfio/pci: Provide a user-facing name for BAR mappings Matt Evans
2026-05-27 10:23 ` [PATCH v2 6/9] vfio/pci: Clean up BAR zap and revocation Matt Evans
2026-05-28 23:15 ` Alex Williamson
2026-05-27 10:23 ` [PATCH v2 7/9] vfio/pci: Support mmap() of a VFIO DMABUF Matt Evans
2026-05-28 23:15 ` Alex Williamson
2026-06-02 17:35 ` Matt Evans
2026-06-02 19:03 ` Alex Williamson
2026-05-27 10:23 ` [PATCH v2 8/9] vfio/pci: Permanently revoke a DMABUF on request Matt Evans
2026-05-28 23:14 ` Alex Williamson
2026-06-02 17:02 ` Matt Evans
2026-05-27 10:23 ` [PATCH v2 9/9] vfio/pci: Add mmap() attributes to DMABUF feature Matt Evans
2026-05-28 23:14 ` Alex Williamson
2026-06-02 16:50 ` Matt Evans
2026-06-02 19:14 ` Alex Williamson
2026-06-03 14:22 ` Matt Evans
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260527165922.60a79fee@shazbot.org \
--to=alex@shazbot.org \
--cc=amastro@fb.com \
--cc=ankita@nvidia.com \
--cc=apopple@nvidia.com \
--cc=bhelgaas@google.com \
--cc=bjorn@kernel.org \
--cc=christian.koenig@amd.com \
--cc=dmatlack@google.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=jgg@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=leon@kernel.org \
--cc=linaro-mm-sig@lists.linaro.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-media@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=logang@deltatee.com \
--cc=mattev@meta.com \
--cc=mngyadam@amazon.de \
--cc=praan@google.com \
--cc=sumit.semwal@linaro.org \
--cc=vivek.kasireddy@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.