All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex@shazbot.org>
To: Leon Romanovsky <leon@kernel.org>
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
	"Leon Romanovsky" <leonro@nvidia.com>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Christian König" <christian.koenig@amd.com>,
	dri-devel@lists.freedesktop.org, iommu@lists.linux.dev,
	"Jens Axboe" <axboe@kernel.dk>, "Joerg Roedel" <joro@8bytes.org>,
	kvm@vger.kernel.org, linaro-mm-sig@lists.linaro.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-media@vger.kernel.org, linux-mm@kvack.org,
	linux-pci@vger.kernel.org,
	"Logan Gunthorpe" <logang@deltatee.com>,
	"Marek Szyprowski" <m.szyprowski@samsung.com>,
	"Robin Murphy" <robin.murphy@arm.com>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	"Will Deacon" <will@kernel.org>
Subject: Re: [PATCH v5 9/9] vfio/pci: Add dma-buf export support for MMIO regions
Date: Thu, 30 Oct 2025 14:38:36 -0600	[thread overview]
Message-ID: <20251030143836.66cdf116@shazbot.org> (raw)
In-Reply-To: <72ecaa13864ca346797e342d23a7929562788148.1760368250.git.leon@kernel.org>

On Mon, 13 Oct 2025 18:26:11 +0300
Leon Romanovsky <leon@kernel.org> wrote:
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index fe247d0e2831..56b1320238a9 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1511,6 +1520,19 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
>  		return vfio_pci_core_pm_exit(vdev, flags, arg, argsz);
>  	case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
>  		return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
> +	case VFIO_DEVICE_FEATURE_DMA_BUF:
> +		if (device->ops->ioctl != vfio_pci_core_ioctl)
> +			/*
> +			 * Devices that overwrite general .ioctl() callback
> +			 * usually do it to implement their own
> +			 * VFIO_DEVICE_GET_REGION_INFO handlerm and they present

Typo, "handlerm"

> +			 * different BAR information from the real PCI.
> +			 *
> +			 * DMABUF relies on real PCI information.
> +			 */
> +			return -EOPNOTSUPP;
> +
> +		return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
>  	default:
>  		return -ENOTTY;
>  	}
...
> @@ -2459,6 +2482,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  			break;
>  		}
>  
> +		vfio_pci_dma_buf_move(vdev, true);
>  		vfio_pci_zap_bars(vdev);
>  	}
>  
> @@ -2482,6 +2506,10 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  
>  	ret = pci_reset_bus(pdev);
>  
> +	list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
> +		if (__vfio_pci_memory_enabled(vdev))
> +			vfio_pci_dma_buf_move(vdev, false);
> +
>  	vdev = list_last_entry(&dev_set->device_list,
>  			       struct vfio_pci_core_device, vdev.dev_set_list);
>  

This needs to be placed in the existing undo loop with the up_write();
otherwise it can be missed in the error case.

> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> new file mode 100644
> index 000000000000..eaba010777f3
> --- /dev/null
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> +static unsigned int calc_sg_nents(struct vfio_pci_dma_buf *priv,
> +				  struct dma_iova_state *state)
> +{
> +	struct phys_vec *phys_vec = priv->phys_vec;
> +	unsigned int nents = 0;
> +	u32 i;
> +
> +	if (!state || !dma_use_iova(state))
> +		for (i = 0; i < priv->nr_ranges; i++)
> +			nents += DIV_ROUND_UP(phys_vec[i].len, UINT_MAX);
> +	else
> +		/*
> +		 * In IOVA case, there is only one SG entry which spans
> +		 * for whole IOVA address space, but we need to make sure
> +		 * that it fits sg->length, maybe we need more.
> +		 */
> +		nents = DIV_ROUND_UP(priv->size, UINT_MAX);

I think we're arguably running afoul of the coding style standard here:
this is not a single simple statement, so it should use braces.

> +
> +	return nents;
> +}
> +
> +static struct sg_table *
> +vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> +		     enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct phys_vec *phys_vec = priv->phys_vec;
> +	unsigned long attrs = DMA_ATTR_MMIO;
> +	unsigned int nents, mapped_len = 0;
> +	struct scatterlist *sgl;
> +	struct sg_table *sgt;
> +	dma_addr_t addr;
> +	int ret;
> +	u32 i;
> +
> +	dma_resv_assert_held(priv->dmabuf->resv);
> +
> +	if (priv->revoked)
> +		return ERR_PTR(-ENODEV);
> +
> +	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	nents = calc_sg_nents(priv, state);
> +	ret = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
> +	if (ret)
> +		goto err_kfree_sgt;
> +
> +	sgl = sgt->sgl;
> +
> +	for (i = 0; i < priv->nr_ranges; i++) {
> +		if (!state) {
> +			addr = pci_p2pdma_bus_addr_map(priv->provider,
> +						       phys_vec[i].paddr);
> +		} else if (dma_use_iova(state)) {
> +			ret = dma_iova_link(attachment->dev, state,
> +					    phys_vec[i].paddr, 0,
> +					    phys_vec[i].len, dir, attrs);
> +			if (ret)
> +				goto err_unmap_dma;
> +
> +			mapped_len += phys_vec[i].len;
> +		} else {
> +			addr = dma_map_phys(attachment->dev, phys_vec[i].paddr,
> +					    phys_vec[i].len, dir, attrs);
> +			ret = dma_mapping_error(attachment->dev, addr);
> +			if (ret)
> +				goto err_unmap_dma;
> +		}
> +
> +		if (!state || !dma_use_iova(state))
> +			sgl = fill_sg_entry(sgl, phys_vec[i].len, addr);
> +	}
> +
> +	if (state && dma_use_iova(state)) {
> +		WARN_ON_ONCE(mapped_len != priv->size);
> +		ret = dma_iova_sync(attachment->dev, state, 0, mapped_len);
> +		if (ret)
> +			goto err_unmap_dma;
> +		sgl = fill_sg_entry(sgl, mapped_len, state->addr);
> +	}
> +
> +	/*
> +	 * SGL must be NULL to indicate that SGL is the last one
> +	 * and we allocated correct number of entries in sg_alloc_table()
> +	 */
> +	WARN_ON_ONCE(sgl);
> +	return sgt;
> +
> +err_unmap_dma:
> +	if (!i || !state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, mapped_len, dir,
> +				 attrs);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)
> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +					sg_dma_len(sgl), dir, attrs);

Same here, this should use braces.

> +	sg_free_table(sgt);
> +err_kfree_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +
> +static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> +				   struct sg_table *sgt,
> +				   enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	unsigned long attrs = DMA_ATTR_MMIO;
> +	struct scatterlist *sgl;
> +	int i;
> +
> +	if (!state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, priv->size, dir,
> +				 attrs);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)
> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +				       sg_dma_len(sgl), dir, attrs);
> +

Here too.

> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
...
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 75100bf009ba..63214467c875 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1478,6 +1478,31 @@ struct vfio_device_feature_bus_master {
>  };
>  #define VFIO_DEVICE_FEATURE_BUS_MASTER 10
>  
> +/**
> + * Upon VFIO_DEVICE_FEATURE_GET create a dma_buf fd for the
> + * regions selected.
> + *
> + * open_flags are the typical flags passed to open(2), eg O_RDWR, O_CLOEXEC,
> + * etc. offset/length specify a slice of the region to create the dmabuf from.
> + * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf.
> + *

It's probably worth noting that .flags should be zero; I see we enforce
that.  Thanks,

Alex

> + * Return: The fd number on success, -1 and errno is set on failure.
> + */
> +#define VFIO_DEVICE_FEATURE_DMA_BUF 11
> +
> +struct vfio_region_dma_range {
> +	__u64 offset;
> +	__u64 length;
> +};
> +
> +struct vfio_device_feature_dma_buf {
> +	__u32	region_index;
> +	__u32	open_flags;
> +	__u32   flags;
> +	__u32   nr_ranges;
> +	struct vfio_region_dma_range dma_ranges[];
> +};
> +
>  /* -------- API for Type1 VFIO IOMMU -------- */
>  
>  /**


  parent reply	other threads:[~2025-10-30 20:38 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-13 15:26 [PATCH v5 0/9] vfio/pci: Allow MMIO regions to be exported through dma-buf Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 1/9] PCI/P2PDMA: Separate the mmap() support from the core logic Leon Romanovsky
2025-10-17  6:30   ` Christoph Hellwig
2025-10-17 11:53     ` Jason Gunthorpe
2025-10-20 12:27       ` Christoph Hellwig
2025-10-20 12:58         ` Jason Gunthorpe
2025-10-20 15:04           ` Leon Romanovsky
2025-10-22  7:10           ` Christoph Hellwig
2025-10-22 11:43             ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 2/9] PCI/P2PDMA: Simplify bus address mapping API Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 3/9] PCI/P2PDMA: Refactor to separate core P2P functionality from memory allocation Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 4/9] PCI/P2PDMA: Export pci_p2pdma_map_type() function Leon Romanovsky
2025-10-17  6:31   ` Christoph Hellwig
2025-10-17 12:14     ` Jason Gunthorpe
2025-10-20 12:29       ` Christoph Hellwig
2025-10-20 13:14         ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 5/9] types: move phys_vec definition to common header Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 6/9] vfio: Export vfio device get and put registration helpers Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 7/9] vfio/pci: Share the core device pointer while invoking feature functions Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 8/9] vfio/pci: Enable peer-to-peer DMA transactions by default Leon Romanovsky
2025-10-16  4:09   ` Nicolin Chen
2025-10-16  6:10     ` Leon Romanovsky
2025-10-17  6:32   ` Christoph Hellwig
2025-10-17 11:55     ` Jason Gunthorpe
2025-10-20 12:28       ` Christoph Hellwig
2025-10-20 13:08         ` Jason Gunthorpe
2025-10-22  7:08           ` Christoph Hellwig
2025-10-22 11:38             ` Jason Gunthorpe
2025-10-22 11:54   ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 9/9] vfio/pci: Add dma-buf export support for MMIO regions Leon Romanovsky
2025-10-16 23:53   ` Jason Gunthorpe
2025-10-17  5:40     ` Leon Romanovsky
2025-10-17 15:58       ` Jason Gunthorpe
2025-10-17 16:01         ` Jason Gunthorpe
2025-10-17  0:01   ` Jason Gunthorpe
2025-10-17  6:33   ` Christoph Hellwig
2025-10-17 12:16     ` Jason Gunthorpe
2025-10-17 13:02   ` Jason Gunthorpe
2025-10-17 16:13     ` Leon Romanovsky
2025-10-20 16:15       ` Jason Gunthorpe
2025-10-20 16:44         ` Leon Romanovsky
2025-10-20 16:51           ` Jason Gunthorpe
2025-10-17 23:40   ` Jason Gunthorpe
2025-10-22 12:50   ` Jason Gunthorpe
2025-10-26  7:55     ` Shuai Xue
2025-10-27 12:09       ` Jason Gunthorpe
2025-10-28 13:46         ` Shuai Xue
2025-10-27 23:13   ` David Matlack
2025-10-28 12:02     ` Leon Romanovsky
2025-10-30 22:28       ` Alex Mastro
2025-10-29 16:50   ` Alex Mastro
2025-10-29 18:21     ` Leon Romanovsky
2025-10-30  0:25   ` Samiullah Khawaja
2025-10-30  6:48     ` Leon Romanovsky
2025-10-30 12:57       ` Jason Gunthorpe
2025-10-30 20:38   ` Alex Williamson [this message]
2025-10-31  6:48     ` Leon Romanovsky
2025-10-15 21:15 ` [PATCH v5 0/9] vfio/pci: Allow MMIO regions to be exported through dma-buf shinichiro.kawasaki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251030143836.66cdf116@shazbot.org \
    --to=alex@shazbot.org \
    --cc=akpm@linux-foundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bhelgaas@google.com \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=leon@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=logang@deltatee.com \
    --cc=m.szyprowski@samsung.com \
    --cc=robin.murphy@arm.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.