public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf
       [not found] ` <20260420183920.3626389-2-zhipingz@meta.com>
@ 2026-04-22 15:23   ` Alex Williamson
  2026-04-22 16:29     ` Jason Gunthorpe
  0 siblings, 1 reply; 3+ messages in thread
From: Alex Williamson @ 2026-04-22 15:23 UTC (permalink / raw)
  To: Zhiping Zhang
  Cc: Stanislav Fomichev, Keith Busch, Jason Gunthorpe, Leon Romanovsky,
	Bjorn Helgaas, linux-rdma, linux-pci, netdev, dri-devel,
	Yochai Cohen, Yishai Hadas, alex

On Mon, 20 Apr 2026 11:39:15 -0700
Zhiping Zhang <zhipingz@meta.com> wrote:

> Add a dma-buf callback that returns raw TPH metadata from the exporter
> so peer devices can reuse the steering tag and processing hint
> associated with a VFIO-exported buffer.
> 
> Keep the existing VFIO_DEVICE_FEATURE_DMA_BUF uAPI layout intact by
> using a flag plus one extra trailing entries[] object for the optional
> TPH metadata. Rename the uAPI field dma_ranges to entries. The
> nr_ranges field remains the DMA range count; when VFIO_DMABUF_FLAG_TPH
> is set the kernel reads one extra entry beyond nr_ranges for the TPH
> metadata.
> 
> Add an st_width parameter to get_tph() so the exporter can reject
> steering tags that exceed the consumer's supported width (8 vs 16 bit).
> When no TPH metadata was supplied, make get_tph() return -EOPNOTSUPP.
> 
> Signed-off-by: Zhiping Zhang <zhipingz@meta.com>
> ---
>  drivers/vfio/pci/vfio_pci_dmabuf.c | 62 +++++++++++++++++++++++-------
>  include/linux/dma-buf.h            | 17 ++++++++
>  include/uapi/linux/vfio.h          | 28 ++++++++++++--
>  3 files changed, 89 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index b1d658b8f7b5..fdc05f9ab3ae 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -17,6 +17,9 @@ struct vfio_pci_dma_buf {
>  	struct phys_vec *phys_vec;
>  	struct p2pdma_provider *provider;
>  	u32 nr_ranges;
> +	u16 steering_tag;
> +	u8 ph;
> +	u8 tph_present : 1;
>  	u8 revoked : 1;
>  };
>  
> @@ -60,6 +63,22 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
>  				       priv->size, dir);
>  }
>  
> +static int vfio_pci_dma_buf_get_tph(struct dma_buf *dmabuf, u16 *steering_tag,
> +				    u8 *ph, u8 st_width)
> +{
> +	struct vfio_pci_dma_buf *priv = dmabuf->priv;
> +
> +	if (!priv->tph_present)
> +		return -EOPNOTSUPP;
> +
> +	if (st_width < 16 && priv->steering_tag > ((1U << st_width) - 1))
> +		return -EINVAL;
> +
> +	*steering_tag = priv->steering_tag;
> +	*ph = priv->ph;
> +	return 0;
> +}
> +
>  static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
>  				   struct sg_table *sgt,
>  				   enum dma_data_direction dir)
> @@ -89,6 +108,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
>  	.pin = vfio_pci_dma_buf_pin,
>  	.unpin = vfio_pci_dma_buf_unpin,
>  	.attach = vfio_pci_dma_buf_attach,
> +	.get_tph = vfio_pci_dma_buf_get_tph,
>  	.map_dma_buf = vfio_pci_dma_buf_map,
>  	.unmap_dma_buf = vfio_pci_dma_buf_unmap,
>  	.release = vfio_pci_dma_buf_release,
> @@ -211,7 +231,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  				  size_t argsz)
>  {
>  	struct vfio_device_feature_dma_buf get_dma_buf = {};
> -	struct vfio_region_dma_range *dma_ranges;
> +	bool tph_supplied;
> +	u32 tph_index;
> +	struct vfio_region_dma_range *entries;
>  	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
>  	struct vfio_pci_dma_buf *priv;
>  	size_t length;
> @@ -228,7 +250,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  	if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
>  		return -EFAULT;
>  
> -	if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
> +	tph_supplied = !!(get_dma_buf.flags & VFIO_DMABUF_FLAG_TPH);
> +	tph_index = get_dma_buf.nr_ranges;
> +	if (!get_dma_buf.nr_ranges ||
> +	    (get_dma_buf.flags & ~VFIO_DMABUF_FLAG_TPH))
>  		return -EINVAL;
>  
>  	/*
> @@ -237,19 +262,21 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  	if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
>  		return -ENODEV;
>  
> -	dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
> -				       sizeof(*dma_ranges));
> -	if (IS_ERR(dma_ranges))
> -		return PTR_ERR(dma_ranges);
> +	entries = memdup_array_user(&arg->entries,
> +				    get_dma_buf.nr_ranges +
> +					(tph_supplied ? 1 : 0),
> +				    sizeof(*entries));
> +	if (IS_ERR(entries))
> +		return PTR_ERR(entries);
>  
> -	ret = validate_dmabuf_input(&get_dma_buf, dma_ranges, &length);
> +	ret = validate_dmabuf_input(&get_dma_buf, entries, &length);
>  	if (ret)
> -		goto err_free_ranges;
> +		goto err_free_entries;
>  
>  	priv = kzalloc_obj(*priv);
>  	if (!priv) {
>  		ret = -ENOMEM;
> -		goto err_free_ranges;
> +		goto err_free_entries;
>  	}
>  	priv->phys_vec = kzalloc_objs(*priv->phys_vec, get_dma_buf.nr_ranges);
>  	if (!priv->phys_vec) {
> @@ -260,15 +287,22 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  	priv->vdev = vdev;
>  	priv->nr_ranges = get_dma_buf.nr_ranges;
>  	priv->size = length;
> +
> +	if (tph_supplied) {
> +		priv->steering_tag = entries[tph_index].tph.steering_tag;
> +		priv->ph = entries[tph_index].tph.ph;
> +		priv->tph_present = 1;
> +	}
> +
>  	ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
>  					     get_dma_buf.region_index,
> -					     priv->phys_vec, dma_ranges,
> +					     priv->phys_vec, entries,
>  					     priv->nr_ranges);
>  	if (ret)
>  		goto err_free_phys;
>  
> -	kfree(dma_ranges);
> -	dma_ranges = NULL;
> +	kfree(entries);
> +	entries = NULL;
>  
>  	if (!vfio_device_try_get_registration(&vdev->vdev)) {
>  		ret = -ENODEV;
> @@ -311,8 +345,8 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  	kfree(priv->phys_vec);
>  err_free_priv:
>  	kfree(priv);
> -err_free_ranges:
> -	kfree(dma_ranges);
> +err_free_entries:
> +	kfree(entries);
>  	return ret;
>  }
>  
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 133b9e637b55..b0a79ccbe100 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -113,6 +113,23 @@ struct dma_buf_ops {
>  	 */
>  	void (*unpin)(struct dma_buf_attachment *attach);
>  
> +	/**
> +	 * @get_tph:
> +	 * @dmabuf: DMA buffer for which to retrieve TPH metadata
> +	 * @steering_tag: Returns the raw TPH steering tag
> +	 * @ph: Returns the TPH processing hint
> +	 * @st_width: Consumer's supported steering tag width in bits (8 or 16)
> +	 *
> +	 * Return the TPH (TLP Processing Hints) metadata associated with this
> +	 * DMA buffer. Exporters that do not provide TPH metadata should return
> +	 * -EOPNOTSUPP. If the steering tag exceeds @st_width bits, return
> +	 * -EINVAL.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	int (*get_tph)(struct dma_buf *dmabuf, u16 *steering_tag, u8 *ph,
> +		       u8 st_width);
> +
>  	/**
>  	 * @map_dma_buf:
>  	 *
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index bb7b89330d35..a0bd24623c52 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1490,16 +1490,36 @@ struct vfio_device_feature_bus_master {
>   * open_flags are the typical flags passed to open(2), eg O_RDWR, O_CLOEXEC,
>   * etc. offset/length specify a slice of the region to create the dmabuf from.
>   * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf.
> + * When VFIO_DMABUF_FLAG_TPH is set, entries[] contains one extra trailing
> + * object after the nr_ranges DMA ranges carrying the TPH steering tag and
> + * processing hint.

I really don't think we want to design an API where entries is
implicitly off by one from what's actually there.  This feeds back into
the below removal of the __counted_by attribute, which is a red flag
that this is the wrong approach.

In general though, I'm really hoping that someone interested in
enabling TPH as an interface through vfio actually decides to take
resource targeting and revocation seriously.  There's no validation of
the steering tag here relative to what the user has access to and no
mechanism to revoke those tags if access changes.  In fact, there's not
even a proposed mechanism allowing the user to derive valid steering
tags.  Does the user implicitly know the value and the kernel just
allows it because... yolo?  Thanks,

Alex

>   *
> - * flags should be 0.
> + * flags should be 0 or VFIO_DMABUF_FLAG_TPH.
>   *
>   * Return: The fd number on success, -1 and errno is set on failure.
>   */
>  #define VFIO_DEVICE_FEATURE_DMA_BUF 11
>  
> +enum vfio_device_feature_dma_buf_flags {
> +	VFIO_DMABUF_FLAG_TPH = 1 << 0,
> +};
> +
> +struct vfio_region_dma_tph {
> +	__u16 steering_tag;
> +	__u8 ph;
> +	__u8 reserved;
> +	__u32 reserved2;
> +};
> +
>  struct vfio_region_dma_range {
> -	__u64 offset;
> -	__u64 length;
> +	union {
> +		__u64 offset;
> +		struct vfio_region_dma_tph tph;
> +	};
> +	union {
> +		__u64 length;
> +		__u64 reserved;
> +	};
>  };
>  
>  struct vfio_device_feature_dma_buf {
> @@ -1507,7 +1527,7 @@ struct vfio_device_feature_dma_buf {
>  	__u32	open_flags;
>  	__u32   flags;
>  	__u32   nr_ranges;
> -	struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges);
> +	struct vfio_region_dma_range entries[];
>  };
>  
>  /* -------- API for Type1 VFIO IOMMU -------- */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf
  2026-04-22 15:23   ` [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf Alex Williamson
@ 2026-04-22 16:29     ` Jason Gunthorpe
  2026-04-22 19:27       ` Alex Williamson
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Gunthorpe @ 2026-04-22 16:29 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Zhiping Zhang, Stanislav Fomichev, Keith Busch, Leon Romanovsky,
	Bjorn Helgaas, linux-rdma, linux-pci, netdev, dri-devel,
	Yochai Cohen, Yishai Hadas

On Wed, Apr 22, 2026 at 09:23:27AM -0600, Alex Williamson wrote:
> In general though, I'm really hoping that someone interested in
> enabling TPH as an interface through vfio actually decides to take
> resource targeting and revocation seriously.  There's no validation of
> the steering tag here relative to what the user has access to and no
> mechanism to revoke those tags if access changes.  In fact, there's not
> even a proposed mechanism allowing the user to derive valid steering
> tags.  Does the user implicitly know the value and the kernel just
> allows it because... yolo? 

This is the steering tag that remote devices will send *INTO* the VFIO
device.

IMHO it is entirely appropriate that the driver controlling the device
decide what tags are sent into it and when, so that's the VFIO
userspace.

There is no concept of access here since the entire device is captured
by VFIO.

If the VFIO device catastrophically malfunctions when receiving
certain steering tags then it is incompatible with VFIO and we should
at least block this new API.

The only requirement is that the device limit the TPH to only the
function that is perceiving them. If a device is really broken and
doesn't meet that then it should be blocked off and it is probably not
safe to be used with VMs at all.

Jason

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf
  2026-04-22 16:29     ` Jason Gunthorpe
@ 2026-04-22 19:27       ` Alex Williamson
  0 siblings, 0 replies; 3+ messages in thread
From: Alex Williamson @ 2026-04-22 19:27 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Zhiping Zhang, Stanislav Fomichev, Keith Busch, Leon Romanovsky,
	Bjorn Helgaas, linux-rdma, linux-pci, netdev, dri-devel,
	Yochai Cohen, Yishai Hadas, alex

On Wed, 22 Apr 2026 13:29:28 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Apr 22, 2026 at 09:23:27AM -0600, Alex Williamson wrote:
> > In general though, I'm really hoping that someone interested in
> > enabling TPH as an interface through vfio actually decides to take
> > resource targeting and revocation seriously.  There's no validation of
> > the steering tag here relative to what the user has access to and no
> > mechanism to revoke those tags if access changes.  In fact, there's not
> > even a proposed mechanism allowing the user to derive valid steering
> > tags.  Does the user implicitly know the value and the kernel just
> > allows it because... yolo?   
> 
> This is the steering tag that remote devices will send *INTO* the VFIO
> device.
> 
> IMHO it is entirely appropriate that the driver controlling the device
> decide what tags are sent into it and when, so that's the VFIO
> userspace.
> 
> There is no concept of access here since the entire device is captured
> by VFIO.
> 
> If the VFIO device catastrophically malfunctions when receiving
> certain steering tags then it is incompatible with VFIO and we should
> at least block this new API..
> 
> The only requirement is that the device limit the TPH to only the
> function that is perceiving them. If a device is really broken and
> doesn't meet that then it should be blocked off and it is probably not
> safe to be used with VMs at all.

Ok, if the vfio user is only suggesting steering tags for another
driver to use when accessing their own device through the dma-buf, and
the lifecycle is bound to that dma-buf, maybe I'm overreacting on the
security aspect.

I don't know how to qualify the statement in the last paragraph about
"[t]he only requirement is that the device limit the TPH to only the
function that is perceiving them", though.  Is that implicit in being
associated to the dma-buf for the user owned device, or is it a
property of the suggested steering tags, that we're not validating?

Steering tags can induce caching abuse, as interpreted in the
interconnect fabric, but maybe we've already conceded that as a
fundamental aspect of TPH in general.

So why does vfio need to be involved in any of the sequence proposed
here?  It seems like it would be a much cleaner design, avoiding
overloading the existing vfio feature and questionable array semantics,
if there were a set-tph ioctl on the resulting dma-buf instead of
making some vfio specific interface bundling creation with tph hints.
Thanks,

Alex

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-04-22 19:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20260420183920.3626389-1-zhipingz@meta.com>
     [not found] ` <20260420183920.3626389-2-zhipingz@meta.com>
2026-04-22 15:23   ` [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf Alex Williamson
2026-04-22 16:29     ` Jason Gunthorpe
2026-04-22 19:27       ` Alex Williamson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox