public inbox for linux-pci@vger.kernel.org
 help / color / mirror / Atom feed
From: Zhiping Zhang <zhipingz@meta.com>
To: Stanislav Fomichev <sdf@meta.com>, Keith Busch <kbusch@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>, Leon Romanovsky <leon@kernel.org>,
	Bjorn Helgaas <helgaas@kernel.org>, <linux-rdma@vger.kernel.org>,
	<linux-pci@vger.kernel.org>, <netdev@vger.kernel.org>,
	<dri-devel@lists.freedesktop.org>,
	Yochai Cohen <yochai@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	Zhiping Zhang <zhipingz@meta.com>
Subject: [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf
Date: Mon, 20 Apr 2026 11:39:15 -0700	[thread overview]
Message-ID: <20260420183920.3626389-2-zhipingz@meta.com> (raw)
In-Reply-To: <20260420183920.3626389-1-zhipingz@meta.com>

Add a dma-buf callback that returns raw TPH metadata from the exporter
so peer devices can reuse the steering tag and processing hint
associated with a VFIO-exported buffer.

Keep the existing VFIO_DEVICE_FEATURE_DMA_BUF uAPI layout intact by
using a flag plus one extra trailing entries[] object for the optional
TPH metadata. Rename the uAPI field dma_ranges to entries. The
nr_ranges field remains the DMA range count; when VFIO_DMABUF_FLAG_TPH
is set the kernel reads one extra entry beyond nr_ranges for the TPH
metadata.

Add an st_width parameter to get_tph() so the exporter can reject
steering tags that exceed the consumer's supported width (8-bit vs 16-bit).
When no TPH metadata is supplied, make get_tph() return -EOPNOTSUPP.

Signed-off-by: Zhiping Zhang <zhipingz@meta.com>
---
 drivers/vfio/pci/vfio_pci_dmabuf.c | 62 +++++++++++++++++++++++-------
 include/linux/dma-buf.h            | 17 ++++++++
 include/uapi/linux/vfio.h          | 28 ++++++++++++--
 3 files changed, 89 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index b1d658b8f7b5..fdc05f9ab3ae 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -17,6 +17,9 @@ struct vfio_pci_dma_buf {
 	struct phys_vec *phys_vec;
 	struct p2pdma_provider *provider;
 	u32 nr_ranges;
+	u16 steering_tag;
+	u8 ph;
+	u8 tph_present : 1;
 	u8 revoked : 1;
 };
 
@@ -60,6 +63,22 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 				       priv->size, dir);
 }
 
+static int vfio_pci_dma_buf_get_tph(struct dma_buf *dmabuf, u16 *steering_tag,
+				    u8 *ph, u8 st_width)
+{
+	struct vfio_pci_dma_buf *priv = dmabuf->priv;
+
+	if (!priv->tph_present)
+		return -EOPNOTSUPP;
+
+	if (st_width < 16 && priv->steering_tag > ((1U << st_width) - 1))
+		return -EINVAL;
+
+	*steering_tag = priv->steering_tag;
+	*ph = priv->ph;
+	return 0;
+}
+
 static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
 				   struct sg_table *sgt,
 				   enum dma_data_direction dir)
@@ -89,6 +108,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
 	.pin = vfio_pci_dma_buf_pin,
 	.unpin = vfio_pci_dma_buf_unpin,
 	.attach = vfio_pci_dma_buf_attach,
+	.get_tph = vfio_pci_dma_buf_get_tph,
 	.map_dma_buf = vfio_pci_dma_buf_map,
 	.unmap_dma_buf = vfio_pci_dma_buf_unmap,
 	.release = vfio_pci_dma_buf_release,
@@ -211,7 +231,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 				  size_t argsz)
 {
 	struct vfio_device_feature_dma_buf get_dma_buf = {};
-	struct vfio_region_dma_range *dma_ranges;
+	bool tph_supplied;
+	u32 tph_index;
+	struct vfio_region_dma_range *entries;
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	struct vfio_pci_dma_buf *priv;
 	size_t length;
@@ -228,7 +250,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
 		return -EFAULT;
 
-	if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
+	tph_supplied = !!(get_dma_buf.flags & VFIO_DMABUF_FLAG_TPH);
+	tph_index = get_dma_buf.nr_ranges;
+	if (!get_dma_buf.nr_ranges ||
+	    (get_dma_buf.flags & ~VFIO_DMABUF_FLAG_TPH))
 		return -EINVAL;
 
 	/*
@@ -237,19 +262,21 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
 		return -ENODEV;
 
-	dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
-				       sizeof(*dma_ranges));
-	if (IS_ERR(dma_ranges))
-		return PTR_ERR(dma_ranges);
+	entries = memdup_array_user(&arg->entries,
+				    get_dma_buf.nr_ranges +
+					(tph_supplied ? 1 : 0),
+				    sizeof(*entries));
+	if (IS_ERR(entries))
+		return PTR_ERR(entries);
 
-	ret = validate_dmabuf_input(&get_dma_buf, dma_ranges, &length);
+	ret = validate_dmabuf_input(&get_dma_buf, entries, &length);
 	if (ret)
-		goto err_free_ranges;
+		goto err_free_entries;
 
 	priv = kzalloc_obj(*priv);
 	if (!priv) {
 		ret = -ENOMEM;
-		goto err_free_ranges;
+		goto err_free_entries;
 	}
 	priv->phys_vec = kzalloc_objs(*priv->phys_vec, get_dma_buf.nr_ranges);
 	if (!priv->phys_vec) {
@@ -260,15 +287,22 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	priv->vdev = vdev;
 	priv->nr_ranges = get_dma_buf.nr_ranges;
 	priv->size = length;
+
+	if (tph_supplied) {
+		priv->steering_tag = entries[tph_index].tph.steering_tag;
+		priv->ph = entries[tph_index].tph.ph;
+		priv->tph_present = 1;
+	}
+
 	ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
 					     get_dma_buf.region_index,
-					     priv->phys_vec, dma_ranges,
+					     priv->phys_vec, entries,
 					     priv->nr_ranges);
 	if (ret)
 		goto err_free_phys;
 
-	kfree(dma_ranges);
-	dma_ranges = NULL;
+	kfree(entries);
+	entries = NULL;
 
 	if (!vfio_device_try_get_registration(&vdev->vdev)) {
 		ret = -ENODEV;
@@ -311,8 +345,8 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	kfree(priv->phys_vec);
 err_free_priv:
 	kfree(priv);
-err_free_ranges:
-	kfree(dma_ranges);
+err_free_entries:
+	kfree(entries);
 	return ret;
 }
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 133b9e637b55..b0a79ccbe100 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -113,6 +113,23 @@ struct dma_buf_ops {
 	 */
 	void (*unpin)(struct dma_buf_attachment *attach);
 
+	/**
+	 * @get_tph:
+	 * @dmabuf: DMA buffer for which to retrieve TPH metadata
+	 * @steering_tag: Returns the raw TPH steering tag
+	 * @ph: Returns the TPH processing hint
+	 * @st_width: Consumer's supported steering tag width in bits (8 or 16)
+	 *
+	 * Return the TPH (TLP Processing Hints) metadata associated with this
+	 * DMA buffer. Exporters that do not provide TPH metadata should return
+	 * -EOPNOTSUPP. If the steering tag exceeds @st_width bits, return
+	 * -EINVAL.
+	 *
+	 * This callback is optional.
+	 */
+	int (*get_tph)(struct dma_buf *dmabuf, u16 *steering_tag, u8 *ph,
+		       u8 st_width);
+
 	/**
 	 * @map_dma_buf:
 	 *
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index bb7b89330d35..a0bd24623c52 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1490,16 +1490,36 @@ struct vfio_device_feature_bus_master {
  * open_flags are the typical flags passed to open(2), eg O_RDWR, O_CLOEXEC,
  * etc. offset/length specify a slice of the region to create the dmabuf from.
  * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf.
+ * When VFIO_DMABUF_FLAG_TPH is set, entries[] contains one extra trailing
+ * object, following the nr_ranges DMA ranges, that carries the TPH steering
+ * tag and processing hint.
  *
- * flags should be 0.
+ * flags should be 0 or VFIO_DMABUF_FLAG_TPH.
  *
  * Return: The fd number on success, -1 and errno is set on failure.
  */
 #define VFIO_DEVICE_FEATURE_DMA_BUF 11
 
+enum vfio_device_feature_dma_buf_flags {
+	VFIO_DMABUF_FLAG_TPH = 1 << 0,
+};
+
+struct vfio_region_dma_tph {
+	__u16 steering_tag;
+	__u8 ph;
+	__u8 reserved;
+	__u32 reserved2;
+};
+
 struct vfio_region_dma_range {
-	__u64 offset;
-	__u64 length;
+	union {
+		__u64 offset;
+		struct vfio_region_dma_tph tph;
+	};
+	union {
+		__u64 length;
+		__u64 reserved;
+	};
 };
 
 struct vfio_device_feature_dma_buf {
@@ -1507,7 +1527,7 @@ struct vfio_device_feature_dma_buf {
 	__u32	open_flags;
 	__u32   flags;
 	__u32   nr_ranges;
-	struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges);
+	struct vfio_region_dma_range entries[];
 };
 
 /* -------- API for Type1 VFIO IOMMU -------- */
-- 
2.52.0


  reply	other threads:[~2026-04-20 18:45 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-20 18:39 [PATCH v1 0/2] Retrieve TPH from dma-buf for PCIe P2P memory access Zhiping Zhang
2026-04-20 18:39 ` Zhiping Zhang [this message]
2026-04-22 15:23   ` [PATCH v1 1/2] vfio: add callback to get tph info for dma-buf Alex Williamson
2026-04-22 16:29     ` Jason Gunthorpe
2026-04-22 19:27       ` Alex Williamson
2026-04-20 18:39 ` [PATCH v1 2/2] RDMA/mlx5: get tph for p2p access when registering dma-buf mr Zhiping Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260420183920.3626389-2-zhipingz@meta.com \
    --to=zhipingz@meta.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=helgaas@kernel.org \
    --cc=jgg@ziepe.ca \
    --cc=kbusch@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=sdf@meta.com \
    --cc=yishaih@nvidia.com \
    --cc=yochai@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox