From: fengchengwen <fengchengwen@huawei.com>
To: Zhiping Zhang <zhipingz@meta.com>, Jason Gunthorpe <jgg@ziepe.ca>,
Leon Romanovsky <leon@kernel.org>,
Bjorn Helgaas <bhelgaas@google.com>, <linux-rdma@vger.kernel.org>,
<linux-pci@vger.kernel.org>, <netdev@vger.kernel.org>,
<dri-devel@lists.freedesktop.org>,
Keith Busch <kbusch@kernel.org>, Yochai Cohen <yochai@nvidia.com>,
Yishai Hadas <yishaih@nvidia.com>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Subject: Re: [RFC v2 1/2] vfio: add callback to get tph info for dmabuf
Date: Sat, 28 Mar 2026 10:21:55 +0800 [thread overview]
Message-ID: <04859df4-6fa4-4b2b-aef1-621f3c053c2e@huawei.com> (raw)
In-Reply-To: <20260324234615.3731237-2-zhipingz@meta.com>
Hi Zhiping,
On 3/25/2026 7:46 AM, Zhiping Zhang wrote:
> This patch adds a callback to get the tph info on DMA buffer exporters.
> The tph info includes both the steering tag and the process hint (ph).
>
> The steering tag and ph are encoded in the flags field of
> vfio_device_feature_dma_buf instead of adding new fields to the uapi
> struct, to preserve ABI compatibility.
>
> Signed-off-by: Zhiping Zhang <zhipingz@meta.com>
> ---
> drivers/vfio/pci/vfio_pci_dmabuf.c | 26 ++++++++++++++++++++++++--
> include/linux/dma-buf.h | 30 ++++++++++++++++++++++++++++++
> include/uapi/linux/vfio.h | 9 +++++++--
> 3 files changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 478beafc6ac3..c45cb3884b85 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -17,6 +17,8 @@ struct vfio_pci_dma_buf {
> struct phys_vec *phys_vec;
> struct p2pdma_provider *provider;
> u32 nr_ranges;
> + u16 steering_tag;
> + u8 ph;
> u8 revoked : 1;
> };
>
> @@ -60,6 +62,15 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> priv->size, dir);
> }
>
> +static int vfio_pci_dma_buf_get_tph(struct dma_buf *dmabuf, u16 *steering_tag,
> + u8 *ph)
> +{
> + struct vfio_pci_dma_buf *priv = dmabuf->priv;
> + *steering_tag = priv->steering_tag;
> + *ph = priv->ph;
If the dmabuf exporter doesn't provide the ST and PH, this op should return an error.
> + return 0;
> +}
> +
> static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> struct sg_table *sgt,
> enum dma_data_direction dir)
> @@ -90,6 +101,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
> .unpin = vfio_pci_dma_buf_unpin,
> .attach = vfio_pci_dma_buf_attach,
> .map_dma_buf = vfio_pci_dma_buf_map,
> + .get_tph = vfio_pci_dma_buf_get_tph,
> .unmap_dma_buf = vfio_pci_dma_buf_unmap,
> .release = vfio_pci_dma_buf_release,
> };
> @@ -228,7 +240,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
> return -EFAULT;
>
> - if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
> + if (!get_dma_buf.nr_ranges ||
> + (get_dma_buf.flags & ~(VFIO_DMABUF_FL_TPH |
> + VFIO_DMABUF_TPH_PH_MASK |
> + VFIO_DMABUF_TPH_ST_MASK)))
> return -EINVAL;
>
> /*
> @@ -285,7 +300,14 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> ret = PTR_ERR(priv->dmabuf);
> goto err_dev_put;
> }
> -
> + if (get_dma_buf.flags & VFIO_DMABUF_FL_TPH) {
> + priv->steering_tag = (get_dma_buf.flags &
> + VFIO_DMABUF_TPH_ST_MASK) >>
> + VFIO_DMABUF_TPH_ST_SHIFT;
> + priv->ph = (get_dma_buf.flags &
> + VFIO_DMABUF_TPH_PH_MASK) >>
> + VFIO_DMABUF_TPH_PH_SHIFT;
> + }
> /* dma_buf_put() now frees priv */
> INIT_LIST_HEAD(&priv->dmabufs_elm);
> down_write(&vdev->memory_lock);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 133b9e637b55..26705c83ad80 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -113,6 +113,36 @@ struct dma_buf_ops {
> */
> void (*unpin)(struct dma_buf_attachment *attach);
>
> + /**
> + * @get_tph:
> + *
> + * Get the TPH (TLP Processing Hints) for this DMA buffer.
> + *
> + * This callback allows DMA buffer exporters to provide TPH including
> + * both the steering tag and the process hints (ph), which can be used
> + * to optimize peer-to-peer (P2P) memory access. The TPH info is typically
> + * used in scenarios where:
> + * - A PCIe device (e.g., RDMA NIC) needs to access memory on another
> + * PCIe device (e.g., GPU),
> + * - The system supports TPH and can use steering tags / ph to optimize
> + * cache placement and memory access patterns,
> + * - The memory is exported via DMABUF for cross-device sharing.
> + *
> + * @dmabuf: [in] The DMA buffer for which to retrieve TPH
> + * @steering_tag: [out] Pointer to store the 16-bit TPH steering tag value
> + * @ph: [out] Pointer to store the 8-bit TPH processing-hint value
> + *
> + * Returns:
> + * * 0 - Success, steering tag stored in @steering_tag
> + * * -EOPNOTSUPP - TPH steering tags not supported for this buffer
> + * * -EINVAL - Invalid parameters
> + *
> + * This callback is optional. If not implemented, the buffer does not
> + * support TPH.
It seems this is already implemented...
> + *
> + */
> + int (*get_tph)(struct dma_buf *dmabuf, u16 *steering_tag, u8 *ph);
> +
> /**
> * @map_dma_buf:
> *
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index bb7b89330d35..e2a8962641d2 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1505,8 +1505,13 @@ struct vfio_region_dma_range {
> struct vfio_device_feature_dma_buf {
> __u32 region_index;
> __u32 open_flags;
> - __u32 flags;
> - __u32 nr_ranges;
> + __u32 flags;
> +#define VFIO_DMABUF_FL_TPH (1U << 0) /* TPH info is present */
> +#define VFIO_DMABUF_TPH_PH_SHIFT 1 /* bits 1-2: PH (2-bit) */
> +#define VFIO_DMABUF_TPH_PH_MASK 0x6U
> +#define VFIO_DMABUF_TPH_ST_SHIFT 16 /* bits 16-31: steering tag */
> +#define VFIO_DMABUF_TPH_ST_MASK 0xffff0000U
> + __u32 nr_ranges;
> struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges);
> };
Another question:
1\ The PCIe specification defines both 8-bit and 16-bit STs.
2\ In the host-device ST implementation, ACPI provides both 8-bit and 16-bit STs; which
one to use depends on the minimum range supported by the device and the RP.
3\ So in this P2P scenario, if the exporter (e.g. GPU) supports 16-bit STs but the consumer
(e.g. RDMA NIC) only supports 8-bit STs, this may lead to a mismatch.
>
> --
> 2.52.0
>
>
>
next prev parent reply other threads:[~2026-03-28 2:22 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20260324234615.3731237-1-zhipingz@meta.com>
[not found] ` <20260324234615.3731237-2-zhipingz@meta.com>
2026-03-25 8:25 ` [RFC v2 1/2] vfio: add callback to get tph info for dmabuf Leon Romanovsky
2026-03-26 22:41 ` Keith Busch
2026-03-26 22:55 ` Zhiping Zhang
2026-03-31 8:39 ` Leon Romanovsky
2026-03-31 8:37 ` Leon Romanovsky
2026-03-31 13:00 ` Keith Busch
2026-03-31 13:29 ` Leon Romanovsky
2026-03-31 13:35 ` Keith Busch
2026-03-31 14:03 ` Leon Romanovsky
2026-03-31 14:13 ` Keith Busch
2026-03-31 19:02 ` Leon Romanovsky
2026-03-31 19:44 ` Keith Busch
2026-04-09 12:04 ` Leon Romanovsky
2026-04-13 18:32 ` Zhiping Zhang
2026-04-13 19:23 ` Leon Romanovsky
2026-04-14 17:34 ` Keith Busch
2026-03-28 2:21 ` fengchengwen [this message]
2026-03-31 0:49 ` Zhiping Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=04859df4-6fa4-4b2b-aef1-621f3c053c2e@huawei.com \
--to=fengchengwen@huawei.com \
--cc=bhelgaas@google.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=helgaas@kernel.org \
--cc=jgg@ziepe.ca \
--cc=kbusch@kernel.org \
--cc=leon@kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=yishaih@nvidia.com \
--cc=yochai@nvidia.com \
--cc=zhipingz@meta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox