* [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
@ 2025-10-13 16:35 Wathsala Vithanage
2025-10-16 21:41 ` Jeremy Linton
0 siblings, 1 reply; 8+ messages in thread
From: Wathsala Vithanage @ 2025-10-13 16:35 UTC (permalink / raw)
To: alex.williamson, jgg, pstanner, jeremy.linton; +Cc: kvm, Wathsala Vithanage
TLP Processing Hints (TPH) let a requester provide steering hints that
can enable direct cache injection on supported platforms and PCIe
devices. The PCIe core already exposes TPH handling to kernel drivers.
This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
to user space to reduce memory latency and improve throughput for
polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
user-space drivers can:
- enable or disable TPH for the device function
- program steering tags in device-specific mode
The ioctl is available only when the device advertises the TPH
Capability. Invalid modes or tags are rejected. No functional change
occurs unless the ioctl is used.
Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
---
drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
include/uapi/linux/vfio.h | 36 ++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..0646d9a483fb 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -28,6 +28,7 @@
#include <linux/nospec.h>
#include <linux/sched/mm.h>
#include <linux/iommufd.h>
+#include <linux/pci-tph.h>
#if IS_ENABLED(CONFIG_EEH)
#include <asm/eeh.h>
#endif
@@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
ioeventfd.fd);
}
+static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
+ const struct vfio_pci_tph_entry *ent)
+{
+ int ret, mem_type;
+ u16 st;
+ u32 cpu_id = ent->cpu_id;
+
+ if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
+ return -EINVAL;
+
+ if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
+ return -EINVAL;
+
+ switch (ent->mem_type) {
+ case VFIO_TPH_MEM_TYPE_VMEM:
+ mem_type = TPH_MEM_TYPE_VM;
+ break;
+ case VFIO_TPH_MEM_TYPE_PMEM:
+ mem_type = TPH_MEM_TYPE_PM;
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type, topology_core_id(cpu_id),
+ &st);
+ if (ret)
+ return ret;
+ /*
+ * PCI core enforces table bounds and disables TPH on error.
+ */
+ return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
+}
+
+static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev, int mode)
+{
+ /* IV mode is not supported. */
+ if (mode == PCI_TPH_ST_IV_MODE)
+ return -EINVAL;
+ /* PCI core validates 'mode' and returns -EINVAL on bad values. */
+ return pcie_enable_tph(vdev->pdev, mode);
+}
+
+static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
+{
+ pcie_disable_tph(vdev->pdev);
+ return 0;
+}
+
+static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
+ void __user *uarg)
+{
+ struct vfio_pci_tph tph;
+
+ if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
+ return -EFAULT;
+
+ if (tph.argsz != sizeof(struct vfio_pci_tph))
+ return -EINVAL;
+
+ switch (tph.op) {
+ case VFIO_DEVICE_TPH_ENABLE:
+ return vfio_pci_tph_enable(vdev, tph.mode);
+ case VFIO_DEVICE_TPH_DISABLE:
+ return vfio_pci_tph_disable(vdev);
+ case VFIO_DEVICE_TPH_SET_ST:
+ return vfio_pci_tph_set_st(vdev, &tph.ent);
+ default:
+ return -EINVAL;
+ }
+}
+
long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg)
{
@@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
return vfio_pci_ioctl_reset(vdev, uarg);
case VFIO_DEVICE_SET_IRQS:
return vfio_pci_ioctl_set_irqs(vdev, uarg);
+ case VFIO_DEVICE_PCI_TPH:
+ return vfio_pci_ioctl_tph(vdev, uarg);
default:
return -ENOTTY;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 75100bf009ba..cfdee851031e 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
+/**
+ * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
+ *
+ * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
+ *
+ * Supported operations:
+ * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
+ * device-specific (DS) mode. IV mode is not supported via this ioctl
+ * and returns -EINVAL.
+ * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
+ * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH Steering-Tag
+ * (ST) table. The kernel derives the ST from cpu_id and mem_type; the
+ * value is not returned to userspace.
+ */
+struct vfio_pci_tph_entry {
+ __u32 cpu_id; /* CPU logical ID */
+ __u8 mem_type;
+#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory ST */
+#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent memory ST */
+ __u8 rsvd[1];
+ __u16 index; /* ST-table index */
+};
+
+struct vfio_pci_tph {
+ __u32 argsz; /* Size of vfio_pci_tph */
+ __u32 mode; /* NS and DS modes; IV not supported */
+ __u32 op;
+#define VFIO_DEVICE_TPH_ENABLE 0
+#define VFIO_DEVICE_TPH_DISABLE 1
+#define VFIO_DEVICE_TPH_SET_ST 2
+ struct vfio_pci_tph_entry ent;
+};
+
+#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
+
+
/**
* VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
* struct vfio_device_feature)
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2025-10-13 16:35 [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl Wathsala Vithanage
@ 2025-10-16 21:41 ` Jeremy Linton
2025-10-27 14:33 ` Wathsala Vithanage
0 siblings, 1 reply; 8+ messages in thread
From: Jeremy Linton @ 2025-10-16 21:41 UTC (permalink / raw)
To: Wathsala Vithanage, alex.williamson, jgg, pstanner
Cc: kvm, linux-kernel@vger.kernel.org
Hi,
On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
> TLP Processing Hints (TPH) let a requester provide steering hints that
> can enable direct cache injection on supported platforms and PCIe
> devices. The PCIe core already exposes TPH handling to kernel drivers.
>
> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
> to user space to reduce memory latency and improve throughput for
> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
> user-space drivers can:
> - enable or disable TPH for the device function
> - program steering tags in device-specific mode
>
> The ioctl is available only when the device advertises the TPH
> Capability. Invalid modes or tags are rejected. No functional change
> occurs unless the ioctl is used.
>
> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
> ---
> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
> include/uapi/linux/vfio.h | 36 ++++++++++++++++
> 2 files changed, 110 insertions(+)
>
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..0646d9a483fb 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -28,6 +28,7 @@
> #include <linux/nospec.h>
> #include <linux/sched/mm.h>
> #include <linux/iommufd.h>
> +#include <linux/pci-tph.h>
> #if IS_ENABLED(CONFIG_EEH)
> #include <asm/eeh.h>
> #endif
> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> ioeventfd.fd);
> }
>
> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
> + const struct vfio_pci_tph_entry *ent)
> +{
> + int ret, mem_type;
> + u16 st;
> + u32 cpu_id = ent->cpu_id;
> +
> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
> + return -EINVAL;
> +
> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
> + return -EINVAL;
> +
> + switch (ent->mem_type) {
> + case VFIO_TPH_MEM_TYPE_VMEM:
> + mem_type = TPH_MEM_TYPE_VM;
> + break;
> + case VFIO_TPH_MEM_TYPE_PMEM:
> + mem_type = TPH_MEM_TYPE_PM;
> + break;
> + default:
> + return -EINVAL;
> + }
> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type, topology_core_id(cpu_id),
> + &st);
> + if (ret)
> + return ret;
> + /*
> + * PCI core enforces table bounds and disables TPH on error.
> + */
> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
> +}
> +
> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev, int mode)
> +{
> + /* IV mode is not supported. */
> + if (mode == PCI_TPH_ST_IV_MODE)
> + return -EINVAL;
> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
> + return pcie_enable_tph(vdev->pdev, mode);
> +}
> +
> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
> +{
> + pcie_disable_tph(vdev->pdev);
> + return 0;
> +}
> +
> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
> + void __user *uarg)
> +{
> + struct vfio_pci_tph tph;
> +
> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
> + return -EFAULT;
> +
> + if (tph.argsz != sizeof(struct vfio_pci_tph))
> + return -EINVAL;
> +
> + switch (tph.op) {
> + case VFIO_DEVICE_TPH_ENABLE:
> + return vfio_pci_tph_enable(vdev, tph.mode);
> + case VFIO_DEVICE_TPH_DISABLE:
> + return vfio_pci_tph_disable(vdev);
> + case VFIO_DEVICE_TPH_SET_ST:
> + return vfio_pci_tph_set_st(vdev, &tph.ent);
> + default:
> + return -EINVAL;
> + }
> +}
> +
> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
> unsigned long arg)
> {
> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
> return vfio_pci_ioctl_reset(vdev, uarg);
> case VFIO_DEVICE_SET_IRQS:
> return vfio_pci_ioctl_set_irqs(vdev, uarg);
> + case VFIO_DEVICE_PCI_TPH:
> + return vfio_pci_ioctl_tph(vdev, uarg);
> default:
> return -ENOTTY;
> }
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 75100bf009ba..cfdee851031e 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>
> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
>
> +/**
> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
> + *
> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
> + *
> + * Supported operations:
> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
> + * device-specific (DS) mode. IV mode is not supported via this ioctl
> + * and returns -EINVAL.
> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH Steering-Tag
> + * (ST) table. The kernel derives the ST from cpu_id and mem_type; the
> + * value is not returned to userspace.
> + */
> +struct vfio_pci_tph_entry {
> + __u32 cpu_id; /* CPU logical ID */
> + __u8 mem_type;
> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory ST */
> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent memory ST */
> + __u8 rsvd[1];
> + __u16 index; /* ST-table index */
> +};
> +
> +struct vfio_pci_tph {
> + __u32 argsz; /* Size of vfio_pci_tph */
> + __u32 mode; /* NS and DS modes; IV not supported */
> + __u32 op;
> +#define VFIO_DEVICE_TPH_ENABLE 0
> +#define VFIO_DEVICE_TPH_DISABLE 1
> +#define VFIO_DEVICE_TPH_SET_ST 2
> + struct vfio_pci_tph_entry ent;
> +};
> +
> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
A quick look at this, it seems its following the way the existing vfio
IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
really change their structure, or don't need a structure in the case of
disable. Why not use IOW() and let the kernel error handling deal with
those two as independent ioctls?
Thanks,
> +
> +
> /**
> * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
> * struct vfio_device_feature)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2025-10-16 21:41 ` Jeremy Linton
@ 2025-10-27 14:33 ` Wathsala Vithanage
2025-11-05 19:15 ` Alex Williamson
0 siblings, 1 reply; 8+ messages in thread
From: Wathsala Vithanage @ 2025-10-27 14:33 UTC (permalink / raw)
To: Jeremy Linton, alex.williamson, jgg, pstanner
Cc: kvm, linux-kernel@vger.kernel.org
On 10/16/25 16:41, Jeremy Linton wrote:
> Hi,
>
> On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
>> TLP Processing Hints (TPH) let a requester provide steering hints that
>> can enable direct cache injection on supported platforms and PCIe
>> devices. The PCIe core already exposes TPH handling to kernel drivers.
>>
>> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
>> to user space to reduce memory latency and improve throughput for
>> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
>> user-space drivers can:
>> - enable or disable TPH for the device function
>> - program steering tags in device-specific mode
>>
>> The ioctl is available only when the device advertises the TPH
>> Capability. Invalid modes or tags are rejected. No functional change
>> occurs unless the ioctl is used.
>>
>> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
>> ---
>> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
>> include/uapi/linux/vfio.h | 36 ++++++++++++++++
>> 2 files changed, 110 insertions(+)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci_core.c
>> b/drivers/vfio/pci/vfio_pci_core.c
>> index 7dcf5439dedc..0646d9a483fb 100644
>> --- a/drivers/vfio/pci/vfio_pci_core.c
>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>> @@ -28,6 +28,7 @@
>> #include <linux/nospec.h>
>> #include <linux/sched/mm.h>
>> #include <linux/iommufd.h>
>> +#include <linux/pci-tph.h>
>> #if IS_ENABLED(CONFIG_EEH)
>> #include <asm/eeh.h>
>> #endif
>> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
>> vfio_pci_core_device *vdev,
>> ioeventfd.fd);
>> }
>> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
>> + const struct vfio_pci_tph_entry *ent)
>> +{
>> + int ret, mem_type;
>> + u16 st;
>> + u32 cpu_id = ent->cpu_id;
>> +
>> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
>> + return -EINVAL;
>> +
>> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
>> + return -EINVAL;
>> +
>> + switch (ent->mem_type) {
>> + case VFIO_TPH_MEM_TYPE_VMEM:
>> + mem_type = TPH_MEM_TYPE_VM;
>> + break;
>> + case VFIO_TPH_MEM_TYPE_PMEM:
>> + mem_type = TPH_MEM_TYPE_PM;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
>> topology_core_id(cpu_id),
>> + &st);
>> + if (ret)
>> + return ret;
>> + /*
>> + * PCI core enforces table bounds and disables TPH on error.
>> + */
>> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
>> +}
>> +
>> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
>> int mode)
>> +{
>> + /* IV mode is not supported. */
>> + if (mode == PCI_TPH_ST_IV_MODE)
>> + return -EINVAL;
>> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
>> + return pcie_enable_tph(vdev->pdev, mode);
>> +}
>> +
>> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
>> +{
>> + pcie_disable_tph(vdev->pdev);
>> + return 0;
>> +}
>> +
>> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
>> + void __user *uarg)
>> +{
>> + struct vfio_pci_tph tph;
>> +
>> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
>> + return -EFAULT;
>> +
>> + if (tph.argsz != sizeof(struct vfio_pci_tph))
>> + return -EINVAL;
>> +
>> + switch (tph.op) {
>> + case VFIO_DEVICE_TPH_ENABLE:
>> + return vfio_pci_tph_enable(vdev, tph.mode);
>> + case VFIO_DEVICE_TPH_DISABLE:
>> + return vfio_pci_tph_disable(vdev);
>> + case VFIO_DEVICE_TPH_SET_ST:
>> + return vfio_pci_tph_set_st(vdev, &tph.ent);
>> + default:
>> + return -EINVAL;
>> + }
>> +}
>> +
>> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
>> int cmd,
>> unsigned long arg)
>> {
>> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
>> *core_vdev, unsigned int cmd,
>> return vfio_pci_ioctl_reset(vdev, uarg);
>> case VFIO_DEVICE_SET_IRQS:
>> return vfio_pci_ioctl_set_irqs(vdev, uarg);
>> + case VFIO_DEVICE_PCI_TPH:
>> + return vfio_pci_ioctl_tph(vdev, uarg);
>> default:
>> return -ENOTTY;
>> }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 75100bf009ba..cfdee851031e 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
>> +/**
>> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
>> + *
>> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
>> + *
>> + * Supported operations:
>> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
>> + * device-specific (DS) mode. IV mode is not supported via this ioctl
>> + * and returns -EINVAL.
>> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
>> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
>> Steering-Tag
>> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
>> the
>> + * value is not returned to userspace.
>> + */
>> +struct vfio_pci_tph_entry {
>> + __u32 cpu_id; /* CPU logical ID */
>> + __u8 mem_type;
>> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
>> ST */
>> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
>> memory ST */
>> + __u8 rsvd[1];
>> + __u16 index; /* ST-table index */
>> +};
>> +
>> +struct vfio_pci_tph {
>> + __u32 argsz; /* Size of vfio_pci_tph */
>> + __u32 mode; /* NS and DS modes; IV not supported */
>> + __u32 op;
>> +#define VFIO_DEVICE_TPH_ENABLE 0
>> +#define VFIO_DEVICE_TPH_DISABLE 1
>> +#define VFIO_DEVICE_TPH_SET_ST 2
>> + struct vfio_pci_tph_entry ent;
>> +};
>> +
>> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
>
> A quick look at this, it seems its following the way the existing vfio
> IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
> really change their structure, or don't need a structure in the case
> of disable. Why not use IOW() and let the kernel error handling deal
> with those two as independent ioctls?
>
>
> Thanks,
It will require two IOCTLs. I’m ok with having two IOCTLs for this
feature if the maintainers are fine with it.
Thanks,
>
>> +
>> +
>> /**
>> * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
>> * struct vfio_device_feature)
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2025-10-27 14:33 ` Wathsala Vithanage
@ 2025-11-05 19:15 ` Alex Williamson
2025-11-06 23:19 ` Wathsala Vithanage
0 siblings, 1 reply; 8+ messages in thread
From: Alex Williamson @ 2025-11-05 19:15 UTC (permalink / raw)
To: Wathsala Vithanage
Cc: Jeremy Linton, alex.williamson, jgg, pstanner, kvm,
linux-kernel@vger.kernel.org
On Mon, 27 Oct 2025 09:33:33 -0500
Wathsala Vithanage <wathsala.vithanage@arm.com> wrote:
> On 10/16/25 16:41, Jeremy Linton wrote:
> > Hi,
> >
> > On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
> >> TLP Processing Hints (TPH) let a requester provide steering hints that
> >> can enable direct cache injection on supported platforms and PCIe
> >> devices. The PCIe core already exposes TPH handling to kernel drivers.
> >>
> >> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
> >> to user space to reduce memory latency and improve throughput for
> >> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
> >> user-space drivers can:
> >> - enable or disable TPH for the device function
> >> - program steering tags in device-specific mode
> >>
> >> The ioctl is available only when the device advertises the TPH
> >> Capability. Invalid modes or tags are rejected. No functional change
> >> occurs unless the ioctl is used.
> >>
> >> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
> >> ---
> >> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
> >> include/uapi/linux/vfio.h | 36 ++++++++++++++++
> >> 2 files changed, 110 insertions(+)
> >>
> >> diff --git a/drivers/vfio/pci/vfio_pci_core.c
> >> b/drivers/vfio/pci/vfio_pci_core.c
> >> index 7dcf5439dedc..0646d9a483fb 100644
> >> --- a/drivers/vfio/pci/vfio_pci_core.c
> >> +++ b/drivers/vfio/pci/vfio_pci_core.c
> >> @@ -28,6 +28,7 @@
> >> #include <linux/nospec.h>
> >> #include <linux/sched/mm.h>
> >> #include <linux/iommufd.h>
> >> +#include <linux/pci-tph.h>
> >> #if IS_ENABLED(CONFIG_EEH)
> >> #include <asm/eeh.h>
> >> #endif
> >> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
> >> vfio_pci_core_device *vdev,
> >> ioeventfd.fd);
> >> }
> >> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
> >> + const struct vfio_pci_tph_entry *ent)
> >> +{
> >> + int ret, mem_type;
> >> + u16 st;
> >> + u32 cpu_id = ent->cpu_id;
> >> +
> >> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
> >> + return -EINVAL;
> >> +
> >> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
> >> + return -EINVAL;
> >> +
> >> + switch (ent->mem_type) {
> >> + case VFIO_TPH_MEM_TYPE_VMEM:
> >> + mem_type = TPH_MEM_TYPE_VM;
> >> + break;
> >> + case VFIO_TPH_MEM_TYPE_PMEM:
> >> + mem_type = TPH_MEM_TYPE_PM;
> >> + break;
> >> + default:
> >> + return -EINVAL;
> >> + }
> >> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
> >> topology_core_id(cpu_id),
> >> + &st);
> >> + if (ret)
> >> + return ret;
> >> + /*
> >> + * PCI core enforces table bounds and disables TPH on error.
> >> + */
> >> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
> >> +}
> >> +
> >> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
> >> int mode)
> >> +{
> >> + /* IV mode is not supported. */
> >> + if (mode == PCI_TPH_ST_IV_MODE)
> >> + return -EINVAL;
> >> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
> >> + return pcie_enable_tph(vdev->pdev, mode);
> >> +}
> >> +
> >> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
> >> +{
> >> + pcie_disable_tph(vdev->pdev);
> >> + return 0;
> >> +}
> >> +
> >> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
> >> + void __user *uarg)
> >> +{
> >> + struct vfio_pci_tph tph;
> >> +
> >> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
> >> + return -EFAULT;
> >> +
> >> + if (tph.argsz != sizeof(struct vfio_pci_tph))
> >> + return -EINVAL;
> >> +
> >> + switch (tph.op) {
> >> + case VFIO_DEVICE_TPH_ENABLE:
> >> + return vfio_pci_tph_enable(vdev, tph.mode);
> >> + case VFIO_DEVICE_TPH_DISABLE:
> >> + return vfio_pci_tph_disable(vdev);
> >> + case VFIO_DEVICE_TPH_SET_ST:
> >> + return vfio_pci_tph_set_st(vdev, &tph.ent);
> >> + default:
> >> + return -EINVAL;
> >> + }
> >> +}
> >> +
> >> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
> >> int cmd,
> >> unsigned long arg)
> >> {
> >> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
> >> *core_vdev, unsigned int cmd,
> >> return vfio_pci_ioctl_reset(vdev, uarg);
> >> case VFIO_DEVICE_SET_IRQS:
> >> return vfio_pci_ioctl_set_irqs(vdev, uarg);
> >> + case VFIO_DEVICE_PCI_TPH:
> >> + return vfio_pci_ioctl_tph(vdev, uarg);
> >> default:
> >> return -ENOTTY;
> >> }
> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >> index 75100bf009ba..cfdee851031e 100644
> >> --- a/include/uapi/linux/vfio.h
> >> +++ b/include/uapi/linux/vfio.h
> >> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
> >> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
> >> +/**
> >> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
> >> + *
> >> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
> >> + *
> >> + * Supported operations:
> >> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
> >> + * device-specific (DS) mode. IV mode is not supported via this ioctl
> >> + * and returns -EINVAL.
> >> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
> >> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
> >> Steering-Tag
> >> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
> >> the
> >> + * value is not returned to userspace.
> >> + */
> >> +struct vfio_pci_tph_entry {
> >> + __u32 cpu_id; /* CPU logical ID */
> >> + __u8 mem_type;
> >> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
> >> ST */
> >> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
> >> memory ST */
> >> + __u8 rsvd[1];
> >> + __u16 index; /* ST-table index */
> >> +};
> >> +
> >> +struct vfio_pci_tph {
> >> + __u32 argsz; /* Size of vfio_pci_tph */
> >> + __u32 mode; /* NS and DS modes; IV not supported */
> >> + __u32 op;
> >> +#define VFIO_DEVICE_TPH_ENABLE 0
> >> +#define VFIO_DEVICE_TPH_DISABLE 1
> >> +#define VFIO_DEVICE_TPH_SET_ST 2
> >> + struct vfio_pci_tph_entry ent;
> >> +};
> >> +
> >> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
> >
> > A quick look at this, it seems its following the way the existing vfio
> > IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
> > really change their structure, or don't need a structure in the case
> > of disable. Why not use IOW() and let the kernel error handling deal
> > with those two as independent ioctls?
> >
> >
> > Thanks,
>
>
> It will require two IOCTLs. I’m ok with having two IOCTLs for this
> feature if the maintainers are fine with it.
TBH, I'm not sure why we didn't use a DEVICE_FEATURE for this. Seems
like we could implement a SET operation that does enable/disable and
another for steering tags. I still need to fully grasp the
implications of this support though. Thanks,
Alex
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2025-11-05 19:15 ` Alex Williamson
@ 2025-11-06 23:19 ` Wathsala Vithanage
2026-01-29 14:06 ` Wathsala Vithanage
0 siblings, 1 reply; 8+ messages in thread
From: Wathsala Vithanage @ 2025-11-06 23:19 UTC (permalink / raw)
To: Alex Williamson
Cc: Jeremy Linton, alex.williamson, jgg, pstanner, kvm,
linux-kernel@vger.kernel.org
On 11/5/25 13:15, Alex Williamson wrote:
> On Mon, 27 Oct 2025 09:33:33 -0500
> Wathsala Vithanage <wathsala.vithanage@arm.com> wrote:
>
>> On 10/16/25 16:41, Jeremy Linton wrote:
>>> Hi,
>>>
>>> On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
>>>> TLP Processing Hints (TPH) let a requester provide steering hints that
>>>> can enable direct cache injection on supported platforms and PCIe
>>>> devices. The PCIe core already exposes TPH handling to kernel drivers.
>>>>
>>>> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
>>>> to user space to reduce memory latency and improve throughput for
>>>> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
>>>> user-space drivers can:
>>>> - enable or disable TPH for the device function
>>>> - program steering tags in device-specific mode
>>>>
>>>> The ioctl is available only when the device advertises the TPH
>>>> Capability. Invalid modes or tags are rejected. No functional change
>>>> occurs unless the ioctl is used.
>>>>
>>>> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
>>>> ---
>>>> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
>>>> include/uapi/linux/vfio.h | 36 ++++++++++++++++
>>>> 2 files changed, 110 insertions(+)
>>>>
>>>> diff --git a/drivers/vfio/pci/vfio_pci_core.c
>>>> b/drivers/vfio/pci/vfio_pci_core.c
>>>> index 7dcf5439dedc..0646d9a483fb 100644
>>>> --- a/drivers/vfio/pci/vfio_pci_core.c
>>>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>>>> @@ -28,6 +28,7 @@
>>>> #include <linux/nospec.h>
>>>> #include <linux/sched/mm.h>
>>>> #include <linux/iommufd.h>
>>>> +#include <linux/pci-tph.h>
>>>> #if IS_ENABLED(CONFIG_EEH)
>>>> #include <asm/eeh.h>
>>>> #endif
>>>> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
>>>> vfio_pci_core_device *vdev,
>>>> ioeventfd.fd);
>>>> }
>>>> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
>>>> + const struct vfio_pci_tph_entry *ent)
>>>> +{
>>>> + int ret, mem_type;
>>>> + u16 st;
>>>> + u32 cpu_id = ent->cpu_id;
>>>> +
>>>> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
>>>> + return -EINVAL;
>>>> +
>>>> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
>>>> + return -EINVAL;
>>>> +
>>>> + switch (ent->mem_type) {
>>>> + case VFIO_TPH_MEM_TYPE_VMEM:
>>>> + mem_type = TPH_MEM_TYPE_VM;
>>>> + break;
>>>> + case VFIO_TPH_MEM_TYPE_PMEM:
>>>> + mem_type = TPH_MEM_TYPE_PM;
>>>> + break;
>>>> + default:
>>>> + return -EINVAL;
>>>> + }
>>>> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
>>>> topology_core_id(cpu_id),
>>>> + &st);
>>>> + if (ret)
>>>> + return ret;
>>>> + /*
>>>> + * PCI core enforces table bounds and disables TPH on error.
>>>> + */
>>>> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
>>>> +}
>>>> +
>>>> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
>>>> int mode)
>>>> +{
>>>> + /* IV mode is not supported. */
>>>> + if (mode == PCI_TPH_ST_IV_MODE)
>>>> + return -EINVAL;
>>>> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
>>>> + return pcie_enable_tph(vdev->pdev, mode);
>>>> +}
>>>> +
>>>> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
>>>> +{
>>>> + pcie_disable_tph(vdev->pdev);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
>>>> + void __user *uarg)
>>>> +{
>>>> + struct vfio_pci_tph tph;
>>>> +
>>>> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
>>>> + return -EFAULT;
>>>> +
>>>> + if (tph.argsz != sizeof(struct vfio_pci_tph))
>>>> + return -EINVAL;
>>>> +
>>>> + switch (tph.op) {
>>>> + case VFIO_DEVICE_TPH_ENABLE:
>>>> + return vfio_pci_tph_enable(vdev, tph.mode);
>>>> + case VFIO_DEVICE_TPH_DISABLE:
>>>> + return vfio_pci_tph_disable(vdev);
>>>> + case VFIO_DEVICE_TPH_SET_ST:
>>>> + return vfio_pci_tph_set_st(vdev, &tph.ent);
>>>> + default:
>>>> + return -EINVAL;
>>>> + }
>>>> +}
>>>> +
>>>> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
>>>> int cmd,
>>>> unsigned long arg)
>>>> {
>>>> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
>>>> *core_vdev, unsigned int cmd,
>>>> return vfio_pci_ioctl_reset(vdev, uarg);
>>>> case VFIO_DEVICE_SET_IRQS:
>>>> return vfio_pci_ioctl_set_irqs(vdev, uarg);
>>>> + case VFIO_DEVICE_PCI_TPH:
>>>> + return vfio_pci_ioctl_tph(vdev, uarg);
>>>> default:
>>>> return -ENOTTY;
>>>> }
>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>> index 75100bf009ba..cfdee851031e 100644
>>>> --- a/include/uapi/linux/vfio.h
>>>> +++ b/include/uapi/linux/vfio.h
>>>> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>>>> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
>>>> +/**
>>>> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>> + *
>>>> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
>>>> + *
>>>> + * Supported operations:
>>>> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
>>>> + * device-specific (DS) mode. IV mode is not supported via this ioctl
>>>> + * and returns -EINVAL.
>>>> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
>>>> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
>>>> Steering-Tag
>>>> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
>>>> the
>>>> + * value is not returned to userspace.
>>>> + */
>>>> +struct vfio_pci_tph_entry {
>>>> + __u32 cpu_id; /* CPU logical ID */
>>>> + __u8 mem_type;
>>>> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
>>>> ST */
>>>> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
>>>> memory ST */
>>>> + __u8 rsvd[1];
>>>> + __u16 index; /* ST-table index */
>>>> +};
>>>> +
>>>> +struct vfio_pci_tph {
>>>> + __u32 argsz; /* Size of vfio_pci_tph */
>>>> + __u32 mode; /* NS and DS modes; IV not supported */
>>>> + __u32 op;
>>>> +#define VFIO_DEVICE_TPH_ENABLE 0
>>>> +#define VFIO_DEVICE_TPH_DISABLE 1
>>>> +#define VFIO_DEVICE_TPH_SET_ST 2
>>>> + struct vfio_pci_tph_entry ent;
>>>> +};
>>>> +
>>>> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
>>> A quick look at this, it seems its following the way the existing vfio
>>> IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
>>> really change their structure, or don't need a structure in the case
>>> of disable. Why not use IOW() and let the kernel error handling deal
>>> with those two as independent ioctls?
>>>
>>>
>>> Thanks,
>>
>> It will require two IOCTLs. I’m ok with having two IOCTLs for this
>> feature if the maintainers are fine with it.
> TBH, I'm not sure why we didn't use a DEVICE_FEATURE for this. Seems
> like we could implement a SET operation that does enable/disable and
Thanks Alex, it was implemented as a DEVICE_FEATURE in RFC v1,
except it had a GET operation to get the tag to the user; which we
decided to drop.
> another for steering tags. I still need to fully grasp the
> implications of this support though. Thanks,
This is now same as the already merged RDMA TPH feature.
https://lore.kernel.org/linux-rdma/cover.1751907231.git.leon@kernel.org/
--wathsala
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2025-11-06 23:19 ` Wathsala Vithanage
@ 2026-01-29 14:06 ` Wathsala Vithanage
2026-03-11 3:58 ` fengchengwen
0 siblings, 1 reply; 8+ messages in thread
From: Wathsala Vithanage @ 2026-01-29 14:06 UTC (permalink / raw)
To: Alex Williamson
Cc: Jeremy Linton, alex.williamson, jgg, pstanner, kvm,
linux-kernel@vger.kernel.org
Hi Alex,
Just checking back on the VFIO PCI TPH patch below. You’d mentioned
wanting more time to evaluate the implications, so I wanted to see if
you had any remaining concerns or if you’d like me to rework this in a
different direction.
Thanks,
Wathsala
On 11/6/25 17:19, Wathsala Vithanage wrote:
>
> On 11/5/25 13:15, Alex Williamson wrote:
>> On Mon, 27 Oct 2025 09:33:33 -0500
>> Wathsala Vithanage <wathsala.vithanage@arm.com> wrote:
>>
>>> On 10/16/25 16:41, Jeremy Linton wrote:
>>>> Hi,
>>>>
>>>> On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
>>>>> TLP Processing Hints (TPH) let a requester provide steering hints
>>>>> that
>>>>> can enable direct cache injection on supported platforms and PCIe
>>>>> devices. The PCIe core already exposes TPH handling to kernel
>>>>> drivers.
>>>>>
>>>>> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH
>>>>> control
>>>>> to user space to reduce memory latency and improve throughput for
>>>>> polling drivers (e.g., DPDK poll-mode drivers). Through this
>>>>> interface,
>>>>> user-space drivers can:
>>>>> - enable or disable TPH for the device function
>>>>> - program steering tags in device-specific mode
>>>>>
>>>>> The ioctl is available only when the device advertises the TPH
>>>>> Capability. Invalid modes or tags are rejected. No functional change
>>>>> occurs unless the ioctl is used.
>>>>>
>>>>> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
>>>>> ---
>>>>> drivers/vfio/pci/vfio_pci_core.c | 74
>>>>> ++++++++++++++++++++++++++++++++
>>>>> include/uapi/linux/vfio.h | 36 ++++++++++++++++
>>>>> 2 files changed, 110 insertions(+)
>>>>>
>>>>> diff --git a/drivers/vfio/pci/vfio_pci_core.c
>>>>> b/drivers/vfio/pci/vfio_pci_core.c
>>>>> index 7dcf5439dedc..0646d9a483fb 100644
>>>>> --- a/drivers/vfio/pci/vfio_pci_core.c
>>>>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>>>>> @@ -28,6 +28,7 @@
>>>>> #include <linux/nospec.h>
>>>>> #include <linux/sched/mm.h>
>>>>> #include <linux/iommufd.h>
>>>>> +#include <linux/pci-tph.h>
>>>>> #if IS_ENABLED(CONFIG_EEH)
>>>>> #include <asm/eeh.h>
>>>>> #endif
>>>>> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
>>>>> vfio_pci_core_device *vdev,
>>>>> ioeventfd.fd);
>>>>> }
>>>>> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
>>>>> + const struct vfio_pci_tph_entry *ent)
>>>>> +{
>>>>> + int ret, mem_type;
>>>>> + u16 st;
>>>>> + u32 cpu_id = ent->cpu_id;
>>>>> +
>>>>> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
>>>>> + return -EINVAL;
>>>>> +
>>>>> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
>>>>> + return -EINVAL;
>>>>> +
>>>>> + switch (ent->mem_type) {
>>>>> + case VFIO_TPH_MEM_TYPE_VMEM:
>>>>> + mem_type = TPH_MEM_TYPE_VM;
>>>>> + break;
>>>>> + case VFIO_TPH_MEM_TYPE_PMEM:
>>>>> + mem_type = TPH_MEM_TYPE_PM;
>>>>> + break;
>>>>> + default:
>>>>> + return -EINVAL;
>>>>> + }
>>>>> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
>>>>> topology_core_id(cpu_id),
>>>>> + &st);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> + /*
>>>>> + * PCI core enforces table bounds and disables TPH on error.
>>>>> + */
>>>>> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
>>>>> +}
>>>>> +
>>>>> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
>>>>> int mode)
>>>>> +{
>>>>> + /* IV mode is not supported. */
>>>>> + if (mode == PCI_TPH_ST_IV_MODE)
>>>>> + return -EINVAL;
>>>>> + /* PCI core validates 'mode' and returns -EINVAL on bad
>>>>> values. */
>>>>> + return pcie_enable_tph(vdev->pdev, mode);
>>>>> +}
>>>>> +
>>>>> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
>>>>> +{
>>>>> + pcie_disable_tph(vdev->pdev);
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
>>>>> + void __user *uarg)
>>>>> +{
>>>>> + struct vfio_pci_tph tph;
>>>>> +
>>>>> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
>>>>> + return -EFAULT;
>>>>> +
>>>>> + if (tph.argsz != sizeof(struct vfio_pci_tph))
>>>>> + return -EINVAL;
>>>>> +
>>>>> + switch (tph.op) {
>>>>> + case VFIO_DEVICE_TPH_ENABLE:
>>>>> + return vfio_pci_tph_enable(vdev, tph.mode);
>>>>> + case VFIO_DEVICE_TPH_DISABLE:
>>>>> + return vfio_pci_tph_disable(vdev);
>>>>> + case VFIO_DEVICE_TPH_SET_ST:
>>>>> + return vfio_pci_tph_set_st(vdev, &tph.ent);
>>>>> + default:
>>>>> + return -EINVAL;
>>>>> + }
>>>>> +}
>>>>> +
>>>>> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
>>>>> int cmd,
>>>>> unsigned long arg)
>>>>> {
>>>>> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
>>>>> *core_vdev, unsigned int cmd,
>>>>> return vfio_pci_ioctl_reset(vdev, uarg);
>>>>> case VFIO_DEVICE_SET_IRQS:
>>>>> return vfio_pci_ioctl_set_irqs(vdev, uarg);
>>>>> + case VFIO_DEVICE_PCI_TPH:
>>>>> + return vfio_pci_ioctl_tph(vdev, uarg);
>>>>> default:
>>>>> return -ENOTTY;
>>>>> }
>>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>>> index 75100bf009ba..cfdee851031e 100644
>>>>> --- a/include/uapi/linux/vfio.h
>>>>> +++ b/include/uapi/linux/vfio.h
>>>>> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>>>>> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE
>>>>> + 16)
>>>>> +/**
>>>>> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>>> + *
>>>>> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
>>>>> + *
>>>>> + * Supported operations:
>>>>> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
>>>>> + * device-specific (DS) mode. IV mode is not supported via this
>>>>> ioctl
>>>>> + * and returns -EINVAL.
>>>>> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
>>>>> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
>>>>> Steering-Tag
>>>>> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
>>>>> the
>>>>> + * value is not returned to userspace.
>>>>> + */
>>>>> +struct vfio_pci_tph_entry {
>>>>> + __u32 cpu_id; /* CPU logical ID */
>>>>> + __u8 mem_type;
>>>>> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
>>>>> ST */
>>>>> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
>>>>> memory ST */
>>>>> + __u8 rsvd[1];
>>>>> + __u16 index; /* ST-table index */
>>>>> +};
>>>>> +
>>>>> +struct vfio_pci_tph {
>>>>> + __u32 argsz; /* Size of vfio_pci_tph */
>>>>> + __u32 mode; /* NS and DS modes; IV not supported */
>>>>> + __u32 op;
>>>>> +#define VFIO_DEVICE_TPH_ENABLE 0
>>>>> +#define VFIO_DEVICE_TPH_DISABLE 1
>>>>> +#define VFIO_DEVICE_TPH_SET_ST 2
>>>>> + struct vfio_pci_tph_entry ent;
>>>>> +};
>>>>> +
>>>>> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>> A quick look at this, it seems its following the way the existing vfio
>>>> IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
>>>> really change their structure, or don't need a structure in the case
>>>> of disable. Why not use IOW() and let the kernel error handling deal
>>>> with those two as independent ioctls?
>>>>
>>>>
>>>> Thanks,
>>>
>>> It will require two IOCTLs. I’m ok with having two IOCTLs for this
>>> feature if the maintainers are fine with it.
>> TBH, I'm not sure why we didn't use a DEVICE_FEATURE for this. Seems
>> like we could implement a SET operation that does enable/disable and
>
> Thanks Alex, it was implemented as a DEVICE_FEATURE in RFC v1,
> except it had a GET operation to get the tag to the user; which we
> decided to drop.
>
>> another for steering tags. I still need to fully grasp the
>> implications of this support though. Thanks,
> This is now same as the already merged RDMA TPH feature.
> https://lore.kernel.org/linux-rdma/cover.1751907231.git.leon@kernel.org/
>
> --wathsala
>
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2026-01-29 14:06 ` Wathsala Vithanage
@ 2026-03-11 3:58 ` fengchengwen
2026-04-14 17:00 ` Wathsala Vithanage
0 siblings, 1 reply; 8+ messages in thread
From: fengchengwen @ 2026-03-11 3:58 UTC (permalink / raw)
To: Wathsala Vithanage, Alex Williamson
Cc: Jeremy Linton, alex.williamson, jgg, pstanner, kvm,
linux-kernel@vger.kernel.org
We also hope to support this TPH feature in the Kunpeng (ARM server) user-space, and hope that this patch can continue.
@Wathsala, I don't have the previous email, could you please send v2 again? I reviewed v2 [1] and found a few issues that I'd like to discuss.
[1] https://lkml.org/lkml/2025/6/2/1116
Thanks
On 1/29/2026 10:06 PM, Wathsala Vithanage wrote:
> Hi Alex,
>
> Just checking back on the VFIO PCI TPH patch below. You’d mentioned wanting more time to evaluate the implications, so I wanted to see if you had any remaining concerns or if you’d like me to rework this in a different direction.
>
> Thanks,
> Wathsala
>
> On 11/6/25 17:19, Wathsala Vithanage wrote:
>>
>> On 11/5/25 13:15, Alex Williamson wrote:
>>> On Mon, 27 Oct 2025 09:33:33 -0500
>>> Wathsala Vithanage <wathsala.vithanage@arm.com> wrote:
>>>
>>>> On 10/16/25 16:41, Jeremy Linton wrote:
>>>>> Hi,
>>>>>
>>>>> On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
>>>>>> TLP Processing Hints (TPH) let a requester provide steering hints that
>>>>>> can enable direct cache injection on supported platforms and PCIe
>>>>>> devices. The PCIe core already exposes TPH handling to kernel drivers.
>>>>>>
>>>>>> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
>>>>>> to user space to reduce memory latency and improve throughput for
>>>>>> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
>>>>>> user-space drivers can:
>>>>>> - enable or disable TPH for the device function
>>>>>> - program steering tags in device-specific mode
>>>>>>
>>>>>> The ioctl is available only when the device advertises the TPH
>>>>>> Capability. Invalid modes or tags are rejected. No functional change
>>>>>> occurs unless the ioctl is used.
>>>>>>
>>>>>> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
>>>>>> ---
>>>>>> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
>>>>>> include/uapi/linux/vfio.h | 36 ++++++++++++++++
>>>>>> 2 files changed, 110 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/vfio/pci/vfio_pci_core.c
>>>>>> b/drivers/vfio/pci/vfio_pci_core.c
>>>>>> index 7dcf5439dedc..0646d9a483fb 100644
>>>>>> --- a/drivers/vfio/pci/vfio_pci_core.c
>>>>>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>>>>>> @@ -28,6 +28,7 @@
>>>>>> #include <linux/nospec.h>
>>>>>> #include <linux/sched/mm.h>
>>>>>> #include <linux/iommufd.h>
>>>>>> +#include <linux/pci-tph.h>
>>>>>> #if IS_ENABLED(CONFIG_EEH)
>>>>>> #include <asm/eeh.h>
>>>>>> #endif
>>>>>> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
>>>>>> vfio_pci_core_device *vdev,
>>>>>> ioeventfd.fd);
>>>>>> }
>>>>>> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
>>>>>> + const struct vfio_pci_tph_entry *ent)
>>>>>> +{
>>>>>> + int ret, mem_type;
>>>>>> + u16 st;
>>>>>> + u32 cpu_id = ent->cpu_id;
>>>>>> +
>>>>>> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
>>>>>> + return -EINVAL;
>>>>>> +
>>>>>> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
>>>>>> + return -EINVAL;
>>>>>> +
>>>>>> + switch (ent->mem_type) {
>>>>>> + case VFIO_TPH_MEM_TYPE_VMEM:
>>>>>> + mem_type = TPH_MEM_TYPE_VM;
>>>>>> + break;
>>>>>> + case VFIO_TPH_MEM_TYPE_PMEM:
>>>>>> + mem_type = TPH_MEM_TYPE_PM;
>>>>>> + break;
>>>>>> + default:
>>>>>> + return -EINVAL;
>>>>>> + }
>>>>>> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
>>>>>> topology_core_id(cpu_id),
>>>>>> + &st);
>>>>>> + if (ret)
>>>>>> + return ret;
>>>>>> + /*
>>>>>> + * PCI core enforces table bounds and disables TPH on error.
>>>>>> + */
>>>>>> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
>>>>>> +}
>>>>>> +
>>>>>> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
>>>>>> int mode)
>>>>>> +{
>>>>>> + /* IV mode is not supported. */
>>>>>> + if (mode == PCI_TPH_ST_IV_MODE)
>>>>>> + return -EINVAL;
>>>>>> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
>>>>>> + return pcie_enable_tph(vdev->pdev, mode);
>>>>>> +}
>>>>>> +
>>>>>> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
>>>>>> +{
>>>>>> + pcie_disable_tph(vdev->pdev);
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
>>>>>> + void __user *uarg)
>>>>>> +{
>>>>>> + struct vfio_pci_tph tph;
>>>>>> +
>>>>>> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
>>>>>> + return -EFAULT;
>>>>>> +
>>>>>> + if (tph.argsz != sizeof(struct vfio_pci_tph))
>>>>>> + return -EINVAL;
>>>>>> +
>>>>>> + switch (tph.op) {
>>>>>> + case VFIO_DEVICE_TPH_ENABLE:
>>>>>> + return vfio_pci_tph_enable(vdev, tph.mode);
>>>>>> + case VFIO_DEVICE_TPH_DISABLE:
>>>>>> + return vfio_pci_tph_disable(vdev);
>>>>>> + case VFIO_DEVICE_TPH_SET_ST:
>>>>>> + return vfio_pci_tph_set_st(vdev, &tph.ent);
>>>>>> + default:
>>>>>> + return -EINVAL;
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
>>>>>> int cmd,
>>>>>> unsigned long arg)
>>>>>> {
>>>>>> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
>>>>>> *core_vdev, unsigned int cmd,
>>>>>> return vfio_pci_ioctl_reset(vdev, uarg);
>>>>>> case VFIO_DEVICE_SET_IRQS:
>>>>>> return vfio_pci_ioctl_set_irqs(vdev, uarg);
>>>>>> + case VFIO_DEVICE_PCI_TPH:
>>>>>> + return vfio_pci_ioctl_tph(vdev, uarg);
>>>>>> default:
>>>>>> return -ENOTTY;
>>>>>> }
>>>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>>>> index 75100bf009ba..cfdee851031e 100644
>>>>>> --- a/include/uapi/linux/vfio.h
>>>>>> +++ b/include/uapi/linux/vfio.h
>>>>>> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>>>>>> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
>>>>>> +/**
>>>>>> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>>>> + *
>>>>>> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
>>>>>> + *
>>>>>> + * Supported operations:
>>>>>> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
>>>>>> + * device-specific (DS) mode. IV mode is not supported via this ioctl
>>>>>> + * and returns -EINVAL.
>>>>>> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
>>>>>> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
>>>>>> Steering-Tag
>>>>>> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
>>>>>> the
>>>>>> + * value is not returned to userspace.
>>>>>> + */
>>>>>> +struct vfio_pci_tph_entry {
>>>>>> + __u32 cpu_id; /* CPU logical ID */
>>>>>> + __u8 mem_type;
>>>>>> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
>>>>>> ST */
>>>>>> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
>>>>>> memory ST */
>>>>>> + __u8 rsvd[1];
>>>>>> + __u16 index; /* ST-table index */
>>>>>> +};
>>>>>> +
>>>>>> +struct vfio_pci_tph {
>>>>>> + __u32 argsz; /* Size of vfio_pci_tph */
>>>>>> + __u32 mode; /* NS and DS modes; IV not supported */
>>>>>> + __u32 op;
>>>>>> +#define VFIO_DEVICE_TPH_ENABLE 0
>>>>>> +#define VFIO_DEVICE_TPH_DISABLE 1
>>>>>> +#define VFIO_DEVICE_TPH_SET_ST 2
>>>>>> + struct vfio_pci_tph_entry ent;
>>>>>> +};
>>>>>> +
>>>>>> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>>> A quick look at this, it seems its following the way the existing vfio
>>>>> IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
>>>>> really change their structure, or don't need a structure in the case
>>>>> of disable. Why not use IOW() and let the kernel error handling deal
>>>>> with those two as independent ioctls?
>>>>>
>>>>>
>>>>> Thanks,
>>>>
>>>> It will require two IOCTLs. I’m ok with having two IOCTLs for this
>>>> feature if the maintainers are fine with it.
>>> TBH, I'm not sure why we didn't use a DEVICE_FEATURE for this. Seems
>>> like we could implement a SET operation that does enable/disable and
>>
>> Thanks Alex, it was implemented as a DEVICE_FEATURE in RFC v1,
>> except it had a GET operation to get the tag to the user; which we
>> decided to drop.
>>
>>> another for steering tags. I still need to fully grasp the
>>> implications of this support though. Thanks,
>> This is now same as the already merged RDMA TPH feature.
>> https://lore.kernel.org/linux-rdma/cover.1751907231.git.leon@kernel.org/
>>
>> --wathsala
>>
>>
>>
>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl
2026-03-11 3:58 ` fengchengwen
@ 2026-04-14 17:00 ` Wathsala Vithanage
0 siblings, 0 replies; 8+ messages in thread
From: Wathsala Vithanage @ 2026-04-14 17:00 UTC (permalink / raw)
To: fengchengwen, Alex Williamson
Cc: Jeremy Linton, alex.williamson, jgg, pstanner, kvm,
linux-kernel@vger.kernel.org
Hi Feng,
> We also hope to support this TPH feature in the Kunpeng (ARM server) user-space, and hope that this patch can continue.
>
> @Wathsala, I don't have the previous email, could you please send v2 again? I reviewed v2 [1] and found a few issues that I'd like to discuss.
>
> [1] https://lkml.org/lkml/2025/6/2/1116
This is the last patch I sent here. This is a more simplified version
of the previous RFCs (which I see you have commented on)
However, I haven't heard form Alex, since my last follow up.
Happy to discuss if you are still interested.
--wathsala
>
> Thanks
>
> On 1/29/2026 10:06 PM, Wathsala Vithanage wrote:
>> Hi Alex,
>>
>> Just checking back on the VFIO PCI TPH patch below. You’d mentioned wanting more time to evaluate the implications, so I wanted to see if you had any remaining concerns or if you’d like me to rework this in a different direction.
>>
>> Thanks,
>> Wathsala
>>
>> On 11/6/25 17:19, Wathsala Vithanage wrote:
>>> On 11/5/25 13:15, Alex Williamson wrote:
>>>> On Mon, 27 Oct 2025 09:33:33 -0500
>>>> Wathsala Vithanage <wathsala.vithanage@arm.com> wrote:
>>>>
>>>>> On 10/16/25 16:41, Jeremy Linton wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 10/13/25 11:35 AM, Wathsala Vithanage wrote:
>>>>>>> TLP Processing Hints (TPH) let a requester provide steering hints that
>>>>>>> can enable direct cache injection on supported platforms and PCIe
>>>>>>> devices. The PCIe core already exposes TPH handling to kernel drivers.
>>>>>>>
>>>>>>> This change adds the VFIO_DEVICE_PCI_TPH ioctl and exposes TPH control
>>>>>>> to user space to reduce memory latency and improve throughput for
>>>>>>> polling drivers (e.g., DPDK poll-mode drivers). Through this interface,
>>>>>>> user-space drivers can:
>>>>>>> - enable or disable TPH for the device function
>>>>>>> - program steering tags in device-specific mode
>>>>>>>
>>>>>>> The ioctl is available only when the device advertises the TPH
>>>>>>> Capability. Invalid modes or tags are rejected. No functional change
>>>>>>> occurs unless the ioctl is used.
>>>>>>>
>>>>>>> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
>>>>>>> ---
>>>>>>> drivers/vfio/pci/vfio_pci_core.c | 74 ++++++++++++++++++++++++++++++++
>>>>>>> include/uapi/linux/vfio.h | 36 ++++++++++++++++
>>>>>>> 2 files changed, 110 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/vfio/pci/vfio_pci_core.c
>>>>>>> b/drivers/vfio/pci/vfio_pci_core.c
>>>>>>> index 7dcf5439dedc..0646d9a483fb 100644
>>>>>>> --- a/drivers/vfio/pci/vfio_pci_core.c
>>>>>>> +++ b/drivers/vfio/pci/vfio_pci_core.c
>>>>>>> @@ -28,6 +28,7 @@
>>>>>>> #include <linux/nospec.h>
>>>>>>> #include <linux/sched/mm.h>
>>>>>>> #include <linux/iommufd.h>
>>>>>>> +#include <linux/pci-tph.h>
>>>>>>> #if IS_ENABLED(CONFIG_EEH)
>>>>>>> #include <asm/eeh.h>
>>>>>>> #endif
>>>>>>> @@ -1443,6 +1444,77 @@ static int vfio_pci_ioctl_ioeventfd(struct
>>>>>>> vfio_pci_core_device *vdev,
>>>>>>> ioeventfd.fd);
>>>>>>> }
>>>>>>> +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev,
>>>>>>> + const struct vfio_pci_tph_entry *ent)
>>>>>>> +{
>>>>>>> + int ret, mem_type;
>>>>>>> + u16 st;
>>>>>>> + u32 cpu_id = ent->cpu_id;
>>>>>>> +
>>>>>>> + if (cpu_id >= nr_cpu_ids || !cpu_present(cpu_id))
>>>>>>> + return -EINVAL;
>>>>>>> +
>>>>>>> + if (!cpumask_test_cpu(cpu_id, current->cpus_ptr))
>>>>>>> + return -EINVAL;
>>>>>>> +
>>>>>>> + switch (ent->mem_type) {
>>>>>>> + case VFIO_TPH_MEM_TYPE_VMEM:
>>>>>>> + mem_type = TPH_MEM_TYPE_VM;
>>>>>>> + break;
>>>>>>> + case VFIO_TPH_MEM_TYPE_PMEM:
>>>>>>> + mem_type = TPH_MEM_TYPE_PM;
>>>>>>> + break;
>>>>>>> + default:
>>>>>>> + return -EINVAL;
>>>>>>> + }
>>>>>>> + ret = pcie_tph_get_cpu_st(vdev->pdev, mem_type,
>>>>>>> topology_core_id(cpu_id),
>>>>>>> + &st);
>>>>>>> + if (ret)
>>>>>>> + return ret;
>>>>>>> + /*
>>>>>>> + * PCI core enforces table bounds and disables TPH on error.
>>>>>>> + */
>>>>>>> + return pcie_tph_set_st_entry(vdev->pdev, ent->index, st);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev,
>>>>>>> int mode)
>>>>>>> +{
>>>>>>> + /* IV mode is not supported. */
>>>>>>> + if (mode == PCI_TPH_ST_IV_MODE)
>>>>>>> + return -EINVAL;
>>>>>>> + /* PCI core validates 'mode' and returns -EINVAL on bad values. */
>>>>>>> + return pcie_enable_tph(vdev->pdev, mode);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev)
>>>>>>> +{
>>>>>>> + pcie_disable_tph(vdev->pdev);
>>>>>>> + return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev,
>>>>>>> + void __user *uarg)
>>>>>>> +{
>>>>>>> + struct vfio_pci_tph tph;
>>>>>>> +
>>>>>>> + if (copy_from_user(&tph, uarg, sizeof(struct vfio_pci_tph)))
>>>>>>> + return -EFAULT;
>>>>>>> +
>>>>>>> + if (tph.argsz != sizeof(struct vfio_pci_tph))
>>>>>>> + return -EINVAL;
>>>>>>> +
>>>>>>> + switch (tph.op) {
>>>>>>> + case VFIO_DEVICE_TPH_ENABLE:
>>>>>>> + return vfio_pci_tph_enable(vdev, tph.mode);
>>>>>>> + case VFIO_DEVICE_TPH_DISABLE:
>>>>>>> + return vfio_pci_tph_disable(vdev);
>>>>>>> + case VFIO_DEVICE_TPH_SET_ST:
>>>>>>> + return vfio_pci_tph_set_st(vdev, &tph.ent);
>>>>>>> + default:
>>>>>>> + return -EINVAL;
>>>>>>> + }
>>>>>>> +}
>>>>>>> +
>>>>>>> long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned
>>>>>>> int cmd,
>>>>>>> unsigned long arg)
>>>>>>> {
>>>>>>> @@ -1467,6 +1539,8 @@ long vfio_pci_core_ioctl(struct vfio_device
>>>>>>> *core_vdev, unsigned int cmd,
>>>>>>> return vfio_pci_ioctl_reset(vdev, uarg);
>>>>>>> case VFIO_DEVICE_SET_IRQS:
>>>>>>> return vfio_pci_ioctl_set_irqs(vdev, uarg);
>>>>>>> + case VFIO_DEVICE_PCI_TPH:
>>>>>>> + return vfio_pci_ioctl_tph(vdev, uarg);
>>>>>>> default:
>>>>>>> return -ENOTTY;
>>>>>>> }
>>>>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>>>>> index 75100bf009ba..cfdee851031e 100644
>>>>>>> --- a/include/uapi/linux/vfio.h
>>>>>>> +++ b/include/uapi/linux/vfio.h
>>>>>>> @@ -873,6 +873,42 @@ struct vfio_device_ioeventfd {
>>>>>>> #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
>>>>>>> +/**
>>>>>>> + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>>>>> + *
>>>>>>> + * Control PCIe TLP Processing Hints (TPH) on a PCIe device.
>>>>>>> + *
>>>>>>> + * Supported operations:
>>>>>>> + * - VFIO_DEVICE_TPH_ENABLE: enable TPH in no-steering-tag (NS) or
>>>>>>> + * device-specific (DS) mode. IV mode is not supported via this ioctl
>>>>>>> + * and returns -EINVAL.
>>>>>>> + * - VFIO_DEVICE_TPH_DISABLE: disable TPH on the device.
>>>>>>> + * - VFIO_DEVICE_TPH_SET_ST: program an entry in the device TPH
>>>>>>> Steering-Tag
>>>>>>> + * (ST) table. The kernel derives the ST from cpu_id and mem_type;
>>>>>>> the
>>>>>>> + * value is not returned to userspace.
>>>>>>> + */
>>>>>>> +struct vfio_pci_tph_entry {
>>>>>>> + __u32 cpu_id; /* CPU logical ID */
>>>>>>> + __u8 mem_type;
>>>>>>> +#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory
>>>>>>> ST */
>>>>>>> +#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent
>>>>>>> memory ST */
>>>>>>> + __u8 rsvd[1];
>>>>>>> + __u16 index; /* ST-table index */
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct vfio_pci_tph {
>>>>>>> + __u32 argsz; /* Size of vfio_pci_tph */
>>>>>>> + __u32 mode; /* NS and DS modes; IV not supported */
>>>>>>> + __u32 op;
>>>>>>> +#define VFIO_DEVICE_TPH_ENABLE 0
>>>>>>> +#define VFIO_DEVICE_TPH_DISABLE 1
>>>>>>> +#define VFIO_DEVICE_TPH_SET_ST 2
>>>>>>> + struct vfio_pci_tph_entry ent;
>>>>>>> +};
>>>>>>> +
>>>>>>> +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>>>> A quick look at this, it seems its following the way the existing vfio
>>>>>> IOCTls are defined, yet two of them (ENABLE and DISABLE) won't likely
>>>>>> really change their structure, or don't need a structure in the case
>>>>>> of disable. Why not use IOW() and let the kernel error handling deal
>>>>>> with those two as independent ioctls?
>>>>>>
>>>>>>
>>>>>> Thanks,
>>>>> It will require two IOCTLs. I’m ok with having two IOCTLs for this
>>>>> feature if the maintainers are fine with it.
>>>> TBH, I'm not sure why we didn't use a DEVICE_FEATURE for this. Seems
>>>> like we could implement a SET operation that does enable/disable and
>>> Thanks Alex, it was implemented as a DEVICE_FEATURE in RFC v1,
>>> except it had a GET operation to get the tag to the user; which we
>>> decided to drop.
>>>
>>>> another for steering tags. I still need to fully grasp the
>>>> implications of this support though. Thanks,
>>> This is now same as the already merged RDMA TPH feature.
>>> https://lore.kernel.org/linux-rdma/cover.1751907231.git.leon@kernel.org/
>>>
>>> --wathsala
>>>
>>>
>>>
>>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2026-04-14 17:00 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-13 16:35 [PATCH 1/1] vfio/pci: add PCIe TPH device ioctl Wathsala Vithanage
2025-10-16 21:41 ` Jeremy Linton
2025-10-27 14:33 ` Wathsala Vithanage
2025-11-05 19:15 ` Alex Williamson
2025-11-06 23:19 ` Wathsala Vithanage
2026-01-29 14:06 ` Wathsala Vithanage
2026-03-11 3:58 ` fengchengwen
2026-04-14 17:00 ` Wathsala Vithanage
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox