* [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-06 23:41 [PATCH v3 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
@ 2026-03-06 23:41 ` Nicolin Chen
2026-03-08 20:49 ` Nirmoy Das
` (2 more replies)
2026-03-06 23:41 ` [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
2026-03-06 23:41 ` [PATCH v3 3/3] iommu/arm-smmu-v3: Allow ATS to be always on Nicolin Chen
2 siblings, 3 replies; 13+ messages in thread
From: Nicolin Chen @ 2026-03-06 23:41 UTC (permalink / raw)
To: jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
Controlled by the IOMMU driver, ATS is usually enabled "on demand" when a
device requests a translation service from its associated IOMMU HW running
on the channel of a given PASID. This is working even when a device has no
translation on its RID (i.e., the RID is IOMMU bypassed).
However, certain PCIe devices require non-PASID ATS on their RID even when
the RID is IOMMU bypassed. Call this "always on".
For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
"To source requests on CXL.cache, devices need to get the Host Physical
Address (HPA) from the Host by means of an ATS request on CXL.io."
In other words, the CXL.cache capability requires ATS; otherwise, it can't
access host physical memory.
Introduce a new pci_ats_always_on() helper for the IOMMU driver to scan a
PCI device and shift ATS policies between "on demand" and "always on".
Add the support for CXL.cache devices first. Pre-CXL devices will be added
in quirks.c file.
Note that pci_ats_always_on() validates against pci_ats_supported(), so we
ensure that untrusted devices (e.g. external ports) will not be always on.
This maintains the existing ATS security policy regarding potential side-
channel attacks via ATS.
Cc: linux-cxl@vger.kernel.org
Suggested-by: Vikram Sethi <vsethi@nvidia.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
include/linux/pci-ats.h | 3 +++
include/uapi/linux/pci_regs.h | 1 +
drivers/pci/ats.c | 42 +++++++++++++++++++++++++++++++++++
3 files changed, 46 insertions(+)
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 75c6c86cf09dc..d14ba727d38b3 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
+bool pci_ats_always_on(struct pci_dev *dev);
#else /* CONFIG_PCI_ATS */
static inline bool pci_ats_supported(struct pci_dev *d)
{ return false; }
@@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d)
{ return -ENODEV; }
static inline int pci_ats_page_aligned(struct pci_dev *dev)
{ return 0; }
+static inline bool pci_ats_always_on(struct pci_dev *dev)
+{ return false; }
#endif /* CONFIG_PCI_ATS */
#ifdef CONFIG_PCI_PRI
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 14f634ab9350d..6ac45be1008b8 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1349,6 +1349,7 @@
/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
#define PCI_DVSEC_CXL_DEVICE 0
#define PCI_DVSEC_CXL_CAP 0xA
+#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
#define PCI_DVSEC_CXL_CTRL 0xC
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index ec6c8dbdc5e9c..cf262eb6e6890 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -205,6 +205,48 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
return 0;
}
+/*
+ * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
+ * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
+ * by means of an ATS request on CXL.io.
+ *
+ * In other world, CXL.cache devices cannot access host physical memory without
+ * ATS.
+ */
+static bool pci_cxl_ats_always_on(struct pci_dev *pdev)
+{
+ u16 cap = 0;
+ int offset;
+
+ offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ PCI_DVSEC_CXL_DEVICE);
+ if (!offset)
+ return false;
+
+ pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap);
+
+ return cap & PCI_DVSEC_CXL_CACHE_CAPABLE;
+}
+
+/**
+ * pci_ats_always_on - Whether the PCI device requires ATS to be always enabled
+ * @pdev: the PCI device
+ *
+ * Returns true, if the PCI device requires ATS for basic functional operation.
+ */
+bool pci_ats_always_on(struct pci_dev *pdev)
+{
+ if (pci_ats_disabled() || !pci_ats_supported(pdev))
+ return false;
+
+ /* A VF inherits its PF's requirement for ATS function */
+ if (pdev->is_virtfn)
+ pdev = pci_physfn(pdev);
+
+ return pci_cxl_ats_always_on(pdev);
+}
+EXPORT_SYMBOL_GPL(pci_ats_always_on);
+
#ifdef CONFIG_PCI_PRI
void pci_pri_init(struct pci_dev *pdev)
{
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-06 23:41 ` [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices Nicolin Chen
@ 2026-03-08 20:49 ` Nirmoy Das
2026-03-08 20:53 ` Nirmoy Das
2026-03-09 11:48 ` Jonathan Cameron
2026-03-26 21:38 ` Bjorn Helgaas
2 siblings, 1 reply; 13+ messages in thread
From: Nirmoy Das @ 2026-03-08 20:49 UTC (permalink / raw)
To: Nicolin Chen, jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
On 07.03.26 00:41, Nicolin Chen wrote:
> Controlled by the IOMMU driver, ATS is usually enabled "on demand" when a
> device requests a translation service from its associated IOMMU HW running
> on the channel of a given PASID. This is working even when a device has no
> translation on its RID (i.e., the RID is IOMMU bypassed).
>
> However, certain PCIe devices require non-PASID ATS on their RID even when
> the RID is IOMMU bypassed. Call this "always on".
>
> For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
> "To source requests on CXL.cache, devices need to get the Host Physical
> Address (HPA) from the Host by means of an ATS request on CXL.io."
>
> In other words, the CXL.cache capability requires ATS; otherwise, it can't
> access host physical memory.
>
> Introduce a new pci_ats_always_on() helper for the IOMMU driver to scan a
> PCI device and shift ATS policies between "on demand" and "always on".
>
> Add the support for CXL.cache devices first. Pre-CXL devices will be added
> in quirks.c file.
>
> Note that pci_ats_always_on() validates against pci_ats_supported(), so we
> ensure that untrusted devices (e.g. external ports) will not be always on.
> This maintains the existing ATS security policy regarding potential side-
> channel attacks via ATS.
>
> Cc: linux-cxl@vger.kernel.org
> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested the series with a Type 2 CXL device.
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Nirmoy Das <nirmoy@nvidia.com>
> ---
> include/linux/pci-ats.h | 3 +++
> include/uapi/linux/pci_regs.h | 1 +
> drivers/pci/ats.c | 42 +++++++++++++++++++++++++++++++++++
> 3 files changed, 46 insertions(+)
>
> diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
> index 75c6c86cf09dc..d14ba727d38b3 100644
> --- a/include/linux/pci-ats.h
> +++ b/include/linux/pci-ats.h
> @@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
> void pci_disable_ats(struct pci_dev *dev);
> int pci_ats_queue_depth(struct pci_dev *dev);
> int pci_ats_page_aligned(struct pci_dev *dev);
> +bool pci_ats_always_on(struct pci_dev *dev);
> #else /* CONFIG_PCI_ATS */
> static inline bool pci_ats_supported(struct pci_dev *d)
> { return false; }
> @@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d)
> { return -ENODEV; }
> static inline int pci_ats_page_aligned(struct pci_dev *dev)
> { return 0; }
> +static inline bool pci_ats_always_on(struct pci_dev *dev)
> +{ return false; }
> #endif /* CONFIG_PCI_ATS */
>
> #ifdef CONFIG_PCI_PRI
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index 14f634ab9350d..6ac45be1008b8 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -1349,6 +1349,7 @@
> /* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
> #define PCI_DVSEC_CXL_DEVICE 0
> #define PCI_DVSEC_CXL_CAP 0xA
> +#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
> #define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
> #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
> #define PCI_DVSEC_CXL_CTRL 0xC
> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> index ec6c8dbdc5e9c..cf262eb6e6890 100644
> --- a/drivers/pci/ats.c
> +++ b/drivers/pci/ats.c
> @@ -205,6 +205,48 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
> return 0;
> }
>
> +/*
> + * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
> + * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
> + * by means of an ATS request on CXL.io.
> + *
> + * In other world, CXL.cache devices cannot access host physical memory without
> + * ATS.
> + */
> +static bool pci_cxl_ats_always_on(struct pci_dev *pdev)
> +{
> + u16 cap = 0;
> + int offset;
> +
> + offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + PCI_DVSEC_CXL_DEVICE);
> + if (!offset)
> + return false;
> +
> + pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap);
> +
> + return cap & PCI_DVSEC_CXL_CACHE_CAPABLE;
> +}
> +
> +/**
> + * pci_ats_always_on - Whether the PCI device requires ATS to be always enabled
> + * @pdev: the PCI device
> + *
> + * Returns true, if the PCI device requires ATS for basic functional operation.
> + */
> +bool pci_ats_always_on(struct pci_dev *pdev)
> +{
> + if (pci_ats_disabled() || !pci_ats_supported(pdev))
> + return false;
> +
> + /* A VF inherits its PF's requirement for ATS function */
> + if (pdev->is_virtfn)
> + pdev = pci_physfn(pdev);
> +
> + return pci_cxl_ats_always_on(pdev);
> +}
> +EXPORT_SYMBOL_GPL(pci_ats_always_on);
> +
> #ifdef CONFIG_PCI_PRI
> void pci_pri_init(struct pci_dev *pdev)
> {
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-08 20:49 ` Nirmoy Das
@ 2026-03-08 20:53 ` Nirmoy Das
0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2026-03-08 20:53 UTC (permalink / raw)
To: Nicolin Chen, jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
On 08.03.26 21:49, Nirmoy Das wrote:
>
> On 07.03.26 00:41, Nicolin Chen wrote:
>> Controlled by the IOMMU driver, ATS is usually enabled "on demand"
>> when a
>> device requests a translation service from its associated IOMMU HW
>> running
>> on the channel of a given PASID. This is working even when a device
>> has no
>> translation on its RID (i.e., the RID is IOMMU bypassed).
>>
>> However, certain PCIe devices require non-PASID ATS on their RID even
>> when
>> the RID is IOMMU bypassed. Call this "always on".
>>
>> For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
>> "To source requests on CXL.cache, devices need to get the Host Physical
>> Address (HPA) from the Host by means of an ATS request on CXL.io."
>>
>> In other words, the CXL.cache capability requires ATS; otherwise, it
>> can't
>> access host physical memory.
>>
>> Introduce a new pci_ats_always_on() helper for the IOMMU driver to
>> scan a
>> PCI device and shift ATS policies between "on demand" and "always on".
>>
>> Add the support for CXL.cache devices first. Pre-CXL devices will be
>> added
>> in quirks.c file.
>>
>> Note that pci_ats_always_on() validates against pci_ats_supported(),
>> so we
>> ensure that untrusted devices (e.g. external ports) will not be
>> always on.
>> This maintains the existing ATS security policy regarding potential
>> side-
>> channel attacks via ATS.
>>
>> Cc: linux-cxl@vger.kernel.org
>> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>
> Tested the series with a Type 2 CXL device.
>
> Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
>
> Acked-by: Nirmoy Das <nirmoy@nvidia.com>
Sent with wrong email address
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
>
>> ---
>> include/linux/pci-ats.h | 3 +++
>> include/uapi/linux/pci_regs.h | 1 +
>> drivers/pci/ats.c | 42 +++++++++++++++++++++++++++++++++++
>> 3 files changed, 46 insertions(+)
>>
>> diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
>> index 75c6c86cf09dc..d14ba727d38b3 100644
>> --- a/include/linux/pci-ats.h
>> +++ b/include/linux/pci-ats.h
>> @@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
>> void pci_disable_ats(struct pci_dev *dev);
>> int pci_ats_queue_depth(struct pci_dev *dev);
>> int pci_ats_page_aligned(struct pci_dev *dev);
>> +bool pci_ats_always_on(struct pci_dev *dev);
>> #else /* CONFIG_PCI_ATS */
>> static inline bool pci_ats_supported(struct pci_dev *d)
>> { return false; }
>> @@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct
>> pci_dev *d)
>> { return -ENODEV; }
>> static inline int pci_ats_page_aligned(struct pci_dev *dev)
>> { return 0; }
>> +static inline bool pci_ats_always_on(struct pci_dev *dev)
>> +{ return false; }
>> #endif /* CONFIG_PCI_ATS */
>> #ifdef CONFIG_PCI_PRI
>> diff --git a/include/uapi/linux/pci_regs.h
>> b/include/uapi/linux/pci_regs.h
>> index 14f634ab9350d..6ac45be1008b8 100644
>> --- a/include/uapi/linux/pci_regs.h
>> +++ b/include/uapi/linux/pci_regs.h
>> @@ -1349,6 +1349,7 @@
>> /* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
>> #define PCI_DVSEC_CXL_DEVICE 0
>> #define PCI_DVSEC_CXL_CAP 0xA
>> +#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
>> #define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
>> #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
>> #define PCI_DVSEC_CXL_CTRL 0xC
>> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
>> index ec6c8dbdc5e9c..cf262eb6e6890 100644
>> --- a/drivers/pci/ats.c
>> +++ b/drivers/pci/ats.c
>> @@ -205,6 +205,48 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
>> return 0;
>> }
>> +/*
>> + * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source
>> requests on
>> + * CXL.cache, devices need to get the Host Physical Address (HPA)
>> from the Host
>> + * by means of an ATS request on CXL.io.
>> + *
>> + * In other world, CXL.cache devices cannot access host physical
>> memory without
>> + * ATS.
>> + */
>> +static bool pci_cxl_ats_always_on(struct pci_dev *pdev)
>> +{
>> + u16 cap = 0;
>> + int offset;
>> +
>> + offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
>> + PCI_DVSEC_CXL_DEVICE);
>> + if (!offset)
>> + return false;
>> +
>> + pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap);
>> +
>> + return cap & PCI_DVSEC_CXL_CACHE_CAPABLE;
>> +}
>> +
>> +/**
>> + * pci_ats_always_on - Whether the PCI device requires ATS to be
>> always enabled
>> + * @pdev: the PCI device
>> + *
>> + * Returns true, if the PCI device requires ATS for basic functional
>> operation.
>> + */
>> +bool pci_ats_always_on(struct pci_dev *pdev)
>> +{
>> + if (pci_ats_disabled() || !pci_ats_supported(pdev))
>> + return false;
>> +
>> + /* A VF inherits its PF's requirement for ATS function */
>> + if (pdev->is_virtfn)
>> + pdev = pci_physfn(pdev);
>> +
>> + return pci_cxl_ats_always_on(pdev);
>> +}
>> +EXPORT_SYMBOL_GPL(pci_ats_always_on);
>> +
>> #ifdef CONFIG_PCI_PRI
>> void pci_pri_init(struct pci_dev *pdev)
>> {
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-06 23:41 ` [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices Nicolin Chen
2026-03-08 20:49 ` Nirmoy Das
@ 2026-03-09 11:48 ` Jonathan Cameron
2026-03-26 21:38 ` Bjorn Helgaas
2 siblings, 0 replies; 13+ messages in thread
From: Jonathan Cameron @ 2026-03-09 11:48 UTC (permalink / raw)
To: Nicolin Chen
Cc: jgg, will, robin.murphy, bhelgaas, joro, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, vsethi, linux-cxl
On Fri, 6 Mar 2026 15:41:15 -0800
Nicolin Chen <nicolinc@nvidia.com> wrote:
> Controlled by the IOMMU driver, ATS is usually enabled "on demand" when a
> device requests a translation service from its associated IOMMU HW running
> on the channel of a given PASID. This is working even when a device has no
> translation on its RID (i.e., the RID is IOMMU bypassed).
>
> However, certain PCIe devices require non-PASID ATS on their RID even when
> the RID is IOMMU bypassed. Call this "always on".
>
> For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
> "To source requests on CXL.cache, devices need to get the Host Physical
> Address (HPA) from the Host by means of an ATS request on CXL.io."
>
> In other words, the CXL.cache capability requires ATS; otherwise, it can't
> access host physical memory.
>
> Introduce a new pci_ats_always_on() helper for the IOMMU driver to scan a
> PCI device and shift ATS policies between "on demand" and "always on".
>
> Add the support for CXL.cache devices first. Pre-CXL devices will be added
> in quirks.c file.
>
> Note that pci_ats_always_on() validates against pci_ats_supported(), so we
> ensure that untrusted devices (e.g. external ports) will not be always on.
> This maintains the existing ATS security policy regarding potential side-
> channel attacks via ATS.
>
> Cc: linux-cxl@vger.kernel.org
> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-06 23:41 ` [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices Nicolin Chen
2026-03-08 20:49 ` Nirmoy Das
2026-03-09 11:48 ` Jonathan Cameron
@ 2026-03-26 21:38 ` Bjorn Helgaas
2026-03-26 21:51 ` Jason Gunthorpe
2 siblings, 1 reply; 13+ messages in thread
From: Bjorn Helgaas @ 2026-03-26 21:38 UTC (permalink / raw)
To: Nicolin Chen
Cc: jgg, will, robin.murphy, bhelgaas, joro, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl
On Fri, Mar 06, 2026 at 03:41:15PM -0800, Nicolin Chen wrote:
> Controlled by the IOMMU driver, ATS is usually enabled "on demand" when a
> device requests a translation service from its associated IOMMU HW running
> on the channel of a given PASID. This is working even when a device has no
> translation on its RID (i.e., the RID is IOMMU bypassed).
>
> However, certain PCIe devices require non-PASID ATS on their RID even when
> the RID is IOMMU bypassed. Call this "always on".
>
> For instance, the CXL spec notes in "3.2.5.13 Memory Type on CXL.cache":
> "To source requests on CXL.cache, devices need to get the Host Physical
> Address (HPA) from the Host by means of an ATS request on CXL.io."
Add CXL spec rev, e.g., "CXL r4.0, sec 3.2.5.13"
> In other words, the CXL.cache capability requires ATS; otherwise, it can't
> access host physical memory.
>
> Introduce a new pci_ats_always_on() helper for the IOMMU driver to scan a
> PCI device and shift ATS policies between "on demand" and "always on".
>
> Add the support for CXL.cache devices first. Pre-CXL devices will be added
> in quirks.c file.
>
> Note that pci_ats_always_on() validates against pci_ats_supported(), so we
> ensure that untrusted devices (e.g. external ports) will not be always on.
> This maintains the existing ATS security policy regarding potential side-
> channel attacks via ATS.
I don't think this really has anything to do with the PCI core.
pci_ats_always_on() doesn't *do* anything with a PCI device; it just
looks for PCI_DVSEC_CXL_CACHE_CAPABLE, and the caller figures out what
to do with the result. This doesn't make the PCI core turn on ATS
automatically by itself, and the PCI core doesn't care whether the
IOMMU driver always enables ATS.
It's only called from arm-smmu-v3.c. Is there something unique about
SMMU, or will other IOMMUs need something similar?
> +++ b/drivers/pci/ats.c
> @@ -205,6 +205,48 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
> return 0;
> }
>
> +/*
> + * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
> + * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
> + * by means of an ATS request on CXL.io.
> + *
> + * In other world, CXL.cache devices cannot access host physical memory without
> + * ATS.
s/other world/other words/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices
2026-03-26 21:38 ` Bjorn Helgaas
@ 2026-03-26 21:51 ` Jason Gunthorpe
0 siblings, 0 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2026-03-26 21:51 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: Nicolin Chen, will, robin.murphy, bhelgaas, joro, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl
On Thu, Mar 26, 2026 at 04:38:54PM -0500, Bjorn Helgaas wrote:
> It's only called from arm-smmu-v3.c. Is there something unique about
> SMMU, or will other IOMMUs need something similar?
Yeah, every iommu driver will eventually want to use this. IIRC the
motivation for using the pci core was to make use of pci quirks.
Right now ARM is the only one that turns ATS off for identity, but
IMHO those are bugs in the other drivers to not do that.
Jason
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices
2026-03-06 23:41 [PATCH v3 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
2026-03-06 23:41 ` [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices Nicolin Chen
@ 2026-03-06 23:41 ` Nicolin Chen
2026-03-08 20:50 ` Nirmoy Das
2026-03-09 11:50 ` Jonathan Cameron
2026-03-06 23:41 ` [PATCH v3 3/3] iommu/arm-smmu-v3: Allow ATS to be always on Nicolin Chen
2 siblings, 2 replies; 13+ messages in thread
From: Nicolin Chen @ 2026-03-06 23:41 UTC (permalink / raw)
To: jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
Some NVIDIA GPU/NIC devices, although don't implement the CXL config space,
they have many CXL-like properties. Call this kind "pre-CXL".
Similar to CXL.cache capaiblity, these pre-CXL devices also require the ATS
function even when their RIDs are IOMMU bypassed, i.e. keep ATS "always on"
v.s. "on demand" when a non-zero PASID line gets enabled in SVA use cases.
Introduce pci_dev_specific_ats_always_on() quirk function to scan a list of
IDs for these device. Then, include it pci_ats_always_on().
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/pci/pci.h | 9 +++++++++
drivers/pci/ats.c | 3 ++-
drivers/pci/quirks.c | 26 ++++++++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 13d998fbacce6..13fa71f965900 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1150,6 +1150,15 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
}
#endif
+#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_PCI_ATS)
+bool pci_dev_specific_ats_always_on(struct pci_dev *dev);
+#else
+static inline bool pci_dev_specific_ats_always_on(struct pci_dev *dev)
+{
+ return false;
+}
+#endif
+
#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
struct resource *res);
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index cf262eb6e6890..e6995b536ad4c 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -243,7 +243,8 @@ bool pci_ats_always_on(struct pci_dev *pdev)
if (pdev->is_virtfn)
pdev = pci_physfn(pdev);
- return pci_cxl_ats_always_on(pdev);
+ return pci_cxl_ats_always_on(pdev) ||
+ pci_dev_specific_ats_always_on(pdev);
}
EXPORT_SYMBOL_GPL(pci_ats_always_on);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 48946cca4be72..21451e62f284e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5714,6 +5714,32 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats);
+
+static const struct pci_dev_ats_always_on {
+ u16 vendor;
+ u16 device;
+} pci_dev_ats_always_on[] = {
+ /* NVIDIA GPUs */
+ { PCI_VENDOR_ID_NVIDIA, 0x2e12, },
+ { PCI_VENDOR_ID_NVIDIA, 0x2e2a, },
+ { PCI_VENDOR_ID_NVIDIA, 0x2e2b, },
+ /* NVIDIA CX10 Family NVlink-C2C */
+ { PCI_VENDOR_ID_MELLANOX, 0x2101, },
+ { 0 }
+};
+
+/* Some pre-CXL devices require ATS on the RID when it is IOMMU-bypassed */
+bool pci_dev_specific_ats_always_on(struct pci_dev *pdev)
+{
+ const struct pci_dev_ats_always_on *i;
+
+ for (i = pci_dev_ats_always_on; i->vendor; i++) {
+ if (i->vendor == pdev->vendor && i->device == pdev->device)
+ return true;
+ }
+
+ return false;
+}
#endif /* CONFIG_PCI_ATS */
/* Freescale PCIe doesn't support MSI in RC mode */
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices
2026-03-06 23:41 ` [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
@ 2026-03-08 20:50 ` Nirmoy Das
2026-03-08 20:54 ` Nirmoy Das
2026-03-09 11:50 ` Jonathan Cameron
1 sibling, 1 reply; 13+ messages in thread
From: Nirmoy Das @ 2026-03-08 20:50 UTC (permalink / raw)
To: Nicolin Chen, jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
On 07.03.26 00:41, Nicolin Chen wrote:
> Some NVIDIA GPU/NIC devices, although don't implement the CXL config space,
> they have many CXL-like properties. Call this kind "pre-CXL".
>
> Similar to CXL.cache capaiblity, these pre-CXL devices also require the ATS
> function even when their RIDs are IOMMU bypassed, i.e. keep ATS "always on"
> v.s. "on demand" when a non-zero PASID line gets enabled in SVA use cases.
>
> Introduce pci_dev_specific_ats_always_on() quirk function to scan a list of
> IDs for these device. Then, include it pci_ats_always_on().
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Reviewed-by: Nirmoy Das <nirmoy@nvidia.com>
> ---
> drivers/pci/pci.h | 9 +++++++++
> drivers/pci/ats.c | 3 ++-
> drivers/pci/quirks.c | 26 ++++++++++++++++++++++++++
> 3 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 13d998fbacce6..13fa71f965900 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -1150,6 +1150,15 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
> }
> #endif
>
> +#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_PCI_ATS)
> +bool pci_dev_specific_ats_always_on(struct pci_dev *dev);
> +#else
> +static inline bool pci_dev_specific_ats_always_on(struct pci_dev *dev)
> +{
> + return false;
> +}
> +#endif
> +
> #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
> int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
> struct resource *res);
> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> index cf262eb6e6890..e6995b536ad4c 100644
> --- a/drivers/pci/ats.c
> +++ b/drivers/pci/ats.c
> @@ -243,7 +243,8 @@ bool pci_ats_always_on(struct pci_dev *pdev)
> if (pdev->is_virtfn)
> pdev = pci_physfn(pdev);
>
> - return pci_cxl_ats_always_on(pdev);
> + return pci_cxl_ats_always_on(pdev) ||
> + pci_dev_specific_ats_always_on(pdev);
> }
> EXPORT_SYMBOL_GPL(pci_ats_always_on);
>
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 48946cca4be72..21451e62f284e 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -5714,6 +5714,32 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats);
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats);
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats);
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats);
> +
> +static const struct pci_dev_ats_always_on {
> + u16 vendor;
> + u16 device;
> +} pci_dev_ats_always_on[] = {
> + /* NVIDIA GPUs */
> + { PCI_VENDOR_ID_NVIDIA, 0x2e12, },
> + { PCI_VENDOR_ID_NVIDIA, 0x2e2a, },
> + { PCI_VENDOR_ID_NVIDIA, 0x2e2b, },
> + /* NVIDIA CX10 Family NVlink-C2C */
> + { PCI_VENDOR_ID_MELLANOX, 0x2101, },
> + { 0 }
> +};
> +
> +/* Some pre-CXL devices require ATS on the RID when it is IOMMU-bypassed */
> +bool pci_dev_specific_ats_always_on(struct pci_dev *pdev)
> +{
> + const struct pci_dev_ats_always_on *i;
> +
> + for (i = pci_dev_ats_always_on; i->vendor; i++) {
> + if (i->vendor == pdev->vendor && i->device == pdev->device)
> + return true;
> + }
> +
> + return false;
> +}
> #endif /* CONFIG_PCI_ATS */
>
> /* Freescale PCIe doesn't support MSI in RC mode */
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices
2026-03-08 20:50 ` Nirmoy Das
@ 2026-03-08 20:54 ` Nirmoy Das
0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2026-03-08 20:54 UTC (permalink / raw)
To: Nicolin Chen, jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
On 08.03.26 21:50, Nirmoy Das wrote:
>
> On 07.03.26 00:41, Nicolin Chen wrote:
>> Some NVIDIA GPU/NIC devices, although don't implement the CXL config
>> space,
>> they have many CXL-like properties. Call this kind "pre-CXL".
>>
>> Similar to CXL.cache capaiblity, these pre-CXL devices also require
>> the ATS
>> function even when their RIDs are IOMMU bypassed, i.e. keep ATS
>> "always on"
>> v.s. "on demand" when a non-zero PASID line gets enabled in SVA use
>> cases.
>>
>> Introduce pci_dev_specific_ats_always_on() quirk function to scan a
>> list of
>> IDs for these device. Then, include it pci_ats_always_on().
>>
>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>
> Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
>
> Reviewed-by: Nirmoy Das <nirmoy@nvidia.com>
Sent with wrong email address
Reviewed-by: Nirmoy Das <nirmoyd@nvidia.com>
>
>> ---
>> drivers/pci/pci.h | 9 +++++++++
>> drivers/pci/ats.c | 3 ++-
>> drivers/pci/quirks.c | 26 ++++++++++++++++++++++++++
>> 3 files changed, 37 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 13d998fbacce6..13fa71f965900 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -1150,6 +1150,15 @@ static inline int
>> pci_dev_specific_reset(struct pci_dev *dev, bool probe)
>> }
>> #endif
>> +#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_PCI_ATS)
>> +bool pci_dev_specific_ats_always_on(struct pci_dev *dev);
>> +#else
>> +static inline bool pci_dev_specific_ats_always_on(struct pci_dev *dev)
>> +{
>> + return false;
>> +}
>> +#endif
>> +
>> #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
>> int acpi_get_rc_resources(struct device *dev, const char *hid, u16
>> segment,
>> struct resource *res);
>> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
>> index cf262eb6e6890..e6995b536ad4c 100644
>> --- a/drivers/pci/ats.c
>> +++ b/drivers/pci/ats.c
>> @@ -243,7 +243,8 @@ bool pci_ats_always_on(struct pci_dev *pdev)
>> if (pdev->is_virtfn)
>> pdev = pci_physfn(pdev);
>> - return pci_cxl_ats_always_on(pdev);
>> + return pci_cxl_ats_always_on(pdev) ||
>> + pci_dev_specific_ats_always_on(pdev);
>> }
>> EXPORT_SYMBOL_GPL(pci_ats_always_on);
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index 48946cca4be72..21451e62f284e 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -5714,6 +5714,32 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,
>> 0x1457, quirk_intel_e2000_no_ats);
>> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459,
>> quirk_intel_e2000_no_ats);
>> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a,
>> quirk_intel_e2000_no_ats);
>> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c,
>> quirk_intel_e2000_no_ats);
>> +
>> +static const struct pci_dev_ats_always_on {
>> + u16 vendor;
>> + u16 device;
>> +} pci_dev_ats_always_on[] = {
>> + /* NVIDIA GPUs */
>> + { PCI_VENDOR_ID_NVIDIA, 0x2e12, },
>> + { PCI_VENDOR_ID_NVIDIA, 0x2e2a, },
>> + { PCI_VENDOR_ID_NVIDIA, 0x2e2b, },
>> + /* NVIDIA CX10 Family NVlink-C2C */
>> + { PCI_VENDOR_ID_MELLANOX, 0x2101, },
>> + { 0 }
>> +};
>> +
>> +/* Some pre-CXL devices require ATS on the RID when it is
>> IOMMU-bypassed */
>> +bool pci_dev_specific_ats_always_on(struct pci_dev *pdev)
>> +{
>> + const struct pci_dev_ats_always_on *i;
>> +
>> + for (i = pci_dev_ats_always_on; i->vendor; i++) {
>> + if (i->vendor == pdev->vendor && i->device == pdev->device)
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> #endif /* CONFIG_PCI_ATS */
>> /* Freescale PCIe doesn't support MSI in RC mode */
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices
2026-03-06 23:41 ` [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
2026-03-08 20:50 ` Nirmoy Das
@ 2026-03-09 11:50 ` Jonathan Cameron
1 sibling, 0 replies; 13+ messages in thread
From: Jonathan Cameron @ 2026-03-09 11:50 UTC (permalink / raw)
To: Nicolin Chen
Cc: jgg, will, robin.murphy, bhelgaas, joro, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, vsethi, linux-cxl
On Fri, 6 Mar 2026 15:41:16 -0800
Nicolin Chen <nicolinc@nvidia.com> wrote:
> Some NVIDIA GPU/NIC devices, although don't implement the CXL config space,
> they have many CXL-like properties. Call this kind "pre-CXL".
>
> Similar to CXL.cache capaiblity, these pre-CXL devices also require the ATS
> function even when their RIDs are IOMMU bypassed, i.e. keep ATS "always on"
> v.s. "on demand" when a non-zero PASID line gets enabled in SVA use cases.
>
> Introduce pci_dev_specific_ats_always_on() quirk function to scan a list of
> IDs for these device. Then, include it pci_ats_always_on().
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Seems like a reasonably tidy quirk.
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v3 3/3] iommu/arm-smmu-v3: Allow ATS to be always on
2026-03-06 23:41 [PATCH v3 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
2026-03-06 23:41 ` [PATCH v3 1/3] PCI: Allow ATS to be always on for CXL.cache capable devices Nicolin Chen
2026-03-06 23:41 ` [PATCH v3 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
@ 2026-03-06 23:41 ` Nicolin Chen
2026-03-08 20:52 ` Nirmoy Das
2 siblings, 1 reply; 13+ messages in thread
From: Nicolin Chen @ 2026-03-06 23:41 UTC (permalink / raw)
To: jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
When a device's default substream attaches to an identity domain, the SMMU
driver currently sets the device's STE between two modes:
Mode 1: Cfg=Translate, S1DSS=Bypass, EATS=1
Mode 2: Cfg=bypass (EATS is ignored by HW)
When there is an active PASID (non-default substream), mode 1 is used. And
when there is no PASID support or no active PASID, mode 2 is used.
The driver will also downgrade an STE from mode 1 to mode 2, when the last
active substream becomes inactive.
However, there are PCIe devices that demand ATS to be always on. For these
devices, their STEs have to use the mode 1 as HW ignores EATS with mode 2.
Change the driver accordingly:
- always use the mode 1
- never downgrade to mode 2
- allocate and retain a CD table (see note below)
Note that these devices might not support PASID, i.e. doing non-PASID ATS.
In such a case, the ssid_bits is set to 0. However, s1cdmax must be set to
a !0 value in order to keep the S1DSS field effective. Thus, when a master
requires ats_always_on, set its s1cdmax to minimal 1, meaning the CD table
will have a dummy entry (SSID=1) that will be never used.
Now, for these device, arm_smmu_cdtab_allocated() will always return true,
v.s. false prior to this change. When its default substream is attached to
an IDENTITY domain, its first CD is NULL in the table, which is a totally
valid case. Thus, add "!master->ats_always_on" to the condition.
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 72 ++++++++++++++++++---
2 files changed, 65 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 3c6d65d36164f..f966d474b61fd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -848,6 +848,7 @@ struct arm_smmu_master {
bool ats_enabled : 1;
bool ste_ats_enabled : 1;
bool stall_enabled;
+ bool ats_always_on;
unsigned int ssid_bits;
unsigned int iopf_refcount;
};
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 4d00d796f0783..ef98e79b4e75d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1482,8 +1482,11 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
- if (WARN_ON(!cdptr))
+ if (!cdptr) {
+ /* Only ats_always_on allows a NULL CD on default substream */
+ WARN_ON(!master->ats_always_on || ssid);
return;
+ }
arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
@@ -1496,6 +1499,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
cd_table->s1cdmax = master->ssid_bits;
+
+ /*
+ * When a device doesn't support PASID (non default SSID), ssid_bits is
+ * set to 0. This also sets S1CDMAX to 0, which disables the substreams
+ * and ignores the S1DSS field.
+ *
+ * On the other hand, if a device demands ATS to be always on even when
+ * its default substream is IOMMU bypassed, it has to use EATS that is
+ * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
+ * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
+ *
+ * Set S1CDMAX no lower than 1. This would add a dummy substream in the
+ * CD table but it should never be used by an actual CD.
+ */
+ if (master->ats_always_on)
+ cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
@@ -3250,7 +3269,8 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
* When the last user of the CD table goes away downgrade the STE back
* to a non-cd_table one, by re-attaching its sid_domain.
*/
- if (!arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (!master->ats_always_on &&
+ !arm_smmu_ssids_in_use(&master->cd_table)) {
struct iommu_domain *sid_domain =
iommu_driver_get_domain_for_dev(master->dev);
@@ -3274,6 +3294,8 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
.old_domain = old_domain,
.ssid = IOMMU_NO_PASID,
};
+ bool ats_always_on = master->ats_always_on &&
+ s1dss != STRTAB_STE_1_S1DSS_TERMINATE;
/*
* Do not allow any ASID to be changed while are working on the STE,
@@ -3285,7 +3307,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
* If the CD table is not in use we can use the provided STE, otherwise
* we use a cdtable STE with the provided S1DSS.
*/
- if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
* on because we have to assume a PASID is using ATS. For
@@ -3581,6 +3603,40 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
kfree(master->streams);
}
+static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
+{
+ bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
+ unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
+ struct pci_dev *pdev = to_pci_dev(master->dev);
+ int ret;
+
+ if (!arm_smmu_ats_supported(master))
+ return 0;
+
+ if (!pci_ats_always_on(pdev))
+ goto out_prepare;
+
+ /*
+ * S1DSS is required for ATS to be always on for identity domain cases.
+ * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
+ */
+ if (!s1p || !master->smmu->ssid_bits) {
+ dev_info_once(master->dev,
+ "SMMU doesn't support ATS to be always on\n");
+ goto out_prepare;
+ }
+
+ master->ats_always_on = true;
+
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret)
+ return ret;
+
+out_prepare:
+ pci_prepare_ats(pdev, stu);
+ return 0;
+}
+
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
@@ -3629,14 +3685,14 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
- if (dev_is_pci(dev)) {
- unsigned int stu = __ffs(smmu->pgsize_bitmap);
-
- pci_prepare_ats(to_pci_dev(dev), stu);
- }
+ ret = arm_smmu_master_prepare_ats(master);
+ if (ret)
+ goto err_disable_pasid;
return &smmu->iommu;
+err_disable_pasid:
+ arm_smmu_disable_pasid(master);
err_free_master:
kfree(master);
return ERR_PTR(ret);
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH v3 3/3] iommu/arm-smmu-v3: Allow ATS to be always on
2026-03-06 23:41 ` [PATCH v3 3/3] iommu/arm-smmu-v3: Allow ATS to be always on Nicolin Chen
@ 2026-03-08 20:52 ` Nirmoy Das
0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2026-03-08 20:52 UTC (permalink / raw)
To: Nicolin Chen, jgg, will, robin.murphy, bhelgaas
Cc: joro, praan, baolu.lu, kevin.tian, miko.lenczewski,
linux-arm-kernel, iommu, linux-kernel, linux-pci, dan.j.williams,
jonathan.cameron, vsethi, linux-cxl
On 07.03.26 00:41, Nicolin Chen wrote:
> When a device's default substream attaches to an identity domain, the SMMU
> driver currently sets the device's STE between two modes:
>
> Mode 1: Cfg=Translate, S1DSS=Bypass, EATS=1
> Mode 2: Cfg=bypass (EATS is ignored by HW)
>
> When there is an active PASID (non-default substream), mode 1 is used. And
> when there is no PASID support or no active PASID, mode 2 is used.
>
> The driver will also downgrade an STE from mode 1 to mode 2, when the last
> active substream becomes inactive.
>
> However, there are PCIe devices that demand ATS to be always on. For these
> devices, their STEs have to use the mode 1 as HW ignores EATS with mode 2.
>
> Change the driver accordingly:
> - always use the mode 1
> - never downgrade to mode 2
> - allocate and retain a CD table (see note below)
>
> Note that these devices might not support PASID, i.e. doing non-PASID ATS.
> In such a case, the ssid_bits is set to 0. However, s1cdmax must be set to
> a !0 value in order to keep the S1DSS field effective. Thus, when a master
> requires ats_always_on, set its s1cdmax to minimal 1, meaning the CD table
> will have a dummy entry (SSID=1) that will be never used.
>
> Now, for these device, arm_smmu_cdtab_allocated() will always return true,
> v.s. false prior to this change. When its default substream is attached to
> an IDENTITY domain, its first CD is NULL in the table, which is a totally
> valid case. Thus, add "!master->ats_always_on" to the condition.
>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 72 ++++++++++++++++++---
> 2 files changed, 65 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 3c6d65d36164f..f966d474b61fd 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -848,6 +848,7 @@ struct arm_smmu_master {
> bool ats_enabled : 1;
> bool ste_ats_enabled : 1;
> bool stall_enabled;
> + bool ats_always_on;
> unsigned int ssid_bits;
> unsigned int iopf_refcount;
> };
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 4d00d796f0783..ef98e79b4e75d 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1482,8 +1482,11 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
> if (!arm_smmu_cdtab_allocated(&master->cd_table))
> return;
> cdptr = arm_smmu_get_cd_ptr(master, ssid);
> - if (WARN_ON(!cdptr))
> + if (!cdptr) {
> + /* Only ats_always_on allows a NULL CD on default substream */
> + WARN_ON(!master->ats_always_on || ssid);
> return;
> + }
> arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
> }
>
> @@ -1496,6 +1499,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
> struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
>
> cd_table->s1cdmax = master->ssid_bits;
> +
> + /*
> + * When a device doesn't support PASID (non default SSID), ssid_bits is
> + * set to 0. This also sets S1CDMAX to 0, which disables the substreams
> + * and ignores the S1DSS field.
> + *
> + * On the other hand, if a device demands ATS to be always on even when
> + * its default substream is IOMMU bypassed, it has to use EATS that is
> + * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
> + * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
> + *
> + * Set S1CDMAX no lower than 1. This would add a dummy substream in the
> + * CD table but it should never be used by an actual CD.
> + */
> + if (master->ats_always_on)
> + cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
> max_contexts = 1 << cd_table->s1cdmax;
>
> if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
> @@ -3250,7 +3269,8 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
> * When the last user of the CD table goes away downgrade the STE back
> * to a non-cd_table one, by re-attaching its sid_domain.
> */
> - if (!arm_smmu_ssids_in_use(&master->cd_table)) {
> + if (!master->ats_always_on &&
> + !arm_smmu_ssids_in_use(&master->cd_table)) {
> struct iommu_domain *sid_domain =
> iommu_driver_get_domain_for_dev(master->dev);
>
> @@ -3274,6 +3294,8 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> .old_domain = old_domain,
> .ssid = IOMMU_NO_PASID,
> };
> + bool ats_always_on = master->ats_always_on &&
> + s1dss != STRTAB_STE_1_S1DSS_TERMINATE;
>
> /*
> * Do not allow any ASID to be changed while are working on the STE,
> @@ -3285,7 +3307,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> * If the CD table is not in use we can use the provided STE, otherwise
> * we use a cdtable STE with the provided S1DSS.
> */
> - if (arm_smmu_ssids_in_use(&master->cd_table)) {
> + if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
> /*
> * If a CD table has to be present then we need to run with ATS
> * on because we have to assume a PASID is using ATS. For
> @@ -3581,6 +3603,40 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
> kfree(master->streams);
> }
>
> +static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
> +{
> + bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
> + unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
> + struct pci_dev *pdev = to_pci_dev(master->dev);
> + int ret;
> +
> + if (!arm_smmu_ats_supported(master))
> + return 0;
> +
> + if (!pci_ats_always_on(pdev))
> + goto out_prepare;
> +
> + /*
> + * S1DSS is required for ATS to be always on for identity domain cases.
> + * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
> + */
> + if (!s1p || !master->smmu->ssid_bits) {
> + dev_info_once(master->dev,
> + "SMMU doesn't support ATS to be always on\n");
> + goto out_prepare;
> + }
> +
> + master->ats_always_on = true;
> +
> + ret = arm_smmu_alloc_cd_tables(master);
> + if (ret)
> + return ret;
> +
> +out_prepare:
> + pci_prepare_ats(pdev, stu);
> + return 0;
> +}
> +
> static struct iommu_device *arm_smmu_probe_device(struct device *dev)
> {
> int ret;
> @@ -3629,14 +3685,14 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
> smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> master->stall_enabled = true;
>
> - if (dev_is_pci(dev)) {
> - unsigned int stu = __ffs(smmu->pgsize_bitmap);
> -
> - pci_prepare_ats(to_pci_dev(dev), stu);
> - }
> + ret = arm_smmu_master_prepare_ats(master);
> + if (ret)
> + goto err_disable_pasid;
>
> return &smmu->iommu;
>
> +err_disable_pasid:
> + arm_smmu_disable_pasid(master);
> err_free_master:
> kfree(master);
> return ERR_PTR(ret);
^ permalink raw reply [flat|nested] 13+ messages in thread