* [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices
2026-05-21 20:34 [PATCH v6 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
@ 2026-05-21 20:34 ` Nicolin Chen
2026-05-21 20:57 ` Bjorn Helgaas
2026-05-21 20:34 ` [PATCH v6 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
2026-05-21 20:34 ` [PATCH v6 3/3] iommu/arm-smmu-v3: Allow ATS to be always on Nicolin Chen
2 siblings, 1 reply; 8+ messages in thread
From: Nicolin Chen @ 2026-05-21 20:34 UTC (permalink / raw)
To: jgg, will
Cc: robin.murphy, joro, bhelgaas, praan, baolu.lu, kevin.tian,
miko.lenczewski, linux-arm-kernel, iommu, linux-kernel, linux-pci,
dan.j.williams, jonathan.cameron, vsethi, linux-cxl, nirmoyd
Controlled by IOMMU drivers, ATS can be enabled "on demand", when a given
PASID on a device is attached to an I/O page table. This works even when
a device has no translation on its RID (i.e., the RID is IOMMU bypassed).
However, certain PCIe devices require non-PASID ATS on their RID even when
the RID is IOMMU bypassed. Call this "ATS always on" in IOMMU terms.
For example, CXL spec r4.0 notes in sec 3.2.5.13 Memory Type on CXL.cache:
"To source requests on CXL.cache, devices need to get the Host Physical
Address (HPA) from the Host by means of an ATS request on CXL.io."
In other words, the CXL.cache capability requires ATS; otherwise, it can't
access host physical memory.
Introduce a new pci_ats_required() helper for the IOMMU driver to scan a
PCI device and shift ATS policies between "on demand" and "always on".
Add support for CXL.cache devices first. Pre-CXL devices will be added
in the quirks.c file.
Note that pci_ats_required() validates against pci_ats_supported(), so we
ensure that untrusted devices (e.g. external ports) will not be always on.
This maintains the existing ATS security policy regarding potential side-
channel attacks via ATS.
Cc: linux-cxl@vger.kernel.org
Suggested-by: Vikram Sethi <vsethi@nvidia.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
include/linux/pci-ats.h | 3 +++
include/uapi/linux/pci_regs.h | 1 +
drivers/pci/ats.c | 46 +++++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+)
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 75c6c86cf09dc..f3723b6861294 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
+bool pci_ats_required(struct pci_dev *dev);
#else /* CONFIG_PCI_ATS */
static inline bool pci_ats_supported(struct pci_dev *d)
{ return false; }
@@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d)
{ return -ENODEV; }
static inline int pci_ats_page_aligned(struct pci_dev *dev)
{ return 0; }
+static inline bool pci_ats_required(struct pci_dev *dev)
+{ return false; }
#endif /* CONFIG_PCI_ATS */
#ifdef CONFIG_PCI_PRI
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 14f634ab9350d..6ac45be1008b8 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1349,6 +1349,7 @@
/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
#define PCI_DVSEC_CXL_DEVICE 0
#define PCI_DVSEC_CXL_CAP 0xA
+#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
#define PCI_DVSEC_CXL_CTRL 0xC
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index ec6c8dbdc5e9c..84cd06d74fc9c 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -205,6 +205,52 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
return 0;
}
+/*
+ * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
+ * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
+ * by means of an ATS request on CXL.io. In other words, CXL.cache devices
+ * cannot access host physical memory without ATS.
+ *
+ * Check Cache_Capable instead of Cache_Enable because CXL.cache may be enabled
+ * after the caller uses this to make its ATS decision.
+ *
+ * Return false if the CXL Device DVSEC is missing or cannot be read.
+ */
+static bool pci_cxl_ats_required(struct pci_dev *pdev)
+{
+ int offset;
+ u16 cap;
+
+ offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ PCI_DVSEC_CXL_DEVICE);
+ if (!offset)
+ return false;
+
+ if (pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap))
+ return false;
+
+ return cap & PCI_DVSEC_CXL_CACHE_CAPABLE;
+}
+
+/**
+ * pci_ats_required - Whether the PCI device requires ATS
+ * @pdev: the PCI device (a VF must support ATS itself, then defers to its PF)
+ *
+ * Return: true if the PCI device requires ATS for basic functional operation.
+ */
+bool pci_ats_required(struct pci_dev *pdev)
+{
+ if (!pci_ats_supported(pdev))
+ return false;
+
+ /* A VF inherits its PF's requirement for ATS function */
+ if (pdev->is_virtfn)
+ pdev = pci_physfn(pdev);
+
+ return pci_cxl_ats_required(pdev);
+}
+EXPORT_SYMBOL_GPL(pci_ats_required);
+
#ifdef CONFIG_PCI_PRI
void pci_pri_init(struct pci_dev *pdev)
{
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices
2026-05-21 20:34 ` [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices Nicolin Chen
@ 2026-05-21 20:57 ` Bjorn Helgaas
2026-05-21 21:07 ` Nicolin Chen
0 siblings, 1 reply; 8+ messages in thread
From: Bjorn Helgaas @ 2026-05-21 20:57 UTC (permalink / raw)
To: Nicolin Chen
Cc: jgg, will, robin.murphy, joro, bhelgaas, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl, nirmoyd
On Thu, May 21, 2026 at 01:34:20PM -0700, Nicolin Chen wrote:
> Controlled by IOMMU drivers, ATS can be enabled "on demand", when a given
> PASID on a device is attached to an I/O page table. This is working, even
> when a device has no translation on its RID (i.e., RID is IOMMU bypassed).
>
> However, certain PCIe devices require non-PASID ATS on their RID even when
> the RID is IOMMU bypassed. Call this "ATS always on" in IOMMU term.
>
> For example, CXL spec r4.0 notes in sec 3.2.5.13 Memory Type on CXL.cache:
> "To source requests on CXL.cache, devices need to get the Host Physical
> Address (HPA) from the Host by means of an ATS request on CXL.io."
>
> In other words, the CXL.cache capability requires ATS; otherwise, it can't
> access host physical memory.
>
> Introduce a new pci_ats_required() helper for the IOMMU driver to scan a
> PCI device and shift ATS policies between "on demand" and "always on".
>
> Add the support for CXL.cache devices first. Pre-CXL devices will be added
> in quirks.c file.
>
> Note that pci_ats_required() validates against pci_ats_supported(), so we
> ensure that untrusted devices (e.g. external ports) will not be always on.
> This maintains the existing ATS security policy regarding potential side-
> channel attacks via ATS.
>
> Cc: linux-cxl@vger.kernel.org
> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
> Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ...
> +bool pci_ats_required(struct pci_dev *pdev)
> +{
> + if (!pci_ats_supported(pdev))
> + return false;
> +
> + /* A VF inherits its PF's requirement for ATS function */
> + if (pdev->is_virtfn)
> + pdev = pci_physfn(pdev);
> +
> + return pci_cxl_ats_required(pdev);
I acked this before I saw this sashiko feedback, which looks like a
legit issue to me:
Will this VF inheritance logic ever be reached?
According to the PCIe SR-IOV specification (section 9.3.3.1), VFs do
not implement the ATS Extended Capability, which means pdev->ats_cap
is always 0 for VFs.
Because of this, pci_ats_supported(pdev) will unconditionally return
false for any VF. This causes the function to return false before it
can ever reach the pdev->is_virtfn check.
Could this prevent VFs from correctly enabling the ATS always on
feature and leave them unable to access host memory without
triggering IOMMU faults?
(From https://sashiko.dev/#/patchset/cover.1779304390.git.nicolinc%40nvidia.com)
I withdraw my ack for now until we figure out if it's a real issue.
> +}
> +EXPORT_SYMBOL_GPL(pci_ats_required);
> +
> #ifdef CONFIG_PCI_PRI
> void pci_pri_init(struct pci_dev *pdev)
> {
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices
2026-05-21 20:57 ` Bjorn Helgaas
@ 2026-05-21 21:07 ` Nicolin Chen
2026-05-21 21:31 ` Bjorn Helgaas
0 siblings, 1 reply; 8+ messages in thread
From: Nicolin Chen @ 2026-05-21 21:07 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: jgg, will, robin.murphy, joro, bhelgaas, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl, nirmoyd
On Thu, May 21, 2026 at 03:57:23PM -0500, Bjorn Helgaas wrote:
> On Thu, May 21, 2026 at 01:34:20PM -0700, Nicolin Chen wrote:
> > +bool pci_ats_required(struct pci_dev *pdev)
> > +{
> > + if (!pci_ats_supported(pdev))
> > + return false;
> > +
> > + /* A VF inherits its PF's requirement for ATS function */
> > + if (pdev->is_virtfn)
> > + pdev = pci_physfn(pdev);
> > +
> > + return pci_cxl_ats_required(pdev);
>
> I acked this before I saw this sashiko feedback, which looks like a
> legit issue to me:
>
> Will this VF inheritance logic ever be reached?
>
> According to the PCIe SR-IOV specification (section 9.3.3.1), VFs do
> not implement the ATS Extended Capability, which means pdev->ats_cap
> is always 0 for VFs.
>
> Because of this, pci_ats_supported(pdev) will unconditionally return
> false for any VF. This causes the function to return false before it
> can ever reach the pdev->is_virtfn check.
>
> Could this prevent VFs from correctly enabling the ATS always on
> feature and leave them unable to access host memory without
> triggering IOMMU faults?
>
> (From https://sashiko.dev/#/patchset/cover.1779304390.git.nicolinc%40nvidia.com)
>
> I withdraw my ack for now until we figure out if it's a real issue.
I did a bit of research here.
The existing pci_enable_ats() checks:
if (!pci_ats_supported(dev))
return -EINVAL;
at the top, prior to:
if (dev->is_virtfn) {
pdev = pci_physfn(dev);
if (pdev->ats_stu != ps)
return -EINVAL;
So, VF must *support* ATS (its ats_cap must be !0) so as to turn
on ATS, instead of relying on the PF's ats_cap.
IOW, Sashiko's comment "which means pdev->ats_cap is always 0 for
VFs" is wrong.
So, I kept the same order in this new pci_ats_required().
FWIW, I also did an inverted version and asked Sashiko to review:
====================================================================
> @@ -205,6 +205,52 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
[ ... ]
> +bool pci_ats_required(struct pci_dev *pdev)
> +{
> + /* A VF inherits its PF's requirement for ATS function */
> + if (pdev->is_virtfn)
> + pdev = pci_physfn(pdev);
> +
> + if (!pci_ats_supported(pdev))
> + return false;
This isn't a bug, but it looks like these two lines are indented with spaces
instead of tabs.
Also, since pdev is reassigned to pci_physfn(pdev) before checking ATS
support, does this incorrectly check the ATS capability of the PF rather than
the VF?
If a VF has its ATS support explicitly disabled (for example, via PCIe quirks
that set vf->ats_cap = 0), this check might bypass the VF's specific
properties and incorrectly return true based on the PF's capabilities.
Consequently, if a caller attempts to enable ATS on the VF by calling
pci_enable_ats(), it will fail because pci_enable_ats() correctly checks
pci_ats_supported() against the VF.
Would it be better to evaluate pci_ats_supported() on the original pdev
before reassigning it to the PF?
====================================================================
What would you like me to do, Bjorn?
Nicolin
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices
2026-05-21 21:07 ` Nicolin Chen
@ 2026-05-21 21:31 ` Bjorn Helgaas
2026-05-21 21:59 ` Nicolin Chen
0 siblings, 1 reply; 8+ messages in thread
From: Bjorn Helgaas @ 2026-05-21 21:31 UTC (permalink / raw)
To: Nicolin Chen
Cc: jgg, will, robin.murphy, joro, bhelgaas, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl, nirmoyd
On Thu, May 21, 2026 at 02:07:34PM -0700, Nicolin Chen wrote:
> On Thu, May 21, 2026 at 03:57:23PM -0500, Bjorn Helgaas wrote:
> > On Thu, May 21, 2026 at 01:34:20PM -0700, Nicolin Chen wrote:
> > > +bool pci_ats_required(struct pci_dev *pdev)
> > > +{
> > > + if (!pci_ats_supported(pdev))
> > > + return false;
> > > +
> > > + /* A VF inherits its PF's requirement for ATS function */
> > > + if (pdev->is_virtfn)
> > > + pdev = pci_physfn(pdev);
> > > +
> > > + return pci_cxl_ats_required(pdev);
> >
> > I acked this before I saw this sashiko feedback, which looks like a
> > legit issue to me:
> >
> > Will this VF inheritance logic ever be reached?
> >
> > According to the PCIe SR-IOV specification (section 9.3.3.1), VFs do
> > not implement the ATS Extended Capability, which means pdev->ats_cap
> > is always 0 for VFs.
Huh. I wish sashiko would include the spec revision because that sure
looks wrong. In PCIe r7.0, there is no sec 9.3.3.1. In PCIe r6.0,
sec 9.3.3.1 is the SR-IOV Extended Capability, which doesn't mention
ATS. In both, sec 10.5.1 is the ATS Extended Capability and says both
PFs and VFs can implement it.
So I think this is OK as-is:
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices
2026-05-21 21:31 ` Bjorn Helgaas
@ 2026-05-21 21:59 ` Nicolin Chen
0 siblings, 0 replies; 8+ messages in thread
From: Nicolin Chen @ 2026-05-21 21:59 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: jgg, will, robin.murphy, joro, bhelgaas, praan, baolu.lu,
kevin.tian, miko.lenczewski, linux-arm-kernel, iommu,
linux-kernel, linux-pci, dan.j.williams, jonathan.cameron, vsethi,
linux-cxl, nirmoyd
On Thu, May 21, 2026 at 04:31:23PM -0500, Bjorn Helgaas wrote:
> On Thu, May 21, 2026 at 02:07:34PM -0700, Nicolin Chen wrote:
> > On Thu, May 21, 2026 at 03:57:23PM -0500, Bjorn Helgaas wrote:
> > > On Thu, May 21, 2026 at 01:34:20PM -0700, Nicolin Chen wrote:
> > > > +bool pci_ats_required(struct pci_dev *pdev)
> > > > +{
> > > > + if (!pci_ats_supported(pdev))
> > > > + return false;
> > > > +
> > > > + /* A VF inherits its PF's requirement for ATS function */
> > > > + if (pdev->is_virtfn)
> > > > + pdev = pci_physfn(pdev);
> > > > +
> > > > + return pci_cxl_ats_required(pdev);
> > >
> > > I acked this before I saw this sashiko feedback, which looks like a
> > > legit issue to me:
> > >
> > > Will this VF inheritance logic ever be reached?
> > >
> > > According to the PCIe SR-IOV specification (section 9.3.3.1), VFs do
> > > not implement the ATS Extended Capability, which means pdev->ats_cap
> > > is always 0 for VFs.
>
> Huh. I wish sashiko would include the spec revision because that sure
> looks wrong. In PCIe r7.0, there is no sec 9.3.3.1. In PCIe r6.0,
> sec 9.3.3.1 is the SR-IOV Extended Capability, which doesn't mention
> ATS. In both, sec 10.5.1 is the ATS Extended Capability and says both
> PFs and VFs can implement it.
I am glad you checked the spec. I should have done the same.
> So I think this is OK as-is:
>
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Thanks!
Nicolin
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v6 2/3] PCI: Allow ATS to be always on for pre-CXL devices
2026-05-21 20:34 [PATCH v6 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
2026-05-21 20:34 ` [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices Nicolin Chen
@ 2026-05-21 20:34 ` Nicolin Chen
2026-05-21 20:34 ` [PATCH v6 3/3] iommu/arm-smmu-v3: Allow ATS to be always on Nicolin Chen
2 siblings, 0 replies; 8+ messages in thread
From: Nicolin Chen @ 2026-05-21 20:34 UTC (permalink / raw)
To: jgg, will
Cc: robin.murphy, joro, bhelgaas, praan, baolu.lu, kevin.tian,
miko.lenczewski, linux-arm-kernel, iommu, linux-kernel, linux-pci,
dan.j.williams, jonathan.cameron, vsethi, linux-cxl, nirmoyd
Some NVIDIA GPU/NIC devices, though they don't implement CXL config space,
have many CXL-like properties. Call this kind "pre-CXL".
Similar to the CXL.cache capability, these pre-CXL devices also require the ATS
function even when their RIDs are IOMMU bypassed, i.e. keep ATS "always on"
vs. "on demand" when a non-zero PASID line gets enabled in SVA use cases.
Introduce pci_dev_specific_ats_required() quirk function to scan a list of
IDs for these devices. Then, include it in pci_ats_required().
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nirmoy Das <nirmoyd@nvidia.com>
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/pci/pci.h | 9 +++++++++
drivers/pci/ats.c | 3 ++-
drivers/pci/quirks.c | 42 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4a14f88e543a2..e8ad27abb1cfe 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1155,6 +1155,15 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
}
#endif
+#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_PCI_ATS)
+bool pci_dev_specific_ats_required(struct pci_dev *dev);
+#else
+static inline bool pci_dev_specific_ats_required(struct pci_dev *dev)
+{
+ return false;
+}
+#endif
+
#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
struct resource *res);
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 84cd06d74fc9c..96efa00d97433 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -247,7 +247,8 @@ bool pci_ats_required(struct pci_dev *pdev)
if (pdev->is_virtfn)
pdev = pci_physfn(pdev);
- return pci_cxl_ats_required(pdev);
+ return pci_cxl_ats_required(pdev) ||
+ pci_dev_specific_ats_required(pdev);
}
EXPORT_SYMBOL_GPL(pci_ats_required);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index caaed1a01dc02..c0242f3e9f063 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5715,6 +5715,48 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats);
+
+static bool quirk_nvidia_gpu_ats_required(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case 0x2e00 ... 0x2e3f: /* GB20B */
+ return true;
+ }
+ return false;
+}
+
+static const struct pci_dev_ats_required {
+ u16 vendor;
+ u16 device;
+ bool (*ats_required)(struct pci_dev *dev);
+} pci_dev_ats_required[] = {
+ /* NVIDIA GPUs */
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, quirk_nvidia_gpu_ats_required },
+ /* NVIDIA CX10 Family NVLink-C2C */
+ { PCI_VENDOR_ID_MELLANOX, 0x2101, NULL },
+ { 0 }
+};
+
+/*
+ * Some NVIDIA devices lack CXL config space but behave like CXL.cache devices,
+ * so they need ATS to get host physical addresses. An entry matches by vendor,
+ * then by its ats_required() callback, or by exact device ID if it has none.
+ */
+bool pci_dev_specific_ats_required(struct pci_dev *pdev)
+{
+ const struct pci_dev_ats_required *i;
+
+ for (i = pci_dev_ats_required; i->vendor; i++) {
+ if (i->vendor != pdev->vendor)
+ continue;
+ if (i->ats_required && i->ats_required(pdev))
+ return true;
+ if (!i->ats_required && i->device == pdev->device)
+ return true;
+ }
+
+ return false;
+}
#endif /* CONFIG_PCI_ATS */
/* Freescale PCIe doesn't support MSI in RC mode */
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v6 3/3] iommu/arm-smmu-v3: Allow ATS to be always on
2026-05-21 20:34 [PATCH v6 0/3] Allow ATS to be always on for certain ATS-capable devices Nicolin Chen
2026-05-21 20:34 ` [PATCH v6 1/3] PCI: Add pci_ats_required() for CXL.cache capable devices Nicolin Chen
2026-05-21 20:34 ` [PATCH v6 2/3] PCI: Allow ATS to be always on for pre-CXL devices Nicolin Chen
@ 2026-05-21 20:34 ` Nicolin Chen
2 siblings, 0 replies; 8+ messages in thread
From: Nicolin Chen @ 2026-05-21 20:34 UTC (permalink / raw)
To: jgg, will
Cc: robin.murphy, joro, bhelgaas, praan, baolu.lu, kevin.tian,
miko.lenczewski, linux-arm-kernel, iommu, linux-kernel, linux-pci,
dan.j.williams, jonathan.cameron, vsethi, linux-cxl, nirmoyd
When a device's default substream attaches to an identity domain, the SMMU
driver currently sets the device's STE between two modes:
Mode 1: Cfg=Translate, S1DSS=Bypass, EATS=1
Mode 2: Cfg=bypass (EATS is ignored by HW)
When there is an active PASID (non-default substream), mode 1 is used. And
when there is no PASID support or no active PASID, mode 2 is used.
The driver will also downgrade an STE from mode 1 to mode 2, when the last
active substream becomes inactive.
However, there are PCIe devices that demand ATS to be always on. For these
devices, their STEs have to use mode 1, as HW ignores EATS in mode 2.
Change the driver accordingly:
- always use mode 1
- never downgrade to mode 2
- allocate and retain a CD table (see note below)
Note that these devices might not support PASID, i.e. doing non-PASID ATS.
In such a case, the ssid_bits is set to 0. However, s1cdmax must be set to
a !0 value in order to keep the S1DSS field effective. Thus, when a master
requires ats_always_on, set its s1cdmax to at least 1, meaning that the CD
table will have a dummy entry (SSID=1) that will never be used.
Now for these devices, arm_smmu_cdtab_allocated() will always return true,
vs. false prior to this change. When such a device's default substream is
attached to an IDENTITY domain, its first CD is NULL in the table, which is
a totally valid case. Thus, add "!master->ats_always_on" to the condition.
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 81 ++++++++++++++++++---
2 files changed, 73 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ef42df4753ec4..8c3600f4364c5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -943,6 +943,7 @@ struct arm_smmu_master {
bool ats_enabled : 1;
bool ste_ats_enabled : 1;
bool stall_enabled;
+ bool ats_always_on;
unsigned int ssid_bits;
unsigned int iopf_refcount;
};
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e8d7dbe495f03..4afdb775e0722 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1742,8 +1742,11 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
- if (WARN_ON(!cdptr))
+ if (!cdptr) {
+ /* Only ats_always_on allows a NULL CD on default substream */
+ WARN_ON(!master->ats_always_on || ssid);
return;
+ }
arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
@@ -1756,6 +1759,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
cd_table->s1cdmax = master->ssid_bits;
+
+ /*
+ * When a device doesn't support PASID (non default SSID), ssid_bits is
+ * set to 0. This also sets S1CDMAX to 0, which disables the substreams
+ * and ignores the S1DSS field.
+ *
+ * On the other hand, if a device demands ATS to be always on even when
+ * its default substream is IOMMU bypassed, it has to use EATS that is
+ * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
+ * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
+ *
+ * Set S1CDMAX no lower than 1. This would add a dummy substream in the
+ * CD table but it should never be used by an actual CD.
+ */
+ if (master->ats_always_on)
+ cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
@@ -3854,9 +3873,12 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
if (!arm_smmu_ssids_in_use(&master->cd_table)) {
struct iommu_domain *sid_domain =
iommu_driver_get_domain_for_dev(master->dev);
+ bool ats_always_on = master->ats_always_on &&
+ sid_domain->type != IOMMU_DOMAIN_BLOCKED;
+ bool downgrade = sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
+ sid_domain->type == IOMMU_DOMAIN_BLOCKED;
- if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
- sid_domain->type == IOMMU_DOMAIN_BLOCKED)
+ if (!ats_always_on && downgrade)
sid_domain->ops->attach_dev(sid_domain, dev,
sid_domain);
}
@@ -3875,6 +3897,8 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
.old_domain = old_domain,
.ssid = IOMMU_NO_PASID,
};
+ bool ats_always_on = master->ats_always_on &&
+ s1dss != STRTAB_STE_1_S1DSS_TERMINATE;
/*
* Do not allow any ASID to be changed while are working on the STE,
@@ -3886,7 +3910,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
* If the CD table is not in use we can use the provided STE, otherwise
* we use a cdtable STE with the provided S1DSS.
*/
- if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
* on because we have to assume a PASID is using ATS. For
@@ -4215,6 +4239,44 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
kfree(master->build_invs);
}
+static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
+{
+ bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
+ unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
+ struct pci_dev *pdev;
+ int ret;
+
+ if (!dev_is_pci(master->dev))
+ return 0; /* nothing to prepare for non-PCI devices */
+ pdev = to_pci_dev(master->dev);
+
+ if (!arm_smmu_ats_supported(master)) {
+ if (pci_ats_required(pdev)) {
+ dev_err_once(master->dev, "SMMU doesn't support ATS\n");
+ return -EOPNOTSUPP;
+ }
+ return 0; /* ATS unavailable and not required */
+ }
+
+ ret = pci_prepare_ats(pdev, stu);
+ if (ret || !pci_ats_required(pdev))
+ return ret; /* error, or ATS need not be always on */
+
+ /*
+ * Always-on ATS under an identity domain relies on S1DSS. However,
+ * the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
+ */
+ if (!s1p || !master->smmu->ssid_bits) {
+ dev_err_once(master->dev,
+ "SMMU doesn't support ATS to be always on\n");
+ return -EOPNOTSUPP;
+ }
+
+ master->ats_always_on = true;
+
+ return arm_smmu_alloc_cd_tables(master);
+}
+
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
@@ -4263,14 +4325,15 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
- if (dev_is_pci(dev)) {
- unsigned int stu = __ffs(smmu->pgsize_bitmap);
-
- pci_prepare_ats(to_pci_dev(dev), stu);
- }
+ ret = arm_smmu_master_prepare_ats(master);
+ if (ret)
+ goto err_disable_pasid;
return &smmu->iommu;
+err_disable_pasid:
+ arm_smmu_disable_pasid(master);
+ arm_smmu_remove_master(master);
err_free_master:
kfree(master);
return ERR_PTR(ret);
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread