* [PATCH v2] iommu/arm-smmu: Use pm_runtime in fault handlers
@ 2026-03-13 10:23 Prakash Gupta
2026-04-03 2:20 ` Pranjal Shrivastava
0 siblings, 1 reply; 2+ messages in thread
From: Prakash Gupta @ 2026-03-13 10:23 UTC (permalink / raw)
To: Will Deacon, Robin Murphy, Joerg Roedel, Rob Clark, Connor Abbott
Cc: linux-arm-msm, linux-arm-kernel, iommu, linux-kernel,
Akhil P Oommen, Pranjal Shrivastava, Pratyush Brahma,
Prakash Gupta
Commit d4a44f0750bb ("iommu/arm-smmu: Invoke pm_runtime across the driver")
enabled pm_runtime for the arm-smmu device. On systems where the SMMU
sits in a power domain, all register accesses must be done while the
device is runtime active to avoid unclocked register reads and
potential NoC errors.
So far, this has not been an issue for most SMMU clients because
stall-on-fault is enabled by default. While a translation fault is
being handled, the SMMU stalls further translations for that context
bank, so the fault handler would not race with a powered-down
SMMU.
Adreno SMMU now disables stall-on-fault in the presence of fault
storms to avoid saturating SMMU resources and hanging the GMU. With
stall-on-fault disabled, the SMMU can generate faults while its power
domain may no longer be enabled, which makes unclocked accesses to
fault-status registers in the SMMU fault handlers possible.
Guard the context and global fault handlers with pm_runtime_get_if_active()
and pm_runtime_put_autosuspend() so that all SMMU fault register accesses
are done with the SMMU powered. If pm_runtime is not active, we can
safely ignore the fault, because on pm_runtime resume the SMMU device is
reset and the fault registers are cleared.
Fixes: b13044092c1e ("drm/msm: Temporarily disable stall-on-fault after a page fault")
Co-developed-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
Signed-off-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
Signed-off-by: Prakash Gupta <prakash.gupta@oss.qualcomm.com>
---
Changes in v2:
- Switched from arm_smmu_rpm_get()/arm_smmu_rpm_put() wrappers to
pm_runtime_get_if_active()/pm_runtime_put_autosuspend() APIs
- Added support for smmu->impl->global_fault callback in global fault handler
- Remove threaded irq context fault restriction to allow modifying stall
mode for adreno smmu
- Link to v1: https://patch.msgid.link/20260127-smmu-rpm-v1-1-2ef2f4c85305@oss.qualcomm.com
---
drivers/iommu/arm/arm-smmu/arm-smmu.c | 60 +++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 21 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 5e690cf85ec9..f4c46491a03d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -462,10 +462,20 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
int idx = smmu_domain->cfg.cbndx;
int ret;
+ if (!pm_runtime_get_if_active(smmu->dev))
+ return IRQ_NONE;
+
+ if (smmu->impl && smmu->impl->context_fault) {
+ ret = smmu->impl->context_fault(irq, dev);
+ goto out_power_off;
+ }
+
arm_smmu_read_context_fault_info(smmu, idx, &cfi);
- if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
- return IRQ_NONE;
+ if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
@@ -480,7 +490,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
}
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+ pm_runtime_put_autosuspend(smmu->dev);
+
+ return ret;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -489,14 +504,25 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
struct arm_smmu_device *smmu = dev;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
+ int ret;
+
+ if (!pm_runtime_get_if_active(smmu->dev))
+ return IRQ_NONE;
+
+ if (smmu->impl && smmu->impl->global_fault) {
+ ret = smmu->impl->global_fault(irq, dev);
+ goto out_power_off;
+ }
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
- if (!gfsr)
- return IRQ_NONE;
+ if (!gfsr) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
if (__ratelimit(&rs)) {
if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
@@ -513,7 +539,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
}
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+ pm_runtime_put_autosuspend(smmu->dev);
+ return ret;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
@@ -683,7 +713,6 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
enum io_pgtable_fmt fmt;
struct iommu_domain *domain = &smmu_domain->domain;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- irqreturn_t (*context_fault)(int irq, void *dev);
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -850,19 +879,14 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
*/
irq = smmu->irqs[cfg->irptndx];
- if (smmu->impl && smmu->impl->context_fault)
- context_fault = smmu->impl->context_fault;
- else
- context_fault = arm_smmu_context_fault;
-
if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
- context_fault,
+ arm_smmu_context_fault,
IRQF_ONESHOT | IRQF_SHARED,
"arm-smmu-context-fault",
smmu_domain);
else
- ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, IRQF_SHARED,
"arm-smmu-context-fault", smmu_domain);
if (ret < 0) {
@@ -2125,7 +2149,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
int num_irqs, i, err;
u32 global_irqs, pmu_irqs;
- irqreturn_t (*global_fault)(int irq, void *dev);
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu) {
@@ -2205,18 +2228,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->num_context_irqs = smmu->num_context_banks;
}
- if (smmu->impl && smmu->impl->global_fault)
- global_fault = smmu->impl->global_fault;
- else
- global_fault = arm_smmu_global_fault;
-
for (i = 0; i < global_irqs; i++) {
int irq = platform_get_irq(pdev, i);
if (irq < 0)
return irq;
- err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
+ err = devm_request_irq(dev, irq, arm_smmu_global_fault, IRQF_SHARED,
"arm-smmu global fault", smmu);
if (err)
return dev_err_probe(dev, err,
---
base-commit: fcb70a56f4d81450114034b2c61f48ce7444a0e2
change-id: 20251208-smmu-rpm-8bd67db93dca
Best regards,
--
Prakash Gupta <prakash.gupta@oss.qualcomm.com>
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v2] iommu/arm-smmu: Use pm_runtime in fault handlers
2026-03-13 10:23 [PATCH v2] iommu/arm-smmu: Use pm_runtime in fault handlers Prakash Gupta
@ 2026-04-03 2:20 ` Pranjal Shrivastava
0 siblings, 0 replies; 2+ messages in thread
From: Pranjal Shrivastava @ 2026-04-03 2:20 UTC (permalink / raw)
To: Prakash Gupta
Cc: Will Deacon, Robin Murphy, Joerg Roedel, Rob Clark, Connor Abbott,
linux-arm-msm, linux-arm-kernel, iommu, linux-kernel,
Akhil P Oommen, Pratyush Brahma
On Fri, Mar 13, 2026 at 03:53:53PM +0530, Prakash Gupta wrote:
> Commit d4a44f0750bb ("iommu/arm-smmu: Invoke pm_runtime across the driver")
> enabled pm_runtime for the arm-smmu device. On systems where the SMMU
> sits in a power domain, all register accesses must be done while the
> device is runtime active to avoid unclocked register reads and
> potential NoC errors.
>
> So far, this has not been an issue for most SMMU clients because
> stall-on-fault is enabled by default. While a translation fault is
> being handled, the SMMU stalls further translations for that context
> bank, so the fault handler would not race with a powered-down
> SMMU.
>
> Adreno SMMU now disables stall-on-fault in the presence of fault
> storms to avoid saturating SMMU resources and hanging the GMU. With
> stall-on-fault disabled, the SMMU can generate faults while its power
> domain may no longer be enabled, which makes unclocked accesses to
> fault-status registers in the SMMU fault handlers possible.
>
> Guard the context and global fault handlers with pm_runtime_get_if_active()
> and pm_runtime_put_autosuspend() so that all SMMU fault register accesses
> are done with the SMMU powered. In case pm_runtime is not active we can
> safely ignore the fault as for pm runtime resume the smmu device is
> reset and fault registers are cleared.
>
> Fixes: b13044092c1e ("drm/msm: Temporarily disable stall-on-fault after a page fault")
> Co-developed-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
> Signed-off-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
> Signed-off-by: Prakash Gupta <prakash.gupta@oss.qualcomm.com>
> ---
> Changes in v2:
> - Switched from arm_smmu_rpm_get()/arm_smmu_rpm_put() wrappers to
> pm_runtime_get_if_active()/pm_runtime_put_autosuspend() APIs
> - Added support for smmu->impl->global_fault callback in global fault handler
> - Remove threaded irq context fault restriction to allow modifying stall
> mode for adreno smmu
> - Link to v1: https://patch.msgid.link/20260127-smmu-rpm-v1-1-2ef2f4c85305@oss.qualcomm.com
> ---
> drivers/iommu/arm/arm-smmu/arm-smmu.c | 60 +++++++++++++++++++++++------------
> 1 file changed, 39 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 5e690cf85ec9..f4c46491a03d 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -462,10 +462,20 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
> int idx = smmu_domain->cfg.cbndx;
> int ret;
>
> + if (!pm_runtime_get_if_active(smmu->dev))
Note that the pm_runtime_get_if_active(dev) only returns a positive
value if the device state is exactly RPM_ACTIVE. If the device is in the
middle of its runtime_suspend callback, its state is RPM_SUSPENDING.
Thus, if a fault races with the suspend callback, we'll return IRQ_NONE
and the suspend callback doesn't seem to be disabling interrupts.
This is no better if we're in a fault storm caused by level-triggered
interrupts: we'd simply keep re-entering this handler and returning
IRQ_NONE.
I believe we should clear / handle any pending faults/interrupts, or at
least mask the interrupt in the suspend handler, to avoid this situation.
> + return IRQ_NONE;
> +
Maybe we could add another wrapped-helper to maintain consistency:
static inline int arm_smmu_rpm_get_if_active(struct arm_smmu_device *smmu)
{
if (!pm_runtime_enabled(smmu->dev))
return 1; // Assume active/powered if RPM is not used
return pm_runtime_get_if_active(smmu->dev);
}
pm_runtime_get_if_active() returns -EINVAL otherwise, which isn't a
problem for the if condition, but the wrapper reads slightly cleaner.
> + if (smmu->impl && smmu->impl->context_fault) {
> + ret = smmu->impl->context_fault(irq, dev);
> + goto out_power_off;
> + }
> +
We've moved impl-specific handlers here, I don't see a functional change.
This looks fine.
> arm_smmu_read_context_fault_info(smmu, idx, &cfi);
>
> - if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
> - return IRQ_NONE;
> + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) {
> + ret = IRQ_NONE;
> + goto out_power_off;
> + }
>
> ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> @@ -480,7 +490,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
> ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
> }
>
> - return IRQ_HANDLED;
> + ret = IRQ_HANDLED;
> +
> +out_power_off:
> + pm_runtime_put_autosuspend(smmu->dev);
Nit: Please use arm_smmu_rpm_put() here.. while at it, I guess we can
also bring back pm_runtime_put_autosuspend() in arm_smmu_rpm_put() since
it is updated now to also mark last busy.
> +
> + return ret;
> }
>
> static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> @@ -489,14 +504,25 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> struct arm_smmu_device *smmu = dev;
> static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
> DEFAULT_RATELIMIT_BURST);
> + int ret;
> +
> + if (!pm_runtime_get_if_active(smmu->dev))
> + return IRQ_NONE;
> +
Same here.
> + if (smmu->impl && smmu->impl->global_fault) {
> + ret = smmu->impl->global_fault(irq, dev);
> + goto out_power_off;
> + }
>
> gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
> gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
> gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
> gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
>
> - if (!gfsr)
> - return IRQ_NONE;
> + if (!gfsr) {
> + ret = IRQ_NONE;
> + goto out_power_off;
> + }
>
> if (__ratelimit(&rs)) {
> if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
> @@ -513,7 +539,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
> }
>
> arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
> - return IRQ_HANDLED;
> + ret = IRQ_HANDLED;
> +
> +out_power_off:
> + pm_runtime_put_autosuspend(smmu->dev);
> + return ret;
> }
>
> static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
> @@ -683,7 +713,6 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
> enum io_pgtable_fmt fmt;
> struct iommu_domain *domain = &smmu_domain->domain;
> struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> - irqreturn_t (*context_fault)(int irq, void *dev);
>
> mutex_lock(&smmu_domain->init_mutex);
> if (smmu_domain->smmu)
> @@ -850,19 +879,14 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
> */
> irq = smmu->irqs[cfg->irptndx];
>
> - if (smmu->impl && smmu->impl->context_fault)
> - context_fault = smmu->impl->context_fault;
> - else
> - context_fault = arm_smmu_context_fault;
> -
> if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
> ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
> - context_fault,
> + arm_smmu_context_fault,
> IRQF_ONESHOT | IRQF_SHARED,
> "arm-smmu-context-fault",
> smmu_domain);
> else
> - ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
> + ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, IRQF_SHARED,
> "arm-smmu-context-fault", smmu_domain);
>
> if (ret < 0) {
> @@ -2125,7 +2149,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
> struct device *dev = &pdev->dev;
> int num_irqs, i, err;
> u32 global_irqs, pmu_irqs;
> - irqreturn_t (*global_fault)(int irq, void *dev);
>
> smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
> if (!smmu) {
> @@ -2205,18 +2228,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
> smmu->num_context_irqs = smmu->num_context_banks;
> }
>
> - if (smmu->impl && smmu->impl->global_fault)
> - global_fault = smmu->impl->global_fault;
> - else
> - global_fault = arm_smmu_global_fault;
> -
> for (i = 0; i < global_irqs; i++) {
> int irq = platform_get_irq(pdev, i);
>
> if (irq < 0)
> return irq;
>
> - err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
> + err = devm_request_irq(dev, irq, arm_smmu_global_fault, IRQF_SHARED,
> "arm-smmu global fault", smmu);
> if (err)
> return dev_err_probe(dev, err,
>
Thanks,
Praan
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-04-03 2:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-13 10:23 [PATCH v2] iommu/arm-smmu: Use pm_runtime in fault handlers Prakash Gupta
2026-04-03 2:20 ` Pranjal Shrivastava
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox