All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mostafa Saleh <smostafa@google.com>
To: Pranjal Shrivastava <praan@google.com>
Cc: iommu@lists.linux.dev, Will Deacon <will@kernel.org>,
	Joerg Roedel <joro@8bytes.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	Nicolin Chen <nicolinc@nvidia.com>,
	Daniel Mentz <danielmentz@google.com>,
	Ashish Mhetre <amhetre@nvidia.com>,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v8 09/12] iommu/arm-smmu-v3: Implement pm_runtime & system sleep ops
Date: Mon, 15 Jun 2026 18:20:27 +0000	[thread overview]
Message-ID: <ajBCazla1VoJX9Ms@google.com> (raw)
In-Reply-To: <20260601215909.3958732-10-praan@google.com>

On Mon, Jun 01, 2026 at 09:59:06PM +0000, Pranjal Shrivastava wrote:
> Implement pm_runtime and system sleep ops for arm-smmu-v3.
> 
> The suspend callback configures the SMMU to abort new transactions,
> disables the main translation unit and then drains the command queue
> to ensure completion of any in-flight commands. A software gate
> (STOP_FLAG) and synchronization barriers are used to quiesce the command
> submission pipeline and ensure state consistency before power-off.
> 
> To prevent software metadata flags from leaking into physical registers
> or polluting the tracking pointer, a newly introduced bitmask
> (CMDQ_PROD_IDX_MASK) is applied to all register writes and tracking
> updates.
> 
> The resume callback restores the MSI configuration and performs a full
> device reset via `arm_smmu_device_reset` to bring the SMMU back to an
> operational state. The MSIs are cached during the msi_write and are
> restored during the resume operation by using the helper. The STOP_FLAG
> is cleared only after the CMDQ is enabled in hardware.
> 
> Suggested-by: Daniel Mentz <danielmentz@google.com>
> Signed-off-by: Pranjal Shrivastava <praan@google.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 172 +++++++++++++++++++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  18 ++
>  2 files changed, 188 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index d31e50b64b50..542de3a3173a 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -28,6 +28,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/sort.h>
>  #include <linux/string_choices.h>
> +#include <linux/pm_runtime.h>
>  #include <kunit/visibility.h>
>  #include <uapi/linux/iommufd.h>
>  
> @@ -110,6 +111,40 @@ static const char * const event_class_str[] = {
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
>  static bool arm_smmu_ats_supported(struct arm_smmu_master *master);
>  
> +/* Runtime PM helpers */
> +__maybe_unused static int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
> +{
> +	int ret;
> +
> +	if (!pm_runtime_enabled(smmu->dev))
> +		return 0;
> +
> +	ret = pm_runtime_resume_and_get(smmu->dev);
> +	if (ret < 0) {
> +		dev_err(smmu->dev, "failed to resume device: %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +__maybe_unused static void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
> +{
> +	int ret;
> +
> +	if (!pm_runtime_enabled(smmu->dev))
> +		return;
> +
> +	ret = pm_runtime_put_autosuspend(smmu->dev);
> +	if (ret < 0)
> +		dev_err(smmu->dev, "failed to suspend device: %d\n", ret);
> +}
> +
> +static inline u32 arm_smmu_cmdq_owner_prod_idx(struct arm_smmu_cmdq *cmdq)
> +{
> +	return atomic_read(&cmdq->owner_prod) & CMDQ_PROD_IDX_MASK;
> +}
> +
>  static void parse_driver_options(struct arm_smmu_device *smmu)
>  {
>  	int i = 0;
> @@ -789,7 +824,8 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
>  		/* b. Stop gathering work by clearing the owned flag */
>  		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
>  						   &cmdq->q.llq.atomic.prod);
> -		prod &= ~CMDQ_PROD_OWNED_FLAG;
> +		/* Strip all metadata flags */
> +		prod &= CMDQ_PROD_IDX_MASK;
>  
>  		/*
>  		 * c. Wait for any gathered work to be written to the queue.
> @@ -4828,7 +4864,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
>  
>  	/* Command queue */
>  	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
> -	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
> +	writel_relaxed(smmu->cmdq.q.llq.prod & CMDQ_PROD_IDX_MASK,
> +		       smmu->base + ARM_SMMU_CMDQ_PROD);
>  	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
>  
>  	enables = CR0_CMDQEN;
> @@ -4839,6 +4876,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
>  		return ret;
>  	}
>  
> +	/* Clear any flags from the previous life */
> +	atomic_andnot(CMDQ_PROD_STOP_FLAG, &smmu->cmdq.owner_prod);
> +	atomic_andnot(CMDQ_PROD_STOP_FLAG, &smmu->cmdq.q.llq.atomic.prod);

Shouldn't this be done from the suspend call instead?

> +
>  	/* Invalidate any cached configuration */
>  	arm_smmu_cmdq_issue_cmd_with_sync(smmu, arm_smmu_make_cmd_cfgi_all());
>  
> @@ -4898,6 +4939,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
>  	if (is_kdump_kernel())
>  		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
>  
> +	/*
> +	 * While the SMMU was suspended, concurrent CPU threads may have
> +	 * updated in-memory structures (such as STEs, CDs, and PTEs).
> +	 * Any invalidations corresponding to those updates were safely
> +	 * elided because the command queue was stopped (STOP_FLAG == 1).
> +	 *
> +	 * Since the reset invalidate-all commands above have fully cleared
> +	 * the HW TLBs and config caches, the SMMU will fetch these descriptors
> +	 * directly from RAM as soon as translation is enabled.
> +	 *
> +	 * Add a memory barrier to collect all prior RAM writes to ensure the
> +	 * SMMU sees a consistent view of memory before translation is enabled.
> +	 */
> +	smp_mb();

Shouldn't that be dma_wmb(), as this is syncing with the HW?

> +
>  	/* Enable the SMMU interface */
>  	enables |= CR0_SMMUEN;
>  	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
> @@ -5580,6 +5636,117 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev)
>  	arm_smmu_device_disable(smmu);
>  }
>  
> +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
> +{
> +	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
> +	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
> +	int timeout = ARM_SMMU_SUSPEND_TIMEOUT_US;
> +	u32 enables, target;
> +	int ret;
> +
> +	/* Abort all transactions before disable to avoid spurious bypass */
> +	arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
> +
> +	/* Disable the SMMU via CR0.EN and all queues except CMDQ */
> +	enables = CR0_CMDQEN;
> +	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
> +	if (ret) {
> +		dev_err(smmu->dev, "failed to disable SMMU\n");
> +		return ret;
> +	}
> +
> +	/*
> +	 * At this point the SMMU is completely disabled and won't access
> +	 * any translation/config structures, even speculative accesses
> +	 * aren't performed as per the IHI0070 spec (section 6.3.9.6).
> +	 */
> +
> +	/* Mark the CMDQ to stop and get the target index before the stop */
> +	target = atomic_fetch_or_relaxed(CMDQ_PROD_STOP_FLAG, &cmdq->q.llq.atomic.prod);

As Daniel mentioned, I think this shouldn't be relaxed.

> +	target &= CMDQ_PROD_IDX_MASK;
> +
> +
> +	/* Wait for the last committed owner to reach the hardware */
> +	while ((arm_smmu_cmdq_owner_prod_idx(cmdq) != target) && --timeout)
> +		udelay(1);

I think --timeout has an off-by-one: with the pre-decrement, the loop
delays at most ARM_SMMU_SUSPEND_TIMEOUT_US - 1 times.

Thanks,
Mostafa


  parent reply	other threads:[~2026-06-15 18:20 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-01 21:58 [PATCH v8 00/10] iommu/arm-smmu-v3: Implement Runtime/System Sleep ops Pranjal Shrivastava
2026-06-01 21:58 ` [PATCH v8 01/12] iommu/arm-smmu-v3: Refactor arm_smmu_setup_irqs Pranjal Shrivastava
2026-06-01 21:58 ` [PATCH v8 02/12] iommu/arm-smmu-v3: Add a helper to drain cmd queues Pranjal Shrivastava
2026-06-02  0:12   ` Nicolin Chen
2026-06-02  3:28     ` Pranjal Shrivastava
2026-06-02  5:21   ` Daniel Mentz
2026-06-01 21:59 ` [PATCH v8 03/12] iommu/tegra241-cmdqv: Add a helper to drain VCMDQs Pranjal Shrivastava
2026-06-15 16:58   ` Mostafa Saleh
2026-06-01 21:59 ` [PATCH v8 04/12] iommu/tegra241-cmdqv: Restore PROD and CONS after resume Pranjal Shrivastava
2026-06-15 17:01   ` Mostafa Saleh
2026-06-15 17:43     ` Pranjal Shrivastava
2026-06-01 21:59 ` [PATCH v8 05/12] iommu/arm-smmu-v3: Cache and restore MSI config Pranjal Shrivastava
2026-06-15 17:04   ` Mostafa Saleh
2026-06-01 21:59 ` [PATCH v8 06/12] iommu/arm-smmu-v3: Handle gerror during suspend Pranjal Shrivastava
2026-06-02  0:15   ` Nicolin Chen
2026-06-02  3:31     ` Pranjal Shrivastava
2026-06-01 21:59 ` [PATCH v8 07/12] iommu/arm-smmu-v3: Add CMDQ_PROD_STOP_FLAG to gate CMDQ submissions Pranjal Shrivastava
2026-06-07 21:42   ` Daniel Mentz
2026-06-08  6:19     ` Pranjal Shrivastava
2026-06-09  4:20       ` Daniel Mentz
2026-06-09 10:05         ` Pranjal Shrivastava
2026-06-09 18:20           ` Daniel Mentz
2026-06-09 18:58             ` Pranjal Shrivastava
2026-06-10 17:37               ` Daniel Mentz
2026-06-01 21:59 ` [PATCH v8 08/12] iommu/tegra241-cmdqv: Add a helper to quiesce VCMDQs Pranjal Shrivastava
2026-06-02  0:14   ` Nicolin Chen
2026-06-02  3:37     ` Pranjal Shrivastava
2026-06-02  5:59       ` Nicolin Chen
2026-06-02  6:21         ` Pranjal Shrivastava
2026-06-02  6:29           ` Nicolin Chen
2026-06-01 21:59 ` [PATCH v8 09/12] iommu/arm-smmu-v3: Implement pm_runtime & system sleep ops Pranjal Shrivastava
2026-06-02  5:25   ` Daniel Mentz
2026-06-02 12:12     ` Pranjal Shrivastava
2026-06-07 22:36       ` Daniel Mentz
2026-06-02 15:27   ` Daniel Mentz
2026-06-09 10:13     ` Pranjal Shrivastava
2026-06-07 21:53   ` Daniel Mentz
2026-06-09 10:12     ` Pranjal Shrivastava
2026-06-07 22:30   ` Daniel Mentz
2026-06-09 10:09     ` Pranjal Shrivastava
2026-06-15 18:20   ` Mostafa Saleh [this message]
2026-06-15 19:44     ` Pranjal Shrivastava
2026-06-01 21:59 ` [PATCH v8 10/12] iommu/arm-smmu-v3: Enable pm_runtime and setup devlinks Pranjal Shrivastava
2026-06-01 21:59 ` [PATCH v8 11/12] iommu/arm-smmu-v3: Invoke pm_runtime before hw access Pranjal Shrivastava
2026-06-02  0:24   ` Nicolin Chen
2026-06-02  3:59     ` Pranjal Shrivastava
2026-06-02  5:51       ` Nicolin Chen
2026-06-02  6:24         ` Pranjal Shrivastava
2026-06-03 20:28   ` Daniel Mentz
2026-06-04  6:27     ` Pranjal Shrivastava
2026-06-07 22:22       ` Daniel Mentz
2026-06-09 10:34         ` Pranjal Shrivastava
2026-06-09 18:34           ` Daniel Mentz
2026-06-09 20:11             ` Pranjal Shrivastava
2026-06-10 17:13               ` Daniel Mentz
2026-06-01 21:59 ` [PATCH v8 12/12] iommu/arm-smmu-v3: Add KUnit unit tests for Runtime PM Pranjal Shrivastava
2026-06-02  6:03 ` [PATCH v8 00/10] iommu/arm-smmu-v3: Implement Runtime/System Sleep ops Nicolin Chen
2026-06-02 12:04   ` Pranjal Shrivastava

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ajBCazla1VoJX9Ms@google.com \
    --to=smostafa@google.com \
    --cc=amhetre@nvidia.com \
    --cc=danielmentz@google.com \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=nicolinc@nvidia.com \
    --cc=praan@google.com \
    --cc=robin.murphy@arm.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.