From: Pranjal Shrivastava <praan@google.com>
To: Cheng-Yang Chou <yphbchou0911@gmail.com>
Cc: will@kernel.org, robin.murphy@arm.com,
linux-arm-kernel@lists.infradead.org, iommu@lists.linux.dev,
jserv@ccns.ncku.edu.tw
Subject: Re: [PATCH] iommu/arm-smmu-v3: Allocate cmdq_batch on the heap
Date: Wed, 11 Mar 2026 14:22:50 +0000 [thread overview]
Message-ID: <abF6un_jrQ8zGZZ4@google.com> (raw)
In-Reply-To: <20260311094444.3714302-1-yphbchou0911@gmail.com>
On Wed, Mar 11, 2026 at 05:44:44PM +0800, Cheng-Yang Chou wrote:
> The arm_smmu_cmdq_batch structure is large and was being allocated on
> the stack in four call sites, causing stack frame sizes to exceed the
> 1024-byte limit:
>
> - arm_smmu_atc_inv_domain: 1120 bytes
> - arm_smmu_atc_inv_master: 1088 bytes
> - arm_smmu_sync_cd: 1088 bytes
> - __arm_smmu_tlb_inv_range: 1072 bytes
>
> Move these allocations to the heap using kmalloc_obj() and kfree() to
> eliminate the -Wframe-larger-than=1024 warnings and prevent potential
> stack overflows.
>
Thanks for the patch. I agree that we should address these warnings, but
moving these allocations to the heap via kmalloc_obj() in the fast path
is problematic. Introducing heap allocation adds unnecessary latency and
potential for allocation failure in hot paths.
So, yes, we are using a lot of stack, but we're using it to do good
things.
IMO, if we really want to address these, instead of kmalloc, we could
potentially consider some pre-allocated per-CPU buffers (that's a lot of
additional book-keeping though) to keep the data off the stack or
something similar, following a simple rule: the fast path must be
deterministic — no SLAB allocations and no new failure points.
The last thing we'd want is a graphics driver's shrinker calling
dma-unmap while the system is already under heavy memory pressure, with
kmalloc introducing a circular dependency or an allocation failure
exactly when the system needs to perform the unmap the most.
Thanks,
Praan
> Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 66 +++++++++++++++------
> 1 file changed, 48 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 4d00d796f078..734546dc6a78 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1281,7 +1281,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
> int ssid, bool leaf)
> {
> size_t i;
> - struct arm_smmu_cmdq_batch cmds;
> + struct arm_smmu_cmdq_batch *cmds;
> struct arm_smmu_device *smmu = master->smmu;
> struct arm_smmu_cmdq_ent cmd = {
> .opcode = CMDQ_OP_CFGI_CD,
> @@ -1291,13 +1291,23 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
> },
> };
>
> - arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
> + cmds = kmalloc_obj(*cmds);
> + if (!cmds) {
> + struct arm_smmu_cmdq_ent cmd_all = { .opcode = CMDQ_OP_CFGI_ALL };
> +
> + WARN_ONCE(1, "arm-smmu-v3: failed to allocate cmdq_batch, falling back to full CD invalidation\n");
> + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd_all);
> + return;
> + }
> +
> + arm_smmu_cmdq_batch_init(smmu, cmds, &cmd);
> for (i = 0; i < master->num_streams; i++) {
> cmd.cfgi.sid = master->streams[i].id;
> - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_add(smmu, cmds, &cmd);
> }
>
> - arm_smmu_cmdq_batch_submit(smmu, &cmds);
> + arm_smmu_cmdq_batch_submit(smmu, cmds);
> + kfree(cmds);
> }
>
> static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
> @@ -2225,31 +2235,37 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
> static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
> ioasid_t ssid)
> {
> - int i;
> + int i, ret;
> struct arm_smmu_cmdq_ent cmd;
> - struct arm_smmu_cmdq_batch cmds;
> + struct arm_smmu_cmdq_batch *cmds;
>
> arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
>
> - arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
> + cmds = kmalloc_obj(*cmds);
> + if (!cmds)
> + return -ENOMEM;
> +
> + arm_smmu_cmdq_batch_init(master->smmu, cmds, &cmd);
> for (i = 0; i < master->num_streams; i++) {
> cmd.atc.sid = master->streams[i].id;
> - arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_add(master->smmu, cmds, &cmd);
> }
>
> - return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
> + ret = arm_smmu_cmdq_batch_submit(master->smmu, cmds);
> + kfree(cmds);
> + return ret;
> }
>
> int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
> unsigned long iova, size_t size)
> {
> struct arm_smmu_master_domain *master_domain;
> - int i;
> + int i, ret;
> unsigned long flags;
> struct arm_smmu_cmdq_ent cmd = {
> .opcode = CMDQ_OP_ATC_INV,
> };
> - struct arm_smmu_cmdq_batch cmds;
> + struct arm_smmu_cmdq_batch *cmds;
>
> if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
> return 0;
> @@ -2271,7 +2287,11 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
> if (!atomic_read(&smmu_domain->nr_ats_masters))
> return 0;
>
> - arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
> + cmds = kmalloc_obj(*cmds);
> + if (!cmds)
> + return -ENOMEM;
> +
> + arm_smmu_cmdq_batch_init(smmu_domain->smmu, cmds, &cmd);
>
> spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> list_for_each_entry(master_domain, &smmu_domain->devices,
> @@ -2294,12 +2314,14 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
>
> for (i = 0; i < master->num_streams; i++) {
> cmd.atc.sid = master->streams[i].id;
> - arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_add(smmu_domain->smmu, cmds, &cmd);
> }
> }
> spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>
> - return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
> + ret = arm_smmu_cmdq_batch_submit(smmu_domain->smmu, cmds);
> + kfree(cmds);
> + return ret;
> }
>
> /* IO_PGTABLE API */
> @@ -2334,7 +2356,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
> struct arm_smmu_device *smmu = smmu_domain->smmu;
> unsigned long end = iova + size, num_pages = 0, tg = 0;
> size_t inv_range = granule;
> - struct arm_smmu_cmdq_batch cmds;
> + struct arm_smmu_cmdq_batch *cmds;
>
> if (!size)
> return;
> @@ -2362,7 +2384,14 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
> num_pages++;
> }
>
> - arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
> + cmds = kmalloc_obj(*cmds);
> + if (!cmds) {
> + WARN_ONCE(1, "arm-smmu-v3: failed to allocate cmdq_batch, falling back to full TLB invalidation\n");
> + arm_smmu_tlb_inv_context(smmu_domain);
> + return;
> + }
> +
> + arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
>
> while (iova < end) {
> if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
> @@ -2391,10 +2420,11 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
> }
>
> cmd->tlbi.addr = iova;
> - arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
> + arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
> iova += inv_range;
> }
> - arm_smmu_cmdq_batch_submit(smmu, &cmds);
> + arm_smmu_cmdq_batch_submit(smmu, cmds);
> + kfree(cmds);
> }
>
> static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
> --
> 2.48.1
>
>
next prev parent reply other threads:[~2026-03-11 14:23 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-11 9:44 [PATCH] iommu/arm-smmu-v3: Allocate cmdq_batch on the heap Cheng-Yang Chou
2026-03-11 14:22 ` Pranjal Shrivastava [this message]
2026-03-12 18:24 ` Cheng-Yang Chou
2026-03-12 22:50 ` Nicolin Chen
2026-03-13 0:06 ` Pranjal Shrivastava
2026-03-17 13:38 ` Robin Murphy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=abF6un_jrQ8zGZZ4@google.com \
--to=praan@google.com \
--cc=iommu@lists.linux.dev \
--cc=jserv@ccns.ncku.edu.tw \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=robin.murphy@arm.com \
--cc=will@kernel.org \
--cc=yphbchou0911@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox