From: Mostafa Saleh <smostafa@google.com>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: iommu@lists.linux.dev, Jonathan Hunter <jonathanh@nvidia.com>,
Joerg Roedel <joro@8bytes.org>,
linux-arm-kernel@lists.infradead.org,
linux-tegra@vger.kernel.org, Robin Murphy <robin.murphy@arm.com>,
Thierry Reding <thierry.reding@kernel.org>,
Krishna Reddy <vdumpa@nvidia.com>, Will Deacon <will@kernel.org>,
David Matlack <dmatlack@google.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
patches@lists.linux.dev, Samiullah Khawaja <skhawaja@google.com>
Subject: Re: [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands
Date: Thu, 7 May 2026 09:24:30 +0000 [thread overview]
Message-ID: <afxaTs-RK_8hbMLO@google.com> (raw)
In-Reply-To: <9-v1-b7dc0a0d4aa0+3723d-smmu_no_cmdq_ent_jgg@nvidia.com>
On Fri, May 01, 2026 at 11:29:18AM -0300, Jason Gunthorpe wrote:
> TLBI is more complicated than all the other commands because the
> invalidation loop builds a template command from the struct
> arm_smmu_inv which is then expanded into many TLBI commands for the
> invalidation.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Mostafa Saleh <smostafa@google.com>
Thanks,
Mostafa
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 170 +++++++-------------
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 35 ++--
> 2 files changed, 71 insertions(+), 134 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 8147b9cdcc6b99..9be589d14a3bd4 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -268,53 +268,6 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
> }
>
> /* High-level queue accessors */
> -static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - u64 *cmd = cmd_out->data;
> -
> - memset(cmd_out, 0, sizeof(*cmd_out));
> - cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> -
> - switch (ent->opcode) {
> - case CMDQ_OP_TLBI_NH_VA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - fallthrough;
> - case CMDQ_OP_TLBI_EL2_VA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> - cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
> - break;
> - case CMDQ_OP_TLBI_S2_IPA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> - cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
> - break;
> - case CMDQ_OP_TLBI_NH_ASID:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - fallthrough;
> - case CMDQ_OP_TLBI_NH_ALL:
> - case CMDQ_OP_TLBI_S12_VMALL:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - break;
> - case CMDQ_OP_TLBI_EL2_ASID:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - break;
> - default:
> - return -ENOENT;
> - }
> -
> - return 0;
> -}
> -
> static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
> struct arm_smmu_cmd *cmd)
> {
> @@ -894,16 +847,6 @@ static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
> cmds->cmdq = arm_smmu_get_cmdq(smmu, cmd);
> }
>
> -static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
> - struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - struct arm_smmu_cmd cmd;
> -
> - arm_smmu_cmdq_build_cmd(&cmd, ent);
> - arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
> -}
> -
> static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds,
> struct arm_smmu_cmd *cmd)
> @@ -934,21 +877,6 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
> arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &__cmd); \
> })
>
> -static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
> - struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - struct arm_smmu_cmd cmd;
> -
> - if (unlikely(arm_smmu_cmdq_build_cmd(&cmd, ent))) {
> - dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> - ent->opcode);
> - return;
> - }
> -
> - arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &cmd);
> -}
> -
> static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds)
> {
> @@ -2450,12 +2378,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>
> static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *cmd,
> + struct arm_smmu_cmd *cmd, bool leaf,
> unsigned long iova, size_t size,
> size_t granule, size_t pgsize)
> {
> unsigned long end = iova + size, num_pages = 0, tg = pgsize;
> + u64 orig_data0 = cmd->data[0];
> size_t inv_range = granule;
> + u8 ttl = 0, tg_enc = 0;
>
> if (WARN_ON_ONCE(!size))
> return;
> @@ -2464,7 +2394,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> num_pages = size >> tg;
>
> /* Convert page size of 12,14,16 (log2) to 1,2,3 */
> - cmd->tlbi.tg = (tg - 10) / 2;
> + tg_enc = (tg - 10) / 2;
>
> /*
> * Determine what level the granule is at. For non-leaf, both
> @@ -2474,8 +2404,8 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> * want to use a range command, so avoid the SVA corner case
> * where both scale and num could be 0 as well.
> */
> - if (cmd->tlbi.leaf)
> - cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> + if (leaf)
> + ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
> num_pages++;
> }
> @@ -2493,11 +2423,13 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
>
> /* Determine the power of 2 multiple number of pages */
> scale = __ffs(num_pages);
> - cmd->tlbi.scale = scale;
>
> /* Determine how many chunks of 2^scale size we have */
> num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
> - cmd->tlbi.num = num - 1;
> +
> + cmd->data[0] = orig_data0 |
> + FIELD_PREP(CMDQ_TLBI_0_NUM, num - 1) |
> + FIELD_PREP(CMDQ_TLBI_0_SCALE, scale);
>
> /* range is num * 2^scale * pgsize */
> inv_range = num << (scale + tg);
> @@ -2506,8 +2438,17 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> num_pages -= num << scale;
> }
>
> - cmd->tlbi.addr = iova;
> - arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
> + /*
> + * IPA has fewer bits than VA, but they are reserved in the
> + * command and something would be very broken if iova had them
> + * set.
> + */
> + cmd->data[1] = FIELD_PREP(CMDQ_TLBI_1_LEAF, leaf) |
> + FIELD_PREP(CMDQ_TLBI_1_TTL, ttl) |
> + FIELD_PREP(CMDQ_TLBI_1_TG, tg_enc) |
> + (iova & ~GENMASK_U64(11, 0));
> +
> + arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, cmd);
> iova += inv_range;
> }
> }
> @@ -2538,19 +2479,22 @@ static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
> /* Used by non INV_TYPE_ATS* invalidations */
> static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
> struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *cmd,
> + struct arm_smmu_cmd *cmd,
> + bool leaf,
> unsigned long iova, size_t size,
> unsigned int granule)
> {
> if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
> - cmd->opcode = inv->nsize_opcode;
> - arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);
> + struct arm_smmu_cmd nsize_cmd = *cmd;
> +
> + u64p_replace_bits(&nsize_cmd.data[0], inv->nsize_opcode,
> + CMDQ_0_OP);
> + arm_smmu_cmdq_batch_add_cmd_p(inv->smmu, cmds, &nsize_cmd);
> return;
> }
>
> - cmd->opcode = inv->size_opcode;
> - arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
> - inv->pgsize);
> + arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, leaf,
> + iova, size, granule, inv->pgsize);
> }
>
> static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
> @@ -2585,38 +2529,39 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
> break;
> while (cur != end) {
> struct arm_smmu_device *smmu = cur->smmu;
> - struct arm_smmu_cmdq_ent cmd = {
> - /*
> - * Pick size_opcode to run arm_smmu_get_cmdq(). This can
> - * be changed to nsize_opcode, which would result in the
> - * same CMDQ pointer.
> - */
> - .opcode = cur->size_opcode,
> - };
> + /*
> + * Pick size_opcode to run arm_smmu_get_cmdq(). This can
> + * be changed to nsize_opcode, which would result in the
> + * same CMDQ pointer.
> + */
> + struct arm_smmu_cmd cmd =
> + arm_smmu_make_cmd_op(cur->size_opcode);
> struct arm_smmu_inv *next;
>
> if (!cmds.num)
> - arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
>
> switch (cur->type) {
> case INV_TYPE_S1_ASID:
> - cmd.tlbi.asid = cur->id;
> - cmd.tlbi.leaf = leaf;
> - arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
> - granule);
> + cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
> + cur->id, 0);
> + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
> + iova, size, granule);
> break;
> case INV_TYPE_S2_VMID:
> - cmd.tlbi.vmid = cur->id;
> - cmd.tlbi.leaf = leaf;
> - arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
> - granule);
> + cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
> + 0, cur->id);
> + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
> + iova, size, granule);
> break;
> case INV_TYPE_S2_VMID_S1_CLEAR:
> /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */
> if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))
> break;
> - cmd.tlbi.vmid = cur->id;
> - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_add_cmd(
> + smmu, &cmds,
> + arm_smmu_make_cmd_tlbi(cur->size_opcode, 0,
> + cur->id));
> break;
> case INV_TYPE_ATS:
> arm_smmu_cmdq_batch_add_cmd(
> @@ -3359,24 +3304,21 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
>
> static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
> {
> - struct arm_smmu_cmdq_ent cmd = {};
> - struct arm_smmu_cmd hw_cmd;
> -
> switch (inv->type) {
> case INV_TYPE_S1_ASID:
> - cmd.tlbi.asid = inv->id;
> + arm_smmu_cmdq_issue_cmd_with_sync(
> + inv->smmu,
> + arm_smmu_make_cmd_tlbi(inv->nsize_opcode, inv->id, 0));
> break;
> case INV_TYPE_S2_VMID:
> /* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
> - cmd.tlbi.vmid = inv->id;
> + arm_smmu_cmdq_issue_cmd_with_sync(
> + inv->smmu,
> + arm_smmu_make_cmd_tlbi(inv->nsize_opcode, 0, inv->id));
> break;
> default:
> return;
> }
> -
> - cmd.opcode = inv->nsize_opcode;
> - arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
> - arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, hw_cmd);
> }
>
> /* Should be installed after arm_smmu_install_ste_for_dev() */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 538380de7d48a0..16353596e08ad8 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -583,6 +583,21 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
> return cmd;
> }
>
> +/*
> + * TLBI commands - the non-sized variants just need opcode + asid/vmid.
> + * For sized variants the caller sets up data[0] with the immutable fields
> + * (opcode + asid/vmid) and the range loop fills in per-iteration fields.
> + */
> +static inline struct arm_smmu_cmd
> +arm_smmu_make_cmd_tlbi(enum arm_smmu_cmdq_opcode op, u16 asid, u16 vmid)
> +{
> + struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(op);
> +
> + cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, asid) |
> + FIELD_PREP(CMDQ_TLBI_0_VMID, vmid);
> + return cmd;
> +}
> +
> /* Event queue */
> #define EVTQ_ENT_SZ_SHIFT 5
> #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
> @@ -643,26 +658,6 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
> #define MSI_IOVA_BASE 0x8000000
> #define MSI_IOVA_LENGTH 0x100000
>
> -struct arm_smmu_cmdq_ent {
> - /* Common fields */
> - u8 opcode;
> - bool substream_valid;
> -
> - /* Command-specific fields */
> - union {
> - struct {
> - u8 num;
> - u8 scale;
> - u16 asid;
> - u16 vmid;
> - bool leaf;
> - u8 ttl;
> - u8 tg;
> - u64 addr;
> - } tlbi;
> - };
> -};
> -
> struct arm_smmu_ll_queue {
> union {
> u64 val;
> --
> 2.43.0
>
next prev parent reply other threads:[~2026-05-07 9:24 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
2026-05-06 6:11 ` Nicolin Chen
2026-05-06 23:41 ` Samiullah Khawaja
2026-05-07 9:19 ` Mostafa Saleh
2026-05-08 7:29 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 2/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq selection functions Jason Gunthorpe
2026-05-07 9:21 ` Mostafa Saleh
2026-05-08 15:49 ` Jason Gunthorpe
2026-05-08 7:47 ` Pranjal Shrivastava
2026-05-08 15:54 ` Jason Gunthorpe
2026-05-08 16:58 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 3/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq submission functions Jason Gunthorpe
2026-05-07 9:21 ` Mostafa Saleh
2026-05-08 8:27 ` Pranjal Shrivastava
2026-05-08 16:00 ` Jason Gunthorpe
2026-05-08 17:00 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 4/9] iommu/arm-smmu-v3: Convert arm_smmu_cmdq_batch cmds to struct arm_smmu_cmd Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 9:26 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 5/9] iommu/arm-smmu-v3: Remove CMDQ_OP_CFGI_CD_ALL from arm_smmu_cmdq_build_cmd() Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 9:45 ` Pranjal Shrivastava
2026-05-08 16:02 ` Jason Gunthorpe
2026-05-08 17:17 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 6/9] iommu/arm-smmu-v3: Directly encode simple commands Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 11:33 ` Pranjal Shrivastava
2026-05-08 17:37 ` Jason Gunthorpe
2026-05-08 20:09 ` Pranjal Shrivastava
2026-05-08 23:36 ` Jason Gunthorpe
2026-05-10 18:59 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 7/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_ATC_INV Jason Gunthorpe
2026-05-07 9:23 ` Mostafa Saleh
2026-05-08 11:46 ` Pranjal Shrivastava
2026-05-09 16:54 ` Jason Gunthorpe
2026-05-11 10:34 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 8/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_SYNC Jason Gunthorpe
2026-05-07 9:23 ` Mostafa Saleh
2026-05-08 13:41 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands Jason Gunthorpe
2026-05-07 9:24 ` Mostafa Saleh [this message]
2026-05-08 14:00 ` Pranjal Shrivastava
2026-05-07 9:26 ` [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Mostafa Saleh
2026-05-08 14:03 ` Pranjal Shrivastava
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=afxaTs-RK_8hbMLO@google.com \
--to=smostafa@google.com \
--cc=dmatlack@google.com \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=jonathanh@nvidia.com \
--cc=joro@8bytes.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-tegra@vger.kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=patches@lists.linux.dev \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=thierry.reding@kernel.org \
--cc=vdumpa@nvidia.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.