From: Mostafa Saleh <smostafa@google.com>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: iommu@lists.linux.dev, Jonathan Hunter <jonathanh@nvidia.com>,
Joerg Roedel <joro@8bytes.org>,
linux-arm-kernel@lists.infradead.org,
linux-tegra@vger.kernel.org, Robin Murphy <robin.murphy@arm.com>,
Thierry Reding <thierry.reding@kernel.org>,
Krishna Reddy <vdumpa@nvidia.com>, Will Deacon <will@kernel.org>,
David Matlack <dmatlack@google.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
patches@lists.linux.dev, Samiullah Khawaja <skhawaja@google.com>
Subject: Re: [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands
Date: Thu, 7 May 2026 09:24:30 +0000 [thread overview]
Message-ID: <afxaTs-RK_8hbMLO@google.com> (raw)
In-Reply-To: <9-v1-b7dc0a0d4aa0+3723d-smmu_no_cmdq_ent_jgg@nvidia.com>
On Fri, May 01, 2026 at 11:29:18AM -0300, Jason Gunthorpe wrote:
> TLBI is more complicated than all the other commands because the
> invalidation loop builds a template command from the struct
> arm_smmu_inv which is then expanded into many TLBI commands for the
> invalidation.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Mostafa Saleh <smostafa@google.com>
Thanks,
Mostafa
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 170 +++++++-------------
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 35 ++--
> 2 files changed, 71 insertions(+), 134 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 8147b9cdcc6b99..9be589d14a3bd4 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -268,53 +268,6 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
> }
>
> /* High-level queue accessors */
> -static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - u64 *cmd = cmd_out->data;
> -
> - memset(cmd_out, 0, sizeof(*cmd_out));
> - cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> -
> - switch (ent->opcode) {
> - case CMDQ_OP_TLBI_NH_VA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - fallthrough;
> - case CMDQ_OP_TLBI_EL2_VA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> - cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
> - break;
> - case CMDQ_OP_TLBI_S2_IPA:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> - cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> - cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
> - break;
> - case CMDQ_OP_TLBI_NH_ASID:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - fallthrough;
> - case CMDQ_OP_TLBI_NH_ALL:
> - case CMDQ_OP_TLBI_S12_VMALL:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> - break;
> - case CMDQ_OP_TLBI_EL2_ASID:
> - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> - break;
> - default:
> - return -ENOENT;
> - }
> -
> - return 0;
> -}
> -
> static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
> struct arm_smmu_cmd *cmd)
> {
> @@ -894,16 +847,6 @@ static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
> cmds->cmdq = arm_smmu_get_cmdq(smmu, cmd);
> }
>
> -static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
> - struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - struct arm_smmu_cmd cmd;
> -
> - arm_smmu_cmdq_build_cmd(&cmd, ent);
> - arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
> -}
> -
> static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds,
> struct arm_smmu_cmd *cmd)
> @@ -934,21 +877,6 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
> arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &__cmd); \
> })
>
> -static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
> - struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *ent)
> -{
> - struct arm_smmu_cmd cmd;
> -
> - if (unlikely(arm_smmu_cmdq_build_cmd(&cmd, ent))) {
> - dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> - ent->opcode);
> - return;
> - }
> -
> - arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &cmd);
> -}
> -
> static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds)
> {
> @@ -2450,12 +2378,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>
> static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *cmd,
> + struct arm_smmu_cmd *cmd, bool leaf,
> unsigned long iova, size_t size,
> size_t granule, size_t pgsize)
> {
> unsigned long end = iova + size, num_pages = 0, tg = pgsize;
> + u64 orig_data0 = cmd->data[0];
> size_t inv_range = granule;
> + u8 ttl = 0, tg_enc = 0;
>
> if (WARN_ON_ONCE(!size))
> return;
> @@ -2464,7 +2394,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> num_pages = size >> tg;
>
> /* Convert page size of 12,14,16 (log2) to 1,2,3 */
> - cmd->tlbi.tg = (tg - 10) / 2;
> + tg_enc = (tg - 10) / 2;
>
> /*
> * Determine what level the granule is at. For non-leaf, both
> @@ -2474,8 +2404,8 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> * want to use a range command, so avoid the SVA corner case
> * where both scale and num could be 0 as well.
> */
> - if (cmd->tlbi.leaf)
> - cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> + if (leaf)
> + ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
> num_pages++;
> }
> @@ -2493,11 +2423,13 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
>
> /* Determine the power of 2 multiple number of pages */
> scale = __ffs(num_pages);
> - cmd->tlbi.scale = scale;
>
> /* Determine how many chunks of 2^scale size we have */
> num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
> - cmd->tlbi.num = num - 1;
> +
> + cmd->data[0] = orig_data0 |
> + FIELD_PREP(CMDQ_TLBI_0_NUM, num - 1) |
> + FIELD_PREP(CMDQ_TLBI_0_SCALE, scale);
>
> /* range is num * 2^scale * pgsize */
> inv_range = num << (scale + tg);
> @@ -2506,8 +2438,17 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
> num_pages -= num << scale;
> }
>
> - cmd->tlbi.addr = iova;
> - arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
> + /*
> + * IPA has fewer bits than VA, but they are reserved in the
> + * command and something would be very broken if iova had them
> + * set.
> + */
> + cmd->data[1] = FIELD_PREP(CMDQ_TLBI_1_LEAF, leaf) |
> + FIELD_PREP(CMDQ_TLBI_1_TTL, ttl) |
> + FIELD_PREP(CMDQ_TLBI_1_TG, tg_enc) |
> + (iova & ~GENMASK_U64(11, 0));
> +
> + arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, cmd);
> iova += inv_range;
> }
> }
> @@ -2538,19 +2479,22 @@ static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
> /* Used by non INV_TYPE_ATS* invalidations */
> static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
> struct arm_smmu_cmdq_batch *cmds,
> - struct arm_smmu_cmdq_ent *cmd,
> + struct arm_smmu_cmd *cmd,
> + bool leaf,
> unsigned long iova, size_t size,
> unsigned int granule)
> {
> if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
> - cmd->opcode = inv->nsize_opcode;
> - arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);
> + struct arm_smmu_cmd nsize_cmd = *cmd;
> +
> + u64p_replace_bits(&nsize_cmd.data[0], inv->nsize_opcode,
> + CMDQ_0_OP);
> + arm_smmu_cmdq_batch_add_cmd_p(inv->smmu, cmds, &nsize_cmd);
> return;
> }
>
> - cmd->opcode = inv->size_opcode;
> - arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
> - inv->pgsize);
> + arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, leaf,
> + iova, size, granule, inv->pgsize);
> }
>
> static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
> @@ -2585,38 +2529,39 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
> break;
> while (cur != end) {
> struct arm_smmu_device *smmu = cur->smmu;
> - struct arm_smmu_cmdq_ent cmd = {
> - /*
> - * Pick size_opcode to run arm_smmu_get_cmdq(). This can
> - * be changed to nsize_opcode, which would result in the
> - * same CMDQ pointer.
> - */
> - .opcode = cur->size_opcode,
> - };
> + /*
> + * Pick size_opcode to run arm_smmu_get_cmdq(). This can
> + * be changed to nsize_opcode, which would result in the
> + * same CMDQ pointer.
> + */
> + struct arm_smmu_cmd cmd =
> + arm_smmu_make_cmd_op(cur->size_opcode);
> struct arm_smmu_inv *next;
>
> if (!cmds.num)
> - arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
>
> switch (cur->type) {
> case INV_TYPE_S1_ASID:
> - cmd.tlbi.asid = cur->id;
> - cmd.tlbi.leaf = leaf;
> - arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
> - granule);
> + cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
> + cur->id, 0);
> + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
> + iova, size, granule);
> break;
> case INV_TYPE_S2_VMID:
> - cmd.tlbi.vmid = cur->id;
> - cmd.tlbi.leaf = leaf;
> - arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
> - granule);
> + cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
> + 0, cur->id);
> + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
> + iova, size, granule);
> break;
> case INV_TYPE_S2_VMID_S1_CLEAR:
> /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */
> if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))
> break;
> - cmd.tlbi.vmid = cur->id;
> - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
> + arm_smmu_cmdq_batch_add_cmd(
> + smmu, &cmds,
> + arm_smmu_make_cmd_tlbi(cur->size_opcode, 0,
> + cur->id));
> break;
> case INV_TYPE_ATS:
> arm_smmu_cmdq_batch_add_cmd(
> @@ -3359,24 +3304,21 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
>
> static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
> {
> - struct arm_smmu_cmdq_ent cmd = {};
> - struct arm_smmu_cmd hw_cmd;
> -
> switch (inv->type) {
> case INV_TYPE_S1_ASID:
> - cmd.tlbi.asid = inv->id;
> + arm_smmu_cmdq_issue_cmd_with_sync(
> + inv->smmu,
> + arm_smmu_make_cmd_tlbi(inv->nsize_opcode, inv->id, 0));
> break;
> case INV_TYPE_S2_VMID:
> /* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
> - cmd.tlbi.vmid = inv->id;
> + arm_smmu_cmdq_issue_cmd_with_sync(
> + inv->smmu,
> + arm_smmu_make_cmd_tlbi(inv->nsize_opcode, 0, inv->id));
> break;
> default:
> return;
> }
> -
> - cmd.opcode = inv->nsize_opcode;
> - arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
> - arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, hw_cmd);
> }
>
> /* Should be installed after arm_smmu_install_ste_for_dev() */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 538380de7d48a0..16353596e08ad8 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -583,6 +583,21 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
> return cmd;
> }
>
> +/*
> + * TLBI commands - the non-sized variants just need opcode + asid/vmid.
> + * For sized variants the caller sets up data[0] with the immutable fields
> + * (opcode + asid/vmid) and the range loop fills in per-iteration fields.
> + */
> +static inline struct arm_smmu_cmd
> +arm_smmu_make_cmd_tlbi(enum arm_smmu_cmdq_opcode op, u16 asid, u16 vmid)
> +{
> + struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(op);
> +
> + cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, asid) |
> + FIELD_PREP(CMDQ_TLBI_0_VMID, vmid);
> + return cmd;
> +}
> +
> /* Event queue */
> #define EVTQ_ENT_SZ_SHIFT 5
> #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
> @@ -643,26 +658,6 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
> #define MSI_IOVA_BASE 0x8000000
> #define MSI_IOVA_LENGTH 0x100000
>
> -struct arm_smmu_cmdq_ent {
> - /* Common fields */
> - u8 opcode;
> - bool substream_valid;
> -
> - /* Command-specific fields */
> - union {
> - struct {
> - u8 num;
> - u8 scale;
> - u16 asid;
> - u16 vmid;
> - bool leaf;
> - u8 ttl;
> - u8 tg;
> - u64 addr;
> - } tlbi;
> - };
> -};
> -
> struct arm_smmu_ll_queue {
> union {
> u64 val;
> --
> 2.43.0
>
next prev parent reply other threads:[~2026-05-07 9:24 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
2026-05-06 6:11 ` Nicolin Chen
2026-05-06 23:41 ` Samiullah Khawaja
2026-05-07 9:19 ` Mostafa Saleh
2026-05-08 7:29 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 2/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq selection functions Jason Gunthorpe
2026-05-07 9:21 ` Mostafa Saleh
2026-05-08 15:49 ` Jason Gunthorpe
2026-05-08 7:47 ` Pranjal Shrivastava
2026-05-08 15:54 ` Jason Gunthorpe
2026-05-08 16:58 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 3/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq submission functions Jason Gunthorpe
2026-05-07 9:21 ` Mostafa Saleh
2026-05-08 8:27 ` Pranjal Shrivastava
2026-05-08 16:00 ` Jason Gunthorpe
2026-05-08 17:00 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 4/9] iommu/arm-smmu-v3: Convert arm_smmu_cmdq_batch cmds to struct arm_smmu_cmd Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 9:26 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 5/9] iommu/arm-smmu-v3: Remove CMDQ_OP_CFGI_CD_ALL from arm_smmu_cmdq_build_cmd() Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 9:45 ` Pranjal Shrivastava
2026-05-08 16:02 ` Jason Gunthorpe
2026-05-08 17:17 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 6/9] iommu/arm-smmu-v3: Directly encode simple commands Jason Gunthorpe
2026-05-07 9:22 ` Mostafa Saleh
2026-05-08 11:33 ` Pranjal Shrivastava
2026-05-08 17:37 ` Jason Gunthorpe
2026-05-08 20:09 ` Pranjal Shrivastava
2026-05-08 23:36 ` Jason Gunthorpe
2026-05-10 18:59 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 7/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_ATC_INV Jason Gunthorpe
2026-05-07 9:23 ` Mostafa Saleh
2026-05-08 11:46 ` Pranjal Shrivastava
2026-05-09 16:54 ` Jason Gunthorpe
2026-05-11 10:34 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 8/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_SYNC Jason Gunthorpe
2026-05-07 9:23 ` Mostafa Saleh
2026-05-08 13:41 ` Pranjal Shrivastava
2026-05-01 14:29 ` [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands Jason Gunthorpe
2026-05-07 9:24 ` Mostafa Saleh [this message]
2026-05-08 14:00 ` Pranjal Shrivastava
2026-05-07 9:26 ` [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Mostafa Saleh
2026-05-08 14:03 ` Pranjal Shrivastava
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=afxaTs-RK_8hbMLO@google.com \
--to=smostafa@google.com \
--cc=dmatlack@google.com \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=jonathanh@nvidia.com \
--cc=joro@8bytes.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-tegra@vger.kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=patches@lists.linux.dev \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=thierry.reding@kernel.org \
--cc=vdumpa@nvidia.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.