* [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-06 6:11 ` Nicolin Chen
2026-05-06 23:41 ` Samiullah Khawaja
2026-05-01 14:29 ` [PATCH 2/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq selection functions Jason Gunthorpe
` (7 subsequent siblings)
8 siblings, 2 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Like STE/CD, add a wrapper struct around the u64 array to represent the
already FIELD_PREP'd command data. Unlike the STE/CD this is a u64
because the command submission path will have to swap to le64.
This makes the API clearer when a u64 is referring to a formatted
command and makes the following changes easier to follow.
Move the command constants out of the struct and into an enum alongside
the rest of the constants defining the HW format so the entire HW format
is self contained and independent of struct arm_smmu_cmdq_ent.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 48 ++++++++++++---------
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ef42df4753ec4d..092179f689e9f1 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -390,6 +390,10 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
+struct arm_smmu_cmd {
+ u64 data[CMDQ_ENT_DWORDS];
+};
+
/*
* This is used to size the command queue and therefore must be at least
* BITS_PER_LONG so that the valid_map works correctly (it relies on the
@@ -426,6 +430,8 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
+#define ATC_INV_SIZE_ALL 52
+
#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
@@ -447,6 +453,28 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
+enum arm_smmu_cmdq_opcode {
+ CMDQ_OP_PREFETCH_CFG = 0x1,
+ CMDQ_OP_CFGI_STE = 0x3,
+ CMDQ_OP_CFGI_ALL = 0x4,
+ CMDQ_OP_CFGI_CD = 0x5,
+ CMDQ_OP_CFGI_CD_ALL = 0x6,
+ CMDQ_OP_TLBI_NH_ALL = 0x10,
+ CMDQ_OP_TLBI_NH_ASID = 0x11,
+ CMDQ_OP_TLBI_NH_VA = 0x12,
+ CMDQ_OP_TLBI_NH_VAA = 0x13,
+ CMDQ_OP_TLBI_EL2_ALL = 0x20,
+ CMDQ_OP_TLBI_EL2_ASID = 0x21,
+ CMDQ_OP_TLBI_EL2_VA = 0x22,
+ CMDQ_OP_TLBI_S12_VMALL = 0x28,
+ CMDQ_OP_TLBI_S2_IPA = 0x2a,
+ CMDQ_OP_TLBI_NSNH_ALL = 0x30,
+ CMDQ_OP_ATC_INV = 0x40,
+ CMDQ_OP_PRI_RESP = 0x41,
+ CMDQ_OP_RESUME = 0x44,
+ CMDQ_OP_CMD_SYNC = 0x46,
+};
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -520,15 +548,10 @@ struct arm_smmu_cmdq_ent {
/* Command-specific fields */
union {
- #define CMDQ_OP_PREFETCH_CFG 0x1
struct {
u32 sid;
} prefetch;
- #define CMDQ_OP_CFGI_STE 0x3
- #define CMDQ_OP_CFGI_ALL 0x4
- #define CMDQ_OP_CFGI_CD 0x5
- #define CMDQ_OP_CFGI_CD_ALL 0x6
struct {
u32 sid;
u32 ssid;
@@ -538,16 +561,6 @@ struct arm_smmu_cmdq_ent {
};
} cfgi;
- #define CMDQ_OP_TLBI_NH_ALL 0x10
- #define CMDQ_OP_TLBI_NH_ASID 0x11
- #define CMDQ_OP_TLBI_NH_VA 0x12
- #define CMDQ_OP_TLBI_NH_VAA 0x13
- #define CMDQ_OP_TLBI_EL2_ALL 0x20
- #define CMDQ_OP_TLBI_EL2_ASID 0x21
- #define CMDQ_OP_TLBI_EL2_VA 0x22
- #define CMDQ_OP_TLBI_S12_VMALL 0x28
- #define CMDQ_OP_TLBI_S2_IPA 0x2a
- #define CMDQ_OP_TLBI_NSNH_ALL 0x30
struct {
u8 num;
u8 scale;
@@ -559,8 +572,6 @@ struct arm_smmu_cmdq_ent {
u64 addr;
} tlbi;
- #define CMDQ_OP_ATC_INV 0x40
- #define ATC_INV_SIZE_ALL 52
struct {
u32 sid;
u32 ssid;
@@ -569,7 +580,6 @@ struct arm_smmu_cmdq_ent {
bool global;
} atc;
- #define CMDQ_OP_PRI_RESP 0x41
struct {
u32 sid;
u32 ssid;
@@ -577,14 +587,12 @@ struct arm_smmu_cmdq_ent {
enum pri_resp resp;
} pri;
- #define CMDQ_OP_RESUME 0x44
struct {
u32 sid;
u16 stag;
u8 resp;
} resume;
- #define CMDQ_OP_CMD_SYNC 0x46
struct {
u64 msiaddr;
} sync;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
@ 2026-05-06 6:11 ` Nicolin Chen
2026-05-06 23:41 ` Samiullah Khawaja
1 sibling, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2026-05-06 6:11 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon, David Matlack, Pasha Tatashin, patches,
Samiullah Khawaja, Mostafa Saleh
On Fri, May 01, 2026 at 11:29:10AM -0300, Jason Gunthorpe wrote:
> Like STE/CD, add a wrapper struct around the u64 array to represent the
> already FIELD_PREP'd command data. Unlike the STE/CD this is a u64
> because the command submission path will have to swap to le64.
>
> This makes the API clearer when a u64 is referring to a formatted
> command and makes the following changes easier to follow.
>
> Move the command constants out of the struct and into an enum alongside
> the rest of the constants defining the HW format so the entire HW format
> is self contained and independent of struct arm_smmu_cmdq_ent.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
2026-05-06 6:11 ` Nicolin Chen
@ 2026-05-06 23:41 ` Samiullah Khawaja
1 sibling, 0 replies; 12+ messages in thread
From: Samiullah Khawaja @ 2026-05-06 23:41 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon, David Matlack, Pasha Tatashin, patches,
Mostafa Saleh
On Fri, May 01, 2026 at 11:29:10AM -0300, Jason Gunthorpe wrote:
>Like STE/CD, add a wrapper struct around the u64 array to represent the
>already FIELD_PREP'd command data. Unlike the STE/CD this is a u64
>because the command submission path will have to swap to le64.
>
>This makes the API clearer when a u64 is referring to a formatted
>command and makes the following changes easier to follow.
>
>Move the command constants out of the struct and into an enum alongside
>the rest of the constants defining the HW format so the entire HW format
>is self contained and independent of struct arm_smmu_cmdq_ent.
>
>Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 48 ++++++++++++---------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq selection functions
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 3/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq submission functions Jason Gunthorpe
` (6 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Start removing the use of struct arm_smmu_cmdq_ent, starting at the
lower levels of the call chain. Change the functions that determine which
cmdq to issue the batch to so that they use struct arm_smmu_cmd directly.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 47 ++++++++++++-------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 ++--
.../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 ++--
3 files changed, 38 insertions(+), 25 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e8d7dbe495f030..5cdeaec890592f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -371,12 +371,12 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
}
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
struct arm_smmu_cmdq *cmdq = NULL;
if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
- cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+ cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, cmd);
return cmdq ?: &smmu->cmdq;
}
@@ -924,16 +924,16 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent,
bool sync)
{
- u64 cmd[CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmd;
- if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
+ if (unlikely(arm_smmu_cmdq_build_cmd(cmd.data, ent))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return -EINVAL;
}
return arm_smmu_cmdq_issue_cmdlist(
- smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
+ smmu, arm_smmu_get_cmdq(smmu, &cmd), cmd.data, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -948,42 +948,55 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
+static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmd *cmd)
+{
+ cmds->num = 0;
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, cmd);
+}
+
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *ent)
{
- cmds->num = 0;
- cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
+ struct arm_smmu_cmd cmd;
+
+ arm_smmu_cmdq_build_cmd(cmd.data, ent);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd)
+ struct arm_smmu_cmdq_ent *ent)
{
- bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
+ struct arm_smmu_cmd cmd;
+ bool unsupported_cmd;
int index;
+ if (unlikely(arm_smmu_cmdq_build_cmd(cmd.data, ent))) {
+ dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+ ent->opcode);
+ return;
+ }
+
+ unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, &cmd);
if (force_sync || unsupported_cmd) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
- arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, false);
- arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
index = cmds->num * CMDQ_ENT_DWORDS;
- if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- cmd->opcode);
- return;
- }
-
+ memcpy(&cmds->cmds[index], cmd.data, sizeof(cmd.data));
cmds->num++;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 092179f689e9f1..6d73f6b63e64a9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -641,13 +641,13 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
- bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent);
+ bool (*supports_cmd)(struct arm_smmu_cmd *cmd);
};
static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
- return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true;
+ return cmdq->supports_cmd ? cmdq->supports_cmd(cmd) : true;
}
struct arm_smmu_cmdq_batch {
@@ -815,7 +815,7 @@ struct arm_smmu_impl_ops {
void (*device_remove)(struct arm_smmu_device *smmu);
int (*init_structures)(struct arm_smmu_device *smmu);
struct arm_smmu_cmdq *(*get_secondary_cmdq)(
- struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
+ struct arm_smmu_device *smmu, struct arm_smmu_cmd *cmd);
/*
* An implementation should define its own type other than the default
* IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 83f6e9f6c51d6b..b4d8c1f2fd3878 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -367,9 +367,9 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
/* Command Queue Function */
-static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
+static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmd *cmd)
{
- switch (ent->opcode) {
+ switch (FIELD_GET(CMDQ_0_OP, cmd->data[0])) {
case CMDQ_OP_TLBI_NH_ASID:
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_ATC_INV:
@@ -381,7 +381,7 @@ static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *
tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
@@ -409,7 +409,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
return NULL;
/* Unsupported CMD goes for smmu->cmdq pathway */
- if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent))
+ if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, cmd))
return NULL;
return &vcmdq->cmdq;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 3/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq submission functions
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 1/9] iommu/arm-smmu-v3: Add struct arm_smmu_cmd to represent the HW format command Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 2/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq selection functions Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 4/9] iommu/arm-smmu-v3: Convert arm_smmu_cmdq_batch cmds to struct arm_smmu_cmd Jason Gunthorpe
` (5 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Continue removing struct arm_smmu_cmdq_ent in favour of the HW based
struct arm_smmu_cmd. Switch the lower-level command-issue functions to work
on the native struct by lifting arm_smmu_cmdq_build_cmd() into all the
callers.
Following patches will revise each of the arm_smmu_cmdq_build_cmd()
call sites to replace it with the HW struct.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 53 ++++++++++++---------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 5cdeaec890592f..67d23e9c54804e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -921,31 +921,23 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
}
static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent,
+ struct arm_smmu_cmd *cmd,
bool sync)
{
- struct arm_smmu_cmd cmd;
-
- if (unlikely(arm_smmu_cmdq_build_cmd(cmd.data, ent))) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- ent->opcode);
- return -EINVAL;
- }
-
return arm_smmu_cmdq_issue_cmdlist(
- smmu, arm_smmu_get_cmdq(smmu, &cmd), cmd.data, 1, sync);
+ smmu, arm_smmu_get_cmdq(smmu, cmd), cmd->data, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
- return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
+ return __arm_smmu_cmdq_issue_cmd(smmu, cmd, false);
}
static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
- return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
+ return __arm_smmu_cmdq_issue_cmd(smmu, cmd, true);
}
static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
@@ -1013,6 +1005,7 @@ static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused
struct arm_smmu_cmdq_ent cmd = {0};
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
int sid = master->streams[0].id;
+ struct arm_smmu_cmd hw_cmd;
if (WARN_ON(!master->stall_enabled))
return;
@@ -1032,7 +1025,9 @@ static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused
break;
}
- arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+ arm_smmu_cmdq_build_cmd(hw_cmd.data, &cmd);
+ arm_smmu_cmdq_issue_cmd(master->smmu, &hw_cmd);
+
/*
* Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
* RESUME consumption guarantees that the stalled transaction will be
@@ -1861,14 +1856,16 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
struct arm_smmu_ste_writer *ste_writer =
container_of(writer, struct arm_smmu_ste_writer, writer);
- struct arm_smmu_cmdq_ent cmd = {
+ struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CFGI_STE,
.cfgi = {
.sid = ste_writer->sid,
.leaf = true,
},
};
+ struct arm_smmu_cmd cmd;
+ arm_smmu_cmdq_build_cmd(cmd.data, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}
@@ -1896,11 +1893,13 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
/* It's likely that we'll want to use the new STE soon */
if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
struct arm_smmu_cmdq_ent
- prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
+ prefetch_ent = { .opcode = CMDQ_OP_PREFETCH_CFG,
.prefetch = {
.sid = sid,
} };
+ struct arm_smmu_cmd prefetch_cmd;
+ arm_smmu_cmdq_build_cmd(prefetch_cmd.data, &prefetch_ent);
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
}
@@ -2328,7 +2327,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
evt[1] & PRIQ_1_ADDR_MASK);
if (last) {
- struct arm_smmu_cmdq_ent cmd = {
+ struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_PRI_RESP,
.substream_valid = ssv,
.pri = {
@@ -2338,7 +2337,9 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
.resp = PRI_RESP_DENY,
},
};
+ struct arm_smmu_cmd cmd;
+ arm_smmu_cmdq_build_cmd(cmd.data, &ent);
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}
}
@@ -3446,6 +3447,7 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
{
struct arm_smmu_cmdq_ent cmd = {};
+ struct arm_smmu_cmd hw_cmd;
switch (inv->type) {
case INV_TYPE_S1_ASID:
@@ -3460,7 +3462,8 @@ static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
}
cmd.opcode = inv->nsize_opcode;
- arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
+ arm_smmu_cmdq_build_cmd(hw_cmd.data, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &hw_cmd);
}
/* Should be installed after arm_smmu_install_ste_for_dev() */
@@ -4823,7 +4826,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
int ret;
u32 reg, enables;
- struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_cmdq_ent ent;
+ struct arm_smmu_cmd cmd;
/* Clear CR0 and sync (disables SMMU and queue processing) */
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
@@ -4870,16 +4874,19 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
}
/* Invalidate any cached configuration */
- cmd.opcode = CMDQ_OP_CFGI_ALL;
+ ent.opcode = CMDQ_OP_CFGI_ALL;
+ arm_smmu_cmdq_build_cmd(cmd.data, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
- cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
+ ent.opcode = CMDQ_OP_TLBI_EL2_ALL;
+ arm_smmu_cmdq_build_cmd(cmd.data, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
- cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
+ ent.opcode = CMDQ_OP_TLBI_NSNH_ALL;
+ arm_smmu_cmdq_build_cmd(cmd.data, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Event queue */
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 4/9] iommu/arm-smmu-v3: Convert arm_smmu_cmdq_batch cmds to struct arm_smmu_cmd
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (2 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 3/9] iommu/arm-smmu-v3: Use the HW arm_smmu_cmd in cmdq submission functions Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 5/9] iommu/arm-smmu-v3: Remove CMDQ_OP_CFGI_CD_ALL from arm_smmu_cmdq_build_cmd() Jason Gunthorpe
` (4 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Convert the batch's type to also get the remaining helper functions to
use the new type and complete replacing naked u64s with the new struct.
The low-level queue_write()/queue_read()/queue_remove_raw() functions
remain u64-based since they are shared by event and PRI queues which
have different entry sizes.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
.../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 24 +++---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 74 ++++++++++---------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 +-
.../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 +-
4 files changed, 58 insertions(+), 53 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index ddae0b07c76b50..1e9f7d2de34414 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -300,7 +300,7 @@ static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
union {
- u64 cmd[2];
+ struct arm_smmu_cmd cmd;
struct iommu_viommu_arm_smmuv3_invalidate ucmd;
};
};
@@ -316,32 +316,32 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
struct arm_vsmmu_invalidation_cmd *cmd)
{
/* Commands are le64 stored in u64 */
- cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
- cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
+ cmd->cmd.data[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
+ cmd->cmd.data[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
- switch (cmd->cmd[0] & CMDQ_0_OP) {
+ switch (cmd->cmd.data[0] & CMDQ_0_OP) {
case CMDQ_OP_TLBI_NSNH_ALL:
/* Convert to NH_ALL */
- cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
+ cmd->cmd.data[0] = CMDQ_OP_TLBI_NH_ALL |
FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
- cmd->cmd[1] = 0;
+ cmd->cmd.data[1] = 0;
break;
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_TLBI_NH_VAA:
case CMDQ_OP_TLBI_NH_ALL:
case CMDQ_OP_TLBI_NH_ASID:
- cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
- cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
+ cmd->cmd.data[0] &= ~CMDQ_TLBI_0_VMID;
+ cmd->cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
break;
case CMDQ_OP_ATC_INV:
case CMDQ_OP_CFGI_CD:
case CMDQ_OP_CFGI_CD_ALL: {
- u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
+ u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd.data[0]);
if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
return -EIO;
- cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
- cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
+ cmd->cmd.data[0] &= ~CMDQ_CFGI_0_SID;
+ cmd->cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
break;
}
default:
@@ -386,7 +386,7 @@ int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
continue;
/* FIXME always uses the main cmdq rather than trying to group by type */
- ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
+ ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, &last->cmd,
cur - last, true);
if (ret) {
cur--;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 67d23e9c54804e..b3ef001ce80d23 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -268,9 +268,12 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
}
/* High-level queue accessors */
-static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
+static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
+ struct arm_smmu_cmdq_ent *ent)
{
- memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
+ u64 *cmd = cmd_out->data;
+
+ memset(cmd_out, 0, sizeof(*cmd_out));
cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
switch (ent->opcode) {
@@ -390,7 +393,8 @@ static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
-static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+static void arm_smmu_cmdq_build_sync_cmd(struct arm_smmu_cmd *cmd,
+ struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq, u32 prod)
{
struct arm_smmu_queue *q = &cmdq->q;
@@ -409,7 +413,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
arm_smmu_cmdq_build_cmd(cmd, &ent);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+ u64p_replace_bits(&cmd->data[0], CMDQ_SYNC_0_CS_NONE,
+ CMDQ_SYNC_0_CS);
}
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
@@ -422,9 +427,8 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
struct arm_smmu_queue *q = &cmdq->q;
-
int i;
- u64 cmd[CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmd;
u32 cons = readl_relaxed(q->cons_reg);
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
struct arm_smmu_cmdq_ent cmd_sync = {
@@ -457,17 +461,18 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
* We may have concurrent producers, so we need to be careful
* not to touch any of the shadow cmdq state.
*/
- queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+ queue_read(cmd.data, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n");
- for (i = 0; i < ARRAY_SIZE(cmd); ++i)
- dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
+ for (i = 0; i < ARRAY_SIZE(cmd.data); ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd.data[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ arm_smmu_cmdq_build_cmd(&cmd, &cmd_sync);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+ u64p_replace_bits(&cmd.data[0], CMDQ_SYNC_0_CS_NONE,
+ CMDQ_SYNC_0_CS);
- queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+ queue_write(Q_ENT(q, cons), cmd.data, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
@@ -767,7 +772,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
-static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds,
u32 prod, int n)
{
int i;
@@ -777,10 +783,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
};
for (i = 0; i < n; ++i) {
- u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
-
prod = queue_inc_prod_n(&llq, i);
- queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+ queue_write(Q_ENT(&cmdq->q, prod), cmds[i].data,
+ ARRAY_SIZE(cmds[i].data));
}
}
@@ -801,10 +806,11 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU.
*/
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
bool sync)
{
- u64 cmd_sync[CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmd_sync;
u32 prod;
unsigned long flags;
bool owner;
@@ -847,8 +853,9 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
- arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
- queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+ arm_smmu_cmdq_build_sync_cmd(&cmd_sync, smmu, cmdq, prod);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd_sync.data,
+ ARRAY_SIZE(cmd_sync.data));
/*
* In order to determine completion of our CMD_SYNC, we must
@@ -925,7 +932,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
bool sync)
{
return arm_smmu_cmdq_issue_cmdlist(
- smmu, arm_smmu_get_cmdq(smmu, cmd), cmd->data, 1, sync);
+ smmu, arm_smmu_get_cmdq(smmu, cmd), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -954,7 +961,7 @@ static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
{
struct arm_smmu_cmd cmd;
- arm_smmu_cmdq_build_cmd(cmd.data, ent);
+ arm_smmu_cmdq_build_cmd(&cmd, ent);
arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
@@ -966,9 +973,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
struct arm_smmu_cmd cmd;
bool unsupported_cmd;
- int index;
- if (unlikely(arm_smmu_cmdq_build_cmd(cmd.data, ent))) {
+ if (unlikely(arm_smmu_cmdq_build_cmd(&cmd, ent))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return;
@@ -987,9 +993,7 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
- index = cmds->num * CMDQ_ENT_DWORDS;
- memcpy(&cmds->cmds[index], cmd.data, sizeof(cmd.data));
- cmds->num++;
+ cmds->cmds[cmds->num++] = cmd;
}
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
@@ -1025,7 +1029,7 @@ static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused
break;
}
- arm_smmu_cmdq_build_cmd(hw_cmd.data, &cmd);
+ arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
arm_smmu_cmdq_issue_cmd(master->smmu, &hw_cmd);
/*
@@ -1865,7 +1869,7 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
};
struct arm_smmu_cmd cmd;
- arm_smmu_cmdq_build_cmd(cmd.data, &ent);
+ arm_smmu_cmdq_build_cmd(&cmd, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}
@@ -1899,7 +1903,7 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
} };
struct arm_smmu_cmd prefetch_cmd;
- arm_smmu_cmdq_build_cmd(prefetch_cmd.data, &prefetch_ent);
+ arm_smmu_cmdq_build_cmd(&prefetch_cmd, &prefetch_ent);
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
}
@@ -2339,7 +2343,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
};
struct arm_smmu_cmd cmd;
- arm_smmu_cmdq_build_cmd(cmd.data, &ent);
+ arm_smmu_cmdq_build_cmd(&cmd, &ent);
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}
}
@@ -3462,7 +3466,7 @@ static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
}
cmd.opcode = inv->nsize_opcode;
- arm_smmu_cmdq_build_cmd(hw_cmd.data, &cmd);
+ arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &hw_cmd);
}
@@ -4875,18 +4879,18 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
/* Invalidate any cached configuration */
ent.opcode = CMDQ_OP_CFGI_ALL;
- arm_smmu_cmdq_build_cmd(cmd.data, &ent);
+ arm_smmu_cmdq_build_cmd(&cmd, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
ent.opcode = CMDQ_OP_TLBI_EL2_ALL;
- arm_smmu_cmdq_build_cmd(cmd.data, &ent);
+ arm_smmu_cmdq_build_cmd(&cmd, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
ent.opcode = CMDQ_OP_TLBI_NSNH_ALL;
- arm_smmu_cmdq_build_cmd(cmd.data, &ent);
+ arm_smmu_cmdq_build_cmd(&cmd, &ent);
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Event queue */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 6d73f6b63e64a9..1fe6917448b774 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -651,7 +651,7 @@ static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
}
struct arm_smmu_cmdq_batch {
- u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmds[CMDQ_BATCH_ENTRIES];
struct arm_smmu_cmdq *cmdq;
int num;
};
@@ -1148,7 +1148,8 @@ void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
const struct arm_smmu_ste *target);
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
bool sync);
#ifdef CONFIG_ARM_SMMU_V3_SVA
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index b4d8c1f2fd3878..67be62a6e7640a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -427,16 +427,16 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
static void tegra241_vcmdq_hw_flush_timeout(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
- u64 cmd_sync[CMDQ_ENT_DWORDS] = {};
+ struct arm_smmu_cmd cmd_sync = {};
- cmd_sync[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
- FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+ cmd_sync.data[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
+ FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
/*
* It does not hurt to insert another CMD_SYNC, taking advantage of the
* arm_smmu_cmdq_issue_cmdlist() that waits for the CMD_SYNC completion.
*/
- arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, cmd_sync, 1, true);
+ arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, &cmd_sync, 1, true);
}
/* This function is for LVCMDQ, so @vcmdq must not be unmapped yet */
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 5/9] iommu/arm-smmu-v3: Remove CMDQ_OP_CFGI_CD_ALL from arm_smmu_cmdq_build_cmd()
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (3 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 4/9] iommu/arm-smmu-v3: Convert arm_smmu_cmdq_batch cmds to struct arm_smmu_cmd Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 6/9] iommu/arm-smmu-v3: Directly encode simple commands Jason Gunthorpe
` (3 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Nothing uses this, just dead code.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b3ef001ce80d23..ef0907b1a2204f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -290,9 +290,6 @@ static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
break;
- case CMDQ_OP_CFGI_CD_ALL:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
- break;
case CMDQ_OP_CFGI_ALL:
/* Cover the entire SID range */
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 6/9] iommu/arm-smmu-v3: Directly encode simple commands
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (4 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 5/9] iommu/arm-smmu-v3: Remove CMDQ_OP_CFGI_CD_ALL from arm_smmu_cmdq_build_cmd() Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 7/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_ATC_INV Jason Gunthorpe
` (2 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Add make functions to build commands for
CMDQ_OP_TLBI_EL2_ALL
CMDQ_OP_TLBI_NSNH_ALL
CMDQ_OP_CFGI_ALL
CMDQ_OP_PREFETCH_CFG
CMDQ_OP_CFGI_STE
CMDQ_OP_CFGI_CD
CMDQ_OP_RESUME
CMDQ_OP_PRI_RESP
Convert all of these call sites to use the make function instead of
going through arm_smmu_cmdq_build_cmd(). Use a #define so the general
pattern is always:
arm_smmu_cmdq_issue_cmd(smmu, arm_smmu_make_cmd_XX(..));
Add arm_smmu_cmdq_batch_add_cmd() which takes struct arm_smmu_cmd
directly to match the new flow.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 213 +++++++-------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 109 +++++++---
2 files changed, 151 insertions(+), 171 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ef0907b1a2204f..f9c25ca9a9e7b8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -277,23 +277,6 @@ static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
switch (ent->opcode) {
- case CMDQ_OP_TLBI_EL2_ALL:
- case CMDQ_OP_TLBI_NSNH_ALL:
- break;
- case CMDQ_OP_PREFETCH_CFG:
- cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
- break;
- case CMDQ_OP_CFGI_CD:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
- fallthrough;
- case CMDQ_OP_CFGI_STE:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
- cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
- break;
- case CMDQ_OP_CFGI_ALL:
- /* Cover the entire SID range */
- cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
- break;
case CMDQ_OP_TLBI_NH_VA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
fallthrough;
@@ -333,26 +316,6 @@ static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
break;
- case CMDQ_OP_PRI_RESP:
- cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
- cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
- cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
- cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
- switch (ent->pri.resp) {
- case PRI_RESP_DENY:
- case PRI_RESP_FAIL:
- case PRI_RESP_SUCC:
- break;
- default:
- return -EINVAL;
- }
- cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
- break;
- case CMDQ_OP_RESUME:
- cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
- cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
- cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
- break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@@ -924,25 +887,24 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
return ret;
}
-static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmd *cmd,
- bool sync)
+static int arm_smmu_cmdq_issue_cmd_p(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmd *cmd, bool sync)
{
return arm_smmu_cmdq_issue_cmdlist(
smmu, arm_smmu_get_cmdq(smmu, cmd), cmd, 1, sync);
}
-static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmd *cmd)
-{
- return __arm_smmu_cmdq_issue_cmd(smmu, cmd, false);
-}
+#define arm_smmu_cmdq_issue_cmd(smmu, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_issue_cmd_p(smmu, &__cmd, false); \
+ })
-static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
- struct arm_smmu_cmd *cmd)
-{
- return __arm_smmu_cmdq_issue_cmd(smmu, cmd, true);
-}
+#define arm_smmu_cmdq_issue_cmd_with_sync(smmu, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_issue_cmd_p(smmu, &__cmd, true); \
+ })
static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
@@ -962,14 +924,41 @@ static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
}
+static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmd *cmd)
+{
+ bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
+ bool unsupported_cmd;
+
+ unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
+ if (force_sync || unsupported_cmd) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
+ }
+
+ if (cmds->num == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, false);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
+ }
+
+ cmds->cmds[cmds->num++] = *cmd;
+}
+
+#define arm_smmu_cmdq_batch_add_cmd(smmu, cmds, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &__cmd); \
+ })
+
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *ent)
{
- bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
- (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
struct arm_smmu_cmd cmd;
- bool unsupported_cmd;
if (unlikely(arm_smmu_cmdq_build_cmd(&cmd, ent))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
@@ -977,20 +966,7 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
return;
}
- unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, &cmd);
- if (force_sync || unsupported_cmd) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
- cmds->num, true);
- arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
- }
-
- if (cmds->num == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
- cmds->num, false);
- arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
- }
-
- cmds->cmds[cmds->num++] = cmd;
+ arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &cmd);
}
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
@@ -1003,32 +979,29 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
struct iommu_page_response *resp)
{
- struct arm_smmu_cmdq_ent cmd = {0};
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- int sid = master->streams[0].id;
- struct arm_smmu_cmd hw_cmd;
+ u8 resume_resp;
if (WARN_ON(!master->stall_enabled))
return;
- cmd.opcode = CMDQ_OP_RESUME;
- cmd.resume.sid = sid;
- cmd.resume.stag = resp->grpid;
switch (resp->code) {
case IOMMU_PAGE_RESP_INVALID:
case IOMMU_PAGE_RESP_FAILURE:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+ resume_resp = CMDQ_RESUME_0_RESP_ABORT;
break;
case IOMMU_PAGE_RESP_SUCCESS:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+ resume_resp = CMDQ_RESUME_0_RESP_RETRY;
break;
default:
+ resume_resp = CMDQ_RESUME_0_RESP_TERM;
break;
}
- arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
- arm_smmu_cmdq_issue_cmd(master->smmu, &hw_cmd);
-
+ arm_smmu_cmdq_issue_cmd(master->smmu,
+ arm_smmu_make_cmd_resume(master->streams[0].id,
+ resp->grpid,
+ resume_resp));
/*
* Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
* RESUME consumption guarantees that the stalled transaction will be
@@ -1552,19 +1525,14 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
size_t i;
struct arm_smmu_cmdq_batch cmds;
struct arm_smmu_device *smmu = master->smmu;
- struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_CFGI_CD,
- .cfgi = {
- .ssid = ssid,
- .leaf = leaf,
- },
- };
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_cfgi_cd(0, ssid, leaf);
- arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
- for (i = 0; i < master->num_streams; i++) {
- cmd.cfgi.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
- }
+ arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++)
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_cfgi_cd(master->streams[i].id, ssid,
+ leaf));
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -1857,17 +1825,10 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
struct arm_smmu_ste_writer *ste_writer =
container_of(writer, struct arm_smmu_ste_writer, writer);
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_CFGI_STE,
- .cfgi = {
- .sid = ste_writer->sid,
- .leaf = true,
- },
- };
- struct arm_smmu_cmd cmd;
- arm_smmu_cmdq_build_cmd(&cmd, &ent);
- arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ writer->master->smmu,
+ arm_smmu_make_cmd_cfgi_ste(ste_writer->sid, true));
}
static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
@@ -1892,17 +1853,9 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
/* It's likely that we'll want to use the new STE soon */
- if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
- struct arm_smmu_cmdq_ent
- prefetch_ent = { .opcode = CMDQ_OP_PREFETCH_CFG,
- .prefetch = {
- .sid = sid,
- } };
- struct arm_smmu_cmd prefetch_cmd;
-
- arm_smmu_cmdq_build_cmd(&prefetch_cmd, &prefetch_ent);
- arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
- }
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
+ arm_smmu_cmdq_issue_cmd(smmu,
+ arm_smmu_make_cmd_prefetch_cfg(sid));
}
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
@@ -2327,22 +2280,10 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
evt[1] & PRIQ_1_ADDR_MASK);
- if (last) {
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_PRI_RESP,
- .substream_valid = ssv,
- .pri = {
- .sid = sid,
- .ssid = ssid,
- .grpid = grpid,
- .resp = PRI_RESP_DENY,
- },
- };
- struct arm_smmu_cmd cmd;
-
- arm_smmu_cmdq_build_cmd(&cmd, &ent);
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- }
+ if (last)
+ arm_smmu_cmdq_issue_cmd(
+ smmu, arm_smmu_make_cmd_pri_resp(sid, ssid, ssv, grpid,
+ PRI_RESP_DENY));
}
static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
@@ -3464,7 +3405,7 @@ static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
cmd.opcode = inv->nsize_opcode;
arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
- arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &hw_cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, hw_cmd);
}
/* Should be installed after arm_smmu_install_ste_for_dev() */
@@ -4827,8 +4768,6 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
int ret;
u32 reg, enables;
- struct arm_smmu_cmdq_ent ent;
- struct arm_smmu_cmd cmd;
/* Clear CR0 and sync (disables SMMU and queue processing) */
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
@@ -4875,20 +4814,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
}
/* Invalidate any cached configuration */
- ent.opcode = CMDQ_OP_CFGI_ALL;
- arm_smmu_cmdq_build_cmd(&cmd, &ent);
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, arm_smmu_make_cmd_cfgi_all());
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
- ent.opcode = CMDQ_OP_TLBI_EL2_ALL;
- arm_smmu_cmdq_build_cmd(&cmd, &ent);
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ smmu, arm_smmu_make_cmd_op(CMDQ_OP_TLBI_EL2_ALL));
}
- ent.opcode = CMDQ_OP_TLBI_NSNH_ALL;
- arm_smmu_cmdq_build_cmd(&cmd, &ent);
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ smmu, arm_smmu_make_cmd_op(CMDQ_OP_TLBI_NSNH_ALL));
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 1fe6917448b774..10b3d95d9ee660 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -437,6 +437,12 @@ struct arm_smmu_cmd {
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
+enum pri_resp {
+ PRI_RESP_DENY = 0,
+ PRI_RESP_FAIL = 1,
+ PRI_RESP_SUCC = 2,
+};
+
#define CMDQ_RESUME_0_RESP_TERM 0UL
#define CMDQ_RESUME_0_RESP_RETRY 1UL
#define CMDQ_RESUME_0_RESP_ABORT 2UL
@@ -475,6 +481,77 @@ enum arm_smmu_cmdq_opcode {
CMDQ_OP_CMD_SYNC = 0x46,
};
+static inline struct arm_smmu_cmd
+arm_smmu_make_cmd_op(enum arm_smmu_cmdq_opcode op)
+{
+ struct arm_smmu_cmd cmd = {};
+
+ cmd.data[0] = FIELD_PREP(CMDQ_0_OP, op);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_all(void)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_ALL);
+
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_prefetch_cfg(u32 sid)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_PREFETCH_CFG);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, sid);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_ste(u32 sid, bool leaf)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_STE);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, leaf);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_cd(u32 sid, u32 ssid,
+ bool leaf)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_CD);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid) |
+ FIELD_PREP(CMDQ_CFGI_0_SSID, ssid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, leaf);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_resume(u32 sid, u16 stag,
+ u8 resp)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_RESUME);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, sid) |
+ FIELD_PREP(CMDQ_RESUME_0_RESP, resp);
+ cmd.data[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, stag);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_pri_resp(u32 sid, u32 ssid,
+ bool ssv,
+ u16 grpid,
+ enum pri_resp resp)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_PRI_RESP);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_0_SSV, ssv) |
+ FIELD_PREP(CMDQ_PRI_0_SID, sid) |
+ FIELD_PREP(CMDQ_PRI_0_SSID, ssid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, grpid) |
+ FIELD_PREP(CMDQ_PRI_1_RESP, resp);
+ return cmd;
+}
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -535,12 +612,6 @@ enum arm_smmu_cmdq_opcode {
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
-enum pri_resp {
- PRI_RESP_DENY = 0,
- PRI_RESP_FAIL = 1,
- PRI_RESP_SUCC = 2,
-};
-
struct arm_smmu_cmdq_ent {
/* Common fields */
u8 opcode;
@@ -548,19 +619,6 @@ struct arm_smmu_cmdq_ent {
/* Command-specific fields */
union {
- struct {
- u32 sid;
- } prefetch;
-
- struct {
- u32 sid;
- u32 ssid;
- union {
- bool leaf;
- u8 span;
- };
- } cfgi;
-
struct {
u8 num;
u8 scale;
@@ -580,19 +638,6 @@ struct arm_smmu_cmdq_ent {
bool global;
} atc;
- struct {
- u32 sid;
- u32 ssid;
- u16 grpid;
- enum pri_resp resp;
- } pri;
-
- struct {
- u32 sid;
- u16 stag;
- u8 resp;
- } resume;
-
struct {
u64 msiaddr;
} sync;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 7/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_ATC_INV
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (5 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 6/9] iommu/arm-smmu-v3: Directly encode simple commands Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 8/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_SYNC Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands Jason Gunthorpe
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Add a new command make function and convert all the places using
ATC_INV.
Split out full invalidation to directly make the cmd instead of
overloading size=0 to mean full invalidation.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 59 ++++++++-------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 27 +++++++---
2 files changed, 40 insertions(+), 46 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f9c25ca9a9e7b8..0cdf0752ff6d62 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -308,14 +308,6 @@ static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
case CMDQ_OP_TLBI_EL2_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
break;
- case CMDQ_OP_ATC_INV:
- cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
- cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
- cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
- break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@@ -2371,9 +2363,8 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
return IRQ_WAKE_THREAD;
}
-static void
-arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
- struct arm_smmu_cmdq_ent *cmd)
+static struct arm_smmu_cmd
+arm_smmu_atc_inv_to_cmd(u32 sid, int ssid, unsigned long iova, size_t size)
{
size_t log2_span;
size_t span_mask;
@@ -2395,17 +2386,6 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
* This has the unpleasant side-effect of invalidating all PASID-tagged
* ATC entries within the address range.
*/
- *cmd = (struct arm_smmu_cmdq_ent) {
- .opcode = CMDQ_OP_ATC_INV,
- .substream_valid = (ssid != IOMMU_NO_PASID),
- .atc.ssid = ssid,
- };
-
- if (!size) {
- cmd->atc.size = ATC_INV_SIZE_ALL;
- return;
- }
-
page_start = iova >> inval_grain_shift;
page_end = (iova + size - 1) >> inval_grain_shift;
@@ -2434,24 +2414,25 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
page_start &= ~span_mask;
- cmd->atc.addr = page_start << inval_grain_shift;
- cmd->atc.size = log2_span;
+ return arm_smmu_make_cmd_atc_inv(sid, ssid,
+ page_start << inval_grain_shift,
+ log2_span);
}
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
ioasid_t ssid)
{
int i;
- struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_cmd cmd;
struct arm_smmu_cmdq_batch cmds;
- arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
-
- arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
- for (i = 0; i < master->num_streams; i++) {
- cmd.atc.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
- }
+ cmd = arm_smmu_make_cmd_atc_inv_all(0, IOMMU_NO_PASID);
+ arm_smmu_cmdq_batch_init_cmd(master->smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++)
+ arm_smmu_cmdq_batch_add_cmd(
+ master->smmu, &cmds,
+ arm_smmu_make_cmd_atc_inv_all(master->streams[i].id,
+ ssid));
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
}
@@ -2650,14 +2631,16 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
break;
case INV_TYPE_ATS:
- arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd);
- cmd.atc.sid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_atc_inv_to_cmd(cur->id, cur->ssid,
+ iova, size));
break;
case INV_TYPE_ATS_FULL:
- arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
- cmd.atc.sid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_atc_inv_all(cur->id,
+ IOMMU_NO_PASID));
break;
default:
WARN_ON_ONCE(1);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 10b3d95d9ee660..194f73cabef5c9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -552,6 +552,25 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_pri_resp(u32 sid, u32 ssid,
return cmd;
}
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_atc_inv(u32 sid, u32 ssid,
+ u64 addr, u8 size)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_ATC_INV);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_0_SSV, ssid != IOMMU_NO_PASID) |
+ FIELD_PREP(CMDQ_ATC_0_SSID, ssid) |
+ FIELD_PREP(CMDQ_ATC_0_SID, sid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, size) |
+ (addr & CMDQ_ATC_1_ADDR_MASK);
+ return cmd;
+}
+
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_atc_inv_all(u32 sid,
+ u32 ssid)
+{
+ return arm_smmu_make_cmd_atc_inv(sid, ssid, 0, ATC_INV_SIZE_ALL);
+}
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -630,14 +649,6 @@ struct arm_smmu_cmdq_ent {
u64 addr;
} tlbi;
- struct {
- u32 sid;
- u32 ssid;
- u64 addr;
- u8 size;
- bool global;
- } atc;
-
struct {
u64 msiaddr;
} sync;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 8/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_SYNC
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (6 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 7/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_ATC_INV Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
2026-05-01 14:29 ` [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands Jason Gunthorpe
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
Change the flow so the caller controls the CS field and remove the
weird u64p_replace_bits() thing to override it.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 ++++++++-------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 16 ++++++--
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 0cdf0752ff6d62..8147b9cdcc6b99 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -308,16 +308,6 @@ static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
case CMDQ_OP_TLBI_EL2_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
break;
- case CMDQ_OP_CMD_SYNC:
- if (ent->sync.msiaddr) {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
- cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
- } else {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
- }
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
- break;
default:
return -ENOENT;
}
@@ -350,23 +340,24 @@ static void arm_smmu_cmdq_build_sync_cmd(struct arm_smmu_cmd *cmd,
struct arm_smmu_cmdq *cmdq, u32 prod)
{
struct arm_smmu_queue *q = &cmdq->q;
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
+ u64 msiaddr = 0;
+ unsigned int cs;
/*
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
- ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
- q->ent_dwords * 8;
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) {
+ cs = CMDQ_SYNC_0_CS_NONE;
+ } else if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
+ cs = CMDQ_SYNC_0_CS_IRQ;
+ msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+ q->ent_dwords * 8;
+ } else {
+ cs = CMDQ_SYNC_0_CS_SEV;
}
- arm_smmu_cmdq_build_cmd(cmd, &ent);
- if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(&cmd->data[0], CMDQ_SYNC_0_CS_NONE,
- CMDQ_SYNC_0_CS);
+ *cmd = arm_smmu_make_cmd_sync(cs, msiaddr);
}
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
@@ -383,9 +374,6 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmd cmd;
u32 cons = readl_relaxed(q->cons_reg);
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
- struct arm_smmu_cmdq_ent cmd_sync = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
@@ -419,10 +407,10 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd.data[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_cmd(&cmd, &cmd_sync);
- if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(&cmd.data[0], CMDQ_SYNC_0_CS_NONE,
- CMDQ_SYNC_0_CS);
+ cmd = arm_smmu_make_cmd_sync(
+ arm_smmu_cmdq_needs_busy_polling(smmu, cmdq) ?
+ CMDQ_SYNC_0_CS_NONE : CMDQ_SYNC_0_CS_SEV,
+ 0);
queue_write(Q_ENT(q, cons), cmd.data, q->ent_dwords);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 194f73cabef5c9..538380de7d48a0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -571,6 +571,18 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_atc_inv_all(u32 sid,
return arm_smmu_make_cmd_atc_inv(sid, ssid, 0, ATC_INV_SIZE_ALL);
}
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
+ u64 msiaddr)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CMD_SYNC);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, cs) |
+ FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ cmd.data[1] |= msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+ return cmd;
+}
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -648,10 +660,6 @@ struct arm_smmu_cmdq_ent {
u8 tg;
u64 addr;
} tlbi;
-
- struct {
- u64 msiaddr;
- } sync;
};
};
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 9/9] iommu/arm-smmu-v3: Directly encode TLBI commands
2026-05-01 14:29 [PATCH 0/9] Remove SMMUv3 struct arm_smmu_cmdq_ent Jason Gunthorpe
` (7 preceding siblings ...)
2026-05-01 14:29 ` [PATCH 8/9] iommu/arm-smmu-v3: Directly encode CMDQ_OP_SYNC Jason Gunthorpe
@ 2026-05-01 14:29 ` Jason Gunthorpe
8 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2026-05-01 14:29 UTC (permalink / raw)
To: iommu, Jonathan Hunter, Joerg Roedel, linux-arm-kernel,
linux-tegra, Robin Murphy, Thierry Reding, Krishna Reddy,
Will Deacon
Cc: David Matlack, Pasha Tatashin, patches, Samiullah Khawaja,
Mostafa Saleh
TLBI is more complicated than all the other commands because the
invalidation loop builds a template command from the struct
arm_smmu_inv which is then expanded into many TLBI commands for the
invalidation.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 170 +++++++-------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 35 ++--
2 files changed, 71 insertions(+), 134 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8147b9cdcc6b99..9be589d14a3bd4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -268,53 +268,6 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
}
/* High-level queue accessors */
-static int arm_smmu_cmdq_build_cmd(struct arm_smmu_cmd *cmd_out,
- struct arm_smmu_cmdq_ent *ent)
-{
- u64 *cmd = cmd_out->data;
-
- memset(cmd_out, 0, sizeof(*cmd_out));
- cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
-
- switch (ent->opcode) {
- case CMDQ_OP_TLBI_NH_VA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- fallthrough;
- case CMDQ_OP_TLBI_EL2_VA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
- cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
- break;
- case CMDQ_OP_TLBI_S2_IPA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
- cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
- break;
- case CMDQ_OP_TLBI_NH_ASID:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- fallthrough;
- case CMDQ_OP_TLBI_NH_ALL:
- case CMDQ_OP_TLBI_S12_VMALL:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- break;
- case CMDQ_OP_TLBI_EL2_ASID:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- break;
- default:
- return -ENOENT;
- }
-
- return 0;
-}
-
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
struct arm_smmu_cmd *cmd)
{
@@ -894,16 +847,6 @@ static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
cmds->cmdq = arm_smmu_get_cmdq(smmu, cmd);
}
-static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *ent)
-{
- struct arm_smmu_cmd cmd;
-
- arm_smmu_cmdq_build_cmd(&cmd, ent);
- arm_smmu_cmdq_batch_init_cmd(smmu, cmds, &cmd);
-}
-
static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmd *cmd)
@@ -934,21 +877,6 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &__cmd); \
})
-static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *ent)
-{
- struct arm_smmu_cmd cmd;
-
- if (unlikely(arm_smmu_cmdq_build_cmd(&cmd, ent))) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- ent->opcode);
- return;
- }
-
- arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &cmd);
-}
-
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
@@ -2450,12 +2378,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd,
+ struct arm_smmu_cmd *cmd, bool leaf,
unsigned long iova, size_t size,
size_t granule, size_t pgsize)
{
unsigned long end = iova + size, num_pages = 0, tg = pgsize;
+ u64 orig_data0 = cmd->data[0];
size_t inv_range = granule;
+ u8 ttl = 0, tg_enc = 0;
if (WARN_ON_ONCE(!size))
return;
@@ -2464,7 +2394,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
- cmd->tlbi.tg = (tg - 10) / 2;
+ tg_enc = (tg - 10) / 2;
/*
* Determine what level the granule is at. For non-leaf, both
@@ -2474,8 +2404,8 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
* want to use a range command, so avoid the SVA corner case
* where both scale and num could be 0 as well.
*/
- if (cmd->tlbi.leaf)
- cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ if (leaf)
+ ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
num_pages++;
}
@@ -2493,11 +2423,13 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
/* Determine the power of 2 multiple number of pages */
scale = __ffs(num_pages);
- cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
- cmd->tlbi.num = num - 1;
+
+ cmd->data[0] = orig_data0 |
+ FIELD_PREP(CMDQ_TLBI_0_NUM, num - 1) |
+ FIELD_PREP(CMDQ_TLBI_0_SCALE, scale);
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
@@ -2506,8 +2438,17 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
num_pages -= num << scale;
}
- cmd->tlbi.addr = iova;
- arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
+ /*
+ * IPA has fewer bits than VA, but they are reserved in the
+ * command and something would be very broken if iova had them
+ * set.
+ */
+ cmd->data[1] = FIELD_PREP(CMDQ_TLBI_1_LEAF, leaf) |
+ FIELD_PREP(CMDQ_TLBI_1_TTL, ttl) |
+ FIELD_PREP(CMDQ_TLBI_1_TG, tg_enc) |
+ (iova & ~GENMASK_U64(11, 0));
+
+ arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, cmd);
iova += inv_range;
}
}
@@ -2538,19 +2479,22 @@ static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
/* Used by non INV_TYPE_ATS* invalidations */
static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd,
+ struct arm_smmu_cmd *cmd,
+ bool leaf,
unsigned long iova, size_t size,
unsigned int granule)
{
if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
- cmd->opcode = inv->nsize_opcode;
- arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);
+ struct arm_smmu_cmd nsize_cmd = *cmd;
+
+ u64p_replace_bits(&nsize_cmd.data[0], inv->nsize_opcode,
+ CMDQ_0_OP);
+ arm_smmu_cmdq_batch_add_cmd_p(inv->smmu, cmds, &nsize_cmd);
return;
}
- cmd->opcode = inv->size_opcode;
- arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
- inv->pgsize);
+ arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, leaf,
+ iova, size, granule, inv->pgsize);
}
static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
@@ -2585,38 +2529,39 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
break;
while (cur != end) {
struct arm_smmu_device *smmu = cur->smmu;
- struct arm_smmu_cmdq_ent cmd = {
- /*
- * Pick size_opcode to run arm_smmu_get_cmdq(). This can
- * be changed to nsize_opcode, which would result in the
- * same CMDQ pointer.
- */
- .opcode = cur->size_opcode,
- };
+ /*
+ * Pick size_opcode to run arm_smmu_get_cmdq(). This can
+ * be changed to nsize_opcode, which would result in the
+ * same CMDQ pointer.
+ */
+ struct arm_smmu_cmd cmd =
+ arm_smmu_make_cmd_op(cur->size_opcode);
struct arm_smmu_inv *next;
if (!cmds.num)
- arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
switch (cur->type) {
case INV_TYPE_S1_ASID:
- cmd.tlbi.asid = cur->id;
- cmd.tlbi.leaf = leaf;
- arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
- granule);
+ cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
+ cur->id, 0);
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
+ iova, size, granule);
break;
case INV_TYPE_S2_VMID:
- cmd.tlbi.vmid = cur->id;
- cmd.tlbi.leaf = leaf;
- arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
- granule);
+ cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
+ 0, cur->id);
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
+ iova, size, granule);
break;
case INV_TYPE_S2_VMID_S1_CLEAR:
/* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */
if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))
break;
- cmd.tlbi.vmid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_tlbi(cur->size_opcode, 0,
+ cur->id));
break;
case INV_TYPE_ATS:
arm_smmu_cmdq_batch_add_cmd(
@@ -3359,24 +3304,21 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
{
- struct arm_smmu_cmdq_ent cmd = {};
- struct arm_smmu_cmd hw_cmd;
-
switch (inv->type) {
case INV_TYPE_S1_ASID:
- cmd.tlbi.asid = inv->id;
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ inv->smmu,
+ arm_smmu_make_cmd_tlbi(inv->nsize_opcode, inv->id, 0));
break;
case INV_TYPE_S2_VMID:
/* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
- cmd.tlbi.vmid = inv->id;
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ inv->smmu,
+ arm_smmu_make_cmd_tlbi(inv->nsize_opcode, 0, inv->id));
break;
default:
return;
}
-
- cmd.opcode = inv->nsize_opcode;
- arm_smmu_cmdq_build_cmd(&hw_cmd, &cmd);
- arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, hw_cmd);
}
/* Should be installed after arm_smmu_install_ste_for_dev() */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 538380de7d48a0..16353596e08ad8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -583,6 +583,21 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
return cmd;
}
+/*
+ * TLBI commands - the non-sized variants just need opcode + asid/vmid.
+ * For sized variants the caller sets up data[0] with the immutable fields
+ * (opcode + asid/vmid) and the range loop fills in per-iteration fields.
+ */
+static inline struct arm_smmu_cmd
+arm_smmu_make_cmd_tlbi(enum arm_smmu_cmdq_opcode op, u16 asid, u16 vmid)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(op);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, asid) |
+ FIELD_PREP(CMDQ_TLBI_0_VMID, vmid);
+ return cmd;
+}
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -643,26 +658,6 @@ static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
-struct arm_smmu_cmdq_ent {
- /* Common fields */
- u8 opcode;
- bool substream_valid;
-
- /* Command-specific fields */
- union {
- struct {
- u8 num;
- u8 scale;
- u16 asid;
- u16 vmid;
- bool leaf;
- u8 ttl;
- u8 tg;
- u64 addr;
- } tlbi;
- };
-};
-
struct arm_smmu_ll_queue {
union {
u64 val;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread