* [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
@ 2025-12-29 0:23 Dawei Li
2026-01-02 18:41 ` Jason Gunthorpe
2026-01-05 13:33 ` Robin Murphy
From: Dawei Li @ 2025-12-29 0:23 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: jgg, linux-arm-kernel, iommu, linux-kernel, set_pte_at, Dawei Li,
stable
According to the SMMUv3 architecture specification, IO-coherent access
for the SMMU is supported for:
- Translation table walks.
- Fetches of L1STD, STE, L1CD and CD.
- Command queue, Event queue and PRI queue access.
- GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported
In other words, if an SMMU implementation doesn't support IO-coherent
access (SMMU_IDR0.COHACC == 0), all of the accesses above are expected
to be non-coherent, and software is supposed to maintain the proper
access attributes to achieve the expected behavior.
Unfortunately, for non-coherent SMMU implementations, the current
codebase only takes care of translation table walks. Fix it by
providing the right attributes (cacheability, shareability, cache
allocation hints, etc.) to the other accesses.
Fixes: 4f41845b3407 ("iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag")
Fixes: 9e6ea59f3ff3 ("iommu/io-pgtable: Support non-coherent page tables")
Cc: stable@vger.kernel.org
Signed-off-by: Dawei Li <dawei.li@linux.dev>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 73 +++++++++++++++------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
2 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d16d35c78c06..4f00bf53d26c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -265,8 +265,14 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
return 0;
}
+static __always_inline bool smmu_coherent(struct arm_smmu_device *smmu)
+{
+ return !!(smmu->features & ARM_SMMU_FEAT_COHERENCY);
+}
+
/* High-level queue accessors */
-static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
+static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent,
+ struct arm_smmu_device *smmu)
{
memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
@@ -358,8 +364,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
} else {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
}
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ if (smmu_coherent(smmu)) {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ } else {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_OSH);
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OINC);
+ }
break;
default:
return -ENOENT;
@@ -405,7 +416,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
q->ent_dwords * 8;
}
- arm_smmu_cmdq_build_cmd(cmd, &ent);
+ arm_smmu_cmdq_build_cmd(cmd, &ent, smmu);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
@@ -461,7 +472,7 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ arm_smmu_cmdq_build_cmd(cmd, &cmd_sync, smmu);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
@@ -913,7 +924,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
{
u64 cmd[CMDQ_ENT_DWORDS];
- if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
+ if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent, smmu))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return -EINVAL;
@@ -965,7 +976,7 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
}
index = cmds->num * CMDQ_ENT_DWORDS;
- if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
+ if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd, smmu))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
cmd->opcode);
return;
@@ -1603,6 +1614,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
{
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
+ u64 val;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
@@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
+ if (smmu_coherent(smmu)) {
+ val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+ FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
+ } else {
+ val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
+ FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
+ FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
+ }
+
target->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
- FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
- FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
- FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | val |
((smmu->features & ARM_SMMU_FEAT_STALLS &&
!master->stall_enabled) ?
STRTAB_STE_1_S1STALLD :
@@ -3746,7 +3765,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
q->cons_reg = page + cons_off;
q->ent_dwords = dwords;
- q->q_base = Q_BASE_RWA;
+ q->q_base = smmu_coherent(smmu) ? Q_BASE_RWA : 0;
q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
@@ -4093,6 +4112,7 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
dma_addr_t dma;
u32 reg;
+ u64 val;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
@@ -4107,8 +4127,12 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
dma = cfg->linear.ste_dma;
}
- writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
- smmu->base + ARM_SMMU_STRTAB_BASE);
+
+ val = dma & STRTAB_BASE_ADDR_MASK;
+ if (smmu_coherent(smmu))
+ val |= STRTAB_BASE_RA;
+
+ writeq_relaxed(val, smmu->base + ARM_SMMU_STRTAB_BASE);
writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
}
@@ -4130,12 +4154,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
return ret;
/* CR1 (table and queue memory attributes) */
- reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
- FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
- FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
- FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
- FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
- FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
+ if (smmu_coherent(smmu)) {
+ reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
+ FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
+ } else {
+ reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_OSH) |
+ FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_NC) |
+ FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_NC) |
+ FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_OSH) |
+ FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_NC) |
+ FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_NC);
+ }
writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
/* CR2 (random crap) */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ae23aacc3840..7a5f76e165dc 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -183,6 +183,7 @@ struct arm_vsmmu;
#define ARM_SMMU_SH_ISH 3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
#define ARM_SMMU_MEMATTR_OIWB 0xf
+#define ARM_SMMU_MEMATTR_OINC 0x5
#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
--
2.25.1
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2025-12-29 0:23 [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU Dawei Li
@ 2026-01-02 18:41 ` Jason Gunthorpe
2026-01-04 13:35 ` Dawei Li
2026-01-05 13:33 ` Robin Murphy
From: Jason Gunthorpe @ 2026-01-02 18:41 UTC (permalink / raw)
To: Dawei Li
Cc: will, robin.murphy, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable
On Mon, Dec 29, 2025 at 08:23:54AM +0800, Dawei Li wrote:
> According to the SMMUv3 architecture specification, IO-coherent access
> for the SMMU is supported for:
> - Translation table walks.
> - Fetches of L1STD, STE, L1CD and CD.
> - Command queue, Event queue and PRI queue access.
> - GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported
I was recently looking at this too.. IMHO this is not really a clean
description of what this patch is doing.
I would write this description as:
When the SMMU does a DMA for itself it can set various memory access
attributes which control how the interconnect should execute the
DMA. Linux uses these to differentiate DMA that must snoop the cache
and DMA that must bypass it because Linux has allocated the memory
non-coherent on the CPU.
In Table "13.8 Attributes for SMMU-originated accesses" each of the
different types of DMA is categorized and the specific bits
controlling the memory attribute for the fetch are identified.
Make this consistent globally. If Linux has cache flushed the buffer,
or allocated a DMA-incoherent buffer, then it should set the
non-caching memory attribute so the DMA matches.
This is important for some of the allocations where Linux is currently
allocating DMA coherent memory, meaning nothing has made the CPU cache
coherent and doing any coherent access to that memory may result in
cache inconsistencies.
This may solve problems in systems where the SMMU driver thinks the
SMMU is non-coherent, but in fact, the SMMU and the interconnect
selectively supports coherence and setting the wrong memory attributes
will cause non-working cached access.
[and then if you have a specific SOC that shows an issue please
describe the HW]
> +static __always_inline bool smmu_coherent(struct arm_smmu_device *smmu)
> +{
> + return !!(smmu->features & ARM_SMMU_FEAT_COHERENCY);
> +}
> +
> /* High-level queue accessors */
> -static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> +static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent,
> + struct arm_smmu_device *smmu)
> {
> memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
> cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> @@ -358,8 +364,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> } else {
> cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> }
> - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> + if (smmu_coherent(smmu)) {
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> + } else {
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_OSH);
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OINC);
> + }
And then please go through your patch and add comments actually
explaining what the DMA is and what memory is being reached by it -
since it is not always very clear from the ARM mnemonics
For instance, this is:
/* DMA for "CMDQ MSI" which targets q->base_dma allocated by arm_smmu_init_one_queue() */
> @@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
>
> + if (smmu_coherent(smmu)) {
> + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> + } else {
> + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> + }
This one is "CD fetch" allocated by arm_smmu_alloc_cd_ptr()
etc
And note that the above will need this hunk too:
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -432,6 +432,14 @@ size_t arm_smmu_get_viommu_size(struct device *dev,
!(smmu->features & ARM_SMMU_FEAT_S2FWB))
return 0;
+ /*
+ * When running non-coherent we can't support S2FWB since it will also
+ * force a coherent CD fetch, aside from the question of what
+ * S2FWB/CANWBS even does with non-coherent SMMUs.
+ */
+ if (!smmu_coherent(smmu))
+ return 0;
> @@ -3746,7 +3765,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
> q->cons_reg = page + cons_off;
> q->ent_dwords = dwords;
>
> - q->q_base = Q_BASE_RWA;
> + q->q_base = smmu_coherent(smmu) ? Q_BASE_RWA : 0;
CMDQ fetch, though do we even need to manage RWA? Isn't it ignored if
IC/OC/SH are set to their non-cacheable values?
etc..
Jason
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-02 18:41 ` Jason Gunthorpe
@ 2026-01-04 13:35 ` Dawei Li
2026-01-05 0:02 ` Jason Gunthorpe
From: Dawei Li @ 2026-01-04 13:35 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: will, robin.murphy, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable, dawei.li
Jason,
On Fri, Jan 02, 2026 at 02:41:13PM -0400, Jason Gunthorpe wrote:
> On Mon, Dec 29, 2025 at 08:23:54AM +0800, Dawei Li wrote:
> > According to the SMMUv3 architecture specification, IO-coherent access
> > for the SMMU is supported for:
> > - Translation table walks.
> > - Fetches of L1STD, STE, L1CD and CD.
> > - Command queue, Event queue and PRI queue access.
> > - GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported
>
> I was recently looking at this too.. IMHO this is not really a clean
> description of what this patch is doing.
>
> I would write this description as:
>
> When the SMMU does a DMA for itself it can set various memory access
> attributes which control how the interconnect should execute the
> DMA. Linux uses these to differentiate DMA that must snoop the cache
> and DMA that must bypass it because Linux has allocated the memory
> non-coherent on the CPU.
>
> In Table "13.8 Attributes for SMMU-originated accesses" each of the
> different types of DMA is categorized and the specific bits
> controlling the memory attribute for the fetch are identified.
Turns out I missed some of the DMA types listed in 13.8, thanks for the
heads-up.
>
> Make this consistent globally. If Linux has cache flushed the buffer,
> or allocated a DMA-incoherent buffer, then it should set the
> non-caching memory attribute so the DMA matches.
>
> This is important for some of the allocations where Linux is currently
> allocating DMA coherent memory, meaning nothing has made the CPU cache
> coherent and doing any coherent access to that memory may result in
> cache inconsistencies.
>
> This may solve problems in systems where the SMMU driver thinks the
> SMMU is non-coherent, but in fact the SMMU and the interconnect
> selectively support coherence and setting the wrong memory attributes
> will cause non-working cached access.
>
> [and then if you have a specific SOC that shows an issue please
> describe the HW]
>
> > +static __always_inline bool smmu_coherent(struct arm_smmu_device *smmu)
> > +{
> > + return !!(smmu->features & ARM_SMMU_FEAT_COHERENCY);
> > +}
> > +
> > /* High-level queue accessors */
> > -static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> > +static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent,
> > + struct arm_smmu_device *smmu)
> > {
> > memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
> > cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> > @@ -358,8 +364,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> > } else {
> > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > }
> > - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> > - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > + if (smmu_coherent(smmu)) {
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > + } else {
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_OSH);
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OINC);
> > + }
>
> And then please go through your patch and add comments actually
> explaining what the DMA is and what memory is being reached by it -
> since it is not always very clear from the ARM mnemonics
Acked.
>
> For instance, this is:
> /* DMA for "CMDQ MSI" which targets q->base_dma allocated by arm_smmu_init_one_queue() */
>
> > @@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> > (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
> >
> > + if (smmu_coherent(smmu)) {
> > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> > + } else {
> > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> > + }
>
> This one is "CD fetch" allocated by arm_smmu_alloc_cd_ptr()
>
> etc
>
> And note that the above will need this hunk too:
>
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> @@ -432,6 +432,14 @@ size_t arm_smmu_get_viommu_size(struct device *dev,
> !(smmu->features & ARM_SMMU_FEAT_S2FWB))
> return 0;
>
> + /*
> + * When running non-coherent we can't support S2FWB since it will also
> + * force a coherent CD fetch, aside from the question of what
> + * S2FWB/CANWBS even does with non-coherent SMMUs.
> + */
> + if (!smmu_coherent(smmu))
> + return 0;
I was wondering why S2FWB can affect CD fetching before reading 13.8.
Does the diff below suffice?
@@ -1614,8 +1619,12 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
{
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
+ bool coherent, fwb;
u64 val;
+ coherent = smmu_coherent(smmu);
+ fwb = !!(smmu->features & ARM_SMMU_FEAT_S2FWB);
+
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
@@ -1624,14 +1633,24 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
+ /*
+ * DMA for "CD fetch" targets cd_table->linear.table or cd_table->l2.l1tab
+ * allocated by arm_smmu_alloc_cd_ptr().
+ */
if (smmu_coherent(smmu)) {
val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
} else {
- val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
- FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
- FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
+ if (!fwb) {
+ val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
+ FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
+ FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
+ } else {
+ dev_warn(smmu->dev, "Inconsistency between COHACC & S2FWB\n");
+ /* FIX ME */
+ return;
+ }
}
>
> > @@ -3746,7 +3765,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
> > q->cons_reg = page + cons_off;
> > q->ent_dwords = dwords;
> >
> > - q->q_base = Q_BASE_RWA;
> > + q->q_base = smmu_coherent(smmu) ? Q_BASE_RWA : 0;
>
> CMDQ fetch, though do we even need to manage RWA? Isn't it ignored if
> IC/OC/SH are set to their non-cacheable values?
My first thought was "why don't we just whack them all?", yet the spec
reads:
"Cache allocation hints are present in each _BASE register and are ignored
unless a cacheable type is used for the table or queue to which the
register corresponds"
You are right, gonna remove it.
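i.e. keep it unconditional, something like (sketch):

	/*
	 * Q_BASE_RWA is only an allocation hint; per the quote above it is
	 * ignored once CR1_QUEUE_{IC,OC} select a non-cacheable type.
	 */
	q->q_base = Q_BASE_RWA;
	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;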
>
> etc..
>
> Jason
Thanks,
Dawei
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-04 13:35 ` Dawei Li
@ 2026-01-05 0:02 ` Jason Gunthorpe
From: Jason Gunthorpe @ 2026-01-05 0:02 UTC (permalink / raw)
To: Dawei Li
Cc: will, robin.murphy, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable
> > /* DMA for "CMDQ MSI" which targets q->base_dma allocated by arm_smmu_init_one_queue() */
> >
> > > @@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> > > (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
> > >
> > > + if (smmu_coherent(smmu)) {
> > > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> > > + } else {
> > > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> > > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> > > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> > > + }
> >
> > This one is "CD fetch" allocated by arm_smmu_alloc_cd_ptr()
> >
> > etc
> >
> > And note that the above will need this hunk too:
> >
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> > @@ -432,6 +432,14 @@ size_t arm_smmu_get_viommu_size(struct device *dev,
> > !(smmu->features & ARM_SMMU_FEAT_S2FWB))
> > return 0;
> >
> > + /*
> > + * When running non-coherent we can't support S2FWB since it will also
> > + * force a coherent CD fetch, aside from the question of what
> > + * S2FWB/CANWBS even does with non-coherent SMMUs.
> > + */
> > + if (!smmu_coherent(smmu))
> > + return 0;
>
> I was wondering why S2FWB can affect CD fetching before reading 13.8.
Yeah, that table is super useful
> + if (!fwb) {
> + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> + } else {
> + dev_warn(smmu->dev, "Inconsistency between COHACC & S2FWB\n");
> + /* FIX ME */
> + return;
I prefer we block it as I showed instead of printing a warning; cache
inconsistencies in virtualization can become security problems.
Though it is probably good to add a comment here about S2FWB also.
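Something like this, perhaps (wording is only a sketch):

	/*
	 * arm_smmu_get_viommu_size() refuses S2FWB on a non-coherent SMMU,
	 * so we should never get here with S2FWB forcing a coherent CD
	 * fetch.
	 */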
Jason
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2025-12-29 0:23 [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU Dawei Li
2026-01-02 18:41 ` Jason Gunthorpe
@ 2026-01-05 13:33 ` Robin Murphy
2026-01-05 14:53 ` Jason Gunthorpe
2026-01-05 15:46 ` Dawei Li
From: Robin Murphy @ 2026-01-05 13:33 UTC (permalink / raw)
To: Dawei Li, will, joro
Cc: jgg, linux-arm-kernel, iommu, linux-kernel, set_pte_at, stable
On 2025-12-29 12:23 am, Dawei Li wrote:
> According to the SMMUv3 architecture specification, IO-coherent access
> for the SMMU is supported for:
> - Translation table walks.
> - Fetches of L1STD, STE, L1CD and CD.
> - Command queue, Event queue and PRI queue access.
> - GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported
>
> In other words, if an SMMU implementation doesn't support IO-coherent
> access (SMMU_IDR0.COHACC == 0), all of the accesses above are expected
> to be non-coherent, and software is supposed to maintain the proper
> access attributes to achieve the expected behavior.
>
> Unfortunately, for non-coherent SMMU implementations, the current
> codebase only takes care of translation table walks. Fix it by
> providing the right attributes (cacheability, shareability, cache
> allocation hints, etc.) to the other accesses.
The assumption is that if the SMMU is not I/O-coherent, then the Normal
Cacheable attribute will inherently degrade to a non-snooping (and thus
effectively Normal Non-Cacheable) one, as that's essentially what AXI
will do in practice, and thus the attribute doesn't actually matter all
that much in terms of functional correctness. If the SMMU _is_ capable
of snooping but is not described as such then frankly firmware is wrong.
If people have a good reason for wanting to use a coherent SMMU
non-coherently (and/or control of allocation hints), then that should
really be some kind of driver-level option - it would need a bit of
additional DMA API work (which has been low down my to-do list for a few
years now...), but it's perfectly achievable, and I think it's still
preferable to abusing the COHACC override in firmware.
> Fixes: 4f41845b3407 ("iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag")
That commit didn't change any behaviour though?
> Fixes: 9e6ea59f3ff3 ("iommu/io-pgtable: Support non-coherent page tables")
And that was a change to io-pgtable which did exactly what it intended
and claimed to do, entirely orthogonal to arm-smmu-v3. (And IIRC it was
really more about platforms using arm-smmu (v2) where the SMMU is
non-coherent but the Outer Cacheable attribute acts as an allocation
hint for a transparent system cache.)
Thanks,
Robin.
> Cc: stable@vger.kernel.org
> Signed-off-by: Dawei Li <dawei.li@linux.dev>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 73 +++++++++++++++------
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
> 2 files changed, 54 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index d16d35c78c06..4f00bf53d26c 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -265,8 +265,14 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
> return 0;
> }
>
> +static __always_inline bool smmu_coherent(struct arm_smmu_device *smmu)
> +{
> + return !!(smmu->features & ARM_SMMU_FEAT_COHERENCY);
> +}
> +
> /* High-level queue accessors */
> -static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> +static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent,
> + struct arm_smmu_device *smmu)
> {
> memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
> cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> @@ -358,8 +364,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> } else {
> cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> }
> - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> + if (smmu_coherent(smmu)) {
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> + } else {
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_OSH);
> + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OINC);
> + }
> break;
> default:
> return -ENOENT;
> @@ -405,7 +416,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> q->ent_dwords * 8;
> }
>
> - arm_smmu_cmdq_build_cmd(cmd, &ent);
> + arm_smmu_cmdq_build_cmd(cmd, &ent, smmu);
> if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
> u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
> }
> @@ -461,7 +472,7 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
> dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
>
> /* Convert the erroneous command into a CMD_SYNC */
> - arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
> + arm_smmu_cmdq_build_cmd(cmd, &cmd_sync, smmu);
> if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
> u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
>
> @@ -913,7 +924,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
> {
> u64 cmd[CMDQ_ENT_DWORDS];
>
> - if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
> + if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent, smmu))) {
> dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> ent->opcode);
> return -EINVAL;
> @@ -965,7 +976,7 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
> }
>
> index = cmds->num * CMDQ_ENT_DWORDS;
> - if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
> + if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd, smmu))) {
> dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> cmd->opcode);
> return;
> @@ -1603,6 +1614,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> {
> struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> struct arm_smmu_device *smmu = master->smmu;
> + u64 val;
>
> memset(target, 0, sizeof(*target));
> target->data[0] = cpu_to_le64(
> @@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
>
> + if (smmu_coherent(smmu)) {
> + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> + } else {
> + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> + }
> +
> target->data[1] = cpu_to_le64(
> - FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
> - FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> - FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> - FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
> + FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | val |
> ((smmu->features & ARM_SMMU_FEAT_STALLS &&
> !master->stall_enabled) ?
> STRTAB_STE_1_S1STALLD :
> @@ -3746,7 +3765,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
> q->cons_reg = page + cons_off;
> q->ent_dwords = dwords;
>
> - q->q_base = Q_BASE_RWA;
> + q->q_base = smmu_coherent(smmu) ? Q_BASE_RWA : 0;
> q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
> q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
>
> @@ -4093,6 +4112,7 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
> struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
> dma_addr_t dma;
> u32 reg;
> + u64 val;
>
> if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
> reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
> @@ -4107,8 +4127,12 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
> FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
> dma = cfg->linear.ste_dma;
> }
> - writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
> - smmu->base + ARM_SMMU_STRTAB_BASE);
> +
> + val = dma & STRTAB_BASE_ADDR_MASK;
> + if (smmu_coherent(smmu))
> + val |= STRTAB_BASE_RA;
> +
> + writeq_relaxed(val, smmu->base + ARM_SMMU_STRTAB_BASE);
> writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
> }
>
> @@ -4130,12 +4154,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
> return ret;
>
> /* CR1 (table and queue memory attributes) */
> - reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
> - FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
> - FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
> - FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
> - FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
> - FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
> + if (smmu_coherent(smmu)) {
> + reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
> + FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
> + FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
> + FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
> + FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
> + FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
> + } else {
> + reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_OSH) |
> + FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_NC) |
> + FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_NC) |
> + FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_OSH) |
> + FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_NC) |
> + FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_NC);
> + }
> writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
>
> /* CR2 (random crap) */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index ae23aacc3840..7a5f76e165dc 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -183,6 +183,7 @@ struct arm_vsmmu;
> #define ARM_SMMU_SH_ISH 3
> #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
> #define ARM_SMMU_MEMATTR_OIWB 0xf
> +#define ARM_SMMU_MEMATTR_OINC 0x5
>
> #define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
> #define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-05 13:33 ` Robin Murphy
@ 2026-01-05 14:53 ` Jason Gunthorpe
2026-01-05 16:02 ` Robin Murphy
2026-01-05 15:46 ` Dawei Li
From: Jason Gunthorpe @ 2026-01-05 14:53 UTC (permalink / raw)
To: Robin Murphy
Cc: Dawei Li, will, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable
On Mon, Jan 05, 2026 at 01:33:34PM +0000, Robin Murphy wrote:
> The assumption is that if the SMMU is not I/O-coherent, then the Normal
> Cacheable attribute will inherently degrade to a non-snooping (and thus
> effectively Normal Non-Cacheable) one, as that's essentially what AXI will
> do in practice, and thus the attribute doesn't actually matter all that much
> in terms of functional correctness. If the SMMU _is_ capable of snooping but
> is not described as such then frankly firmware is wrong.
Sadly I am aware of people doing this.. Either a HW bug or some other
weird issue forces the FW to set a non-coherent FW attribute even
though the HW is partially or fully able to process cachable AXI
attributes.
It is reasonable that Linux will set the attributes properly based on
what it is doing. Setting the wrong attributes and expecting the HW to
ignore them seems like a hacky direction.
I didn't see anything in the spec that says COHACC means the memory
attributes are ignored and forced to non-coherent, even though that is
the current assumption of the driver.
> If people have a good reason for wanting to use a coherent SMMU
> non-coherently (and/or control of allocation hints), then that should really
> be some kind of driver-level option - it would need a bit of additional DMA
> API work (which has been low down my to-do list for a few years now...), but
> it's perfectly achievable, and I think it's still preferable to abusing the
> COHACC override in firmware.
IMHO, this is a different topic, and something that will probably
become interesting this year. I'm aware of some HW/drivers that need
optional non-coherent mappings for isochronous flows - but it is not
the DMA API that is the main issue but the page table walks :\
Jason
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-05 13:33 ` Robin Murphy
2026-01-05 14:53 ` Jason Gunthorpe
@ 2026-01-05 15:46 ` Dawei Li
From: Dawei Li @ 2026-01-05 15:46 UTC (permalink / raw)
To: Robin Murphy
Cc: will, joro, jgg, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable, dawei.li
Robin,
Thanks for the review.
On Mon, Jan 05, 2026 at 01:33:34PM +0000, Robin Murphy wrote:
> On 2025-12-29 12:23 am, Dawei Li wrote:
> > According to the SMMUv3 architecture specification, IO-coherent access
> > for the SMMU is supported for:
> > - Translation table walks.
> > - Fetches of L1STD, STE, L1CD and CD.
> > - Command queue, Event queue and PRI queue access.
> > - GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported
> >
> > In other words, if an SMMU implementation doesn't support IO-coherent
> > access (SMMU_IDR0.COHACC == 0), all of the accesses above are expected
> > to be non-coherent, and software is supposed to maintain the proper
> > access attributes to achieve the expected behavior.
> >
> > Unfortunately, for non-coherent SMMU implementations, the current
> > codebase only takes care of translation table walks. Fix it by
> > providing the right attributes (cacheability, shareability, cache
> > allocation hints, etc.) to the other accesses.
>
> The assumption is that if the SMMU is not I/O-coherent, then the Normal
> Cacheable attribute will inherently degrade to a non-snooping (and thus
> effectively Normal Non-Cacheable) one, as that's essentially what AXI will
> do in practice, and thus the attribute doesn't actually matter all that much
Even in a system with a non-coherent interconnect and a system cache?
Per my understanding, the ultimate request routing (AxCACHE) depends on:
- The AXI master which initiates the request.
- The interconnect which routes the request downstream (routing rules,
of course).
- Anything downstream on the path to memory, caches included.
- Access attributes defined by S/W (prot of the page table, all the
attributes for the SMMU in this case).
Not every SoC is designed with fully-coherent interconnects. And the
final AxCACHE of the master and the routing rules are implementation
defined.
And the smmu-v3 driver, just as its name suggests, is developed on the
basis of the SMMU v3 spec, independent of any specific implementation.
> in terms of functional correctness. If the SMMU _is_ capable of snooping but
> is not described as such then frankly firmware is wrong.
>
> If people have a good reason for wanting to use a coherent SMMU
> non-coherently (and/or control of allocation hints), then that should really
> be some kind of driver-level option - it would need a bit of additional DMA
> API work (which has been low down my to-do list for a few years now...), but
> it's perfectly achievable, and I think it's still preferable to abusing the
> COHACC override in firmware.
>
> > Fixes: 4f41845b3407 ("iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag")
>
> That commit didn't change any behaviour though?
>
> > Fixes: 9e6ea59f3ff3 ("iommu/io-pgtable: Support non-coherent page tables")
>
> And that was a change to io-pgtable which did exactly what it intended and
> claimed to do, entirely orthogonal to arm-smmu-v3. (And IIRC it was really
> more about platforms using arm-smmu (v2) where the SMMU is non-coherent but
> the Outer Cacheable attribute acts as an allocation hint for a transparent
> system cache.)
>
> Thanks,
> Robin.
>
Thanks,
Dawei
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Dawei Li <dawei.li@linux.dev>
> > ---
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 73 +++++++++++++++------
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
> > 2 files changed, 54 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index d16d35c78c06..4f00bf53d26c 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -265,8 +265,14 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
> > return 0;
> > }
> > +static __always_inline bool smmu_coherent(struct arm_smmu_device *smmu)
> > +{
> > + return !!(smmu->features & ARM_SMMU_FEAT_COHERENCY);
> > +}
> > +
> > /* High-level queue accessors */
> > -static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> > +static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent,
> > + struct arm_smmu_device *smmu)
> > {
> > memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
> > cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
> > @@ -358,8 +364,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> > } else {
> > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > }
> > - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> > - cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > + if (smmu_coherent(smmu)) {
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > + } else {
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_OSH);
> > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OINC);
> > + }
> > break;
> > default:
> > return -ENOENT;
> > @@ -405,7 +416,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > q->ent_dwords * 8;
> > }
> > - arm_smmu_cmdq_build_cmd(cmd, &ent);
> > + arm_smmu_cmdq_build_cmd(cmd, &ent, smmu);
> > if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
> > u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
> > }
> > @@ -461,7 +472,7 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
> > dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
> > /* Convert the erroneous command into a CMD_SYNC */
> > - arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
> > + arm_smmu_cmdq_build_cmd(cmd, &cmd_sync, smmu);
> > if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
> > u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
> > @@ -913,7 +924,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
> > {
> > u64 cmd[CMDQ_ENT_DWORDS];
> > - if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
> > + if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent, smmu))) {
> > dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> > ent->opcode);
> > return -EINVAL;
> > @@ -965,7 +976,7 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
> > }
> > index = cmds->num * CMDQ_ENT_DWORDS;
> > - if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
> > + if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd, smmu))) {
> > dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
> > cmd->opcode);
> > return;
> > @@ -1603,6 +1614,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> > {
> > struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> > struct arm_smmu_device *smmu = master->smmu;
> > + u64 val;
> > memset(target, 0, sizeof(*target));
> > target->data[0] = cpu_to_le64(
> > @@ -1612,11 +1624,18 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> > (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
> > + if (smmu_coherent(smmu)) {
> > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> > + } else {
> > + val = FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_NC) |
> > + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_NC) |
> > + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_OSH);
> > + }
> > +
> > target->data[1] = cpu_to_le64(
> > - FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
> > - FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > - FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > - FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
> > + FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | val |
> > ((smmu->features & ARM_SMMU_FEAT_STALLS &&
> > !master->stall_enabled) ?
> > STRTAB_STE_1_S1STALLD :
> > @@ -3746,7 +3765,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
> > q->cons_reg = page + cons_off;
> > q->ent_dwords = dwords;
> > - q->q_base = Q_BASE_RWA;
> > + q->q_base = smmu_coherent(smmu) ? Q_BASE_RWA : 0;
> > q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
> > q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
> > @@ -4093,6 +4112,7 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
> > struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
> > dma_addr_t dma;
> > u32 reg;
> > + u64 val;
> > if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
> > reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
> > @@ -4107,8 +4127,12 @@ static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
> > FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
> > dma = cfg->linear.ste_dma;
> > }
> > - writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
> > - smmu->base + ARM_SMMU_STRTAB_BASE);
> > +
> > + val = dma & STRTAB_BASE_ADDR_MASK;
> > + if (smmu_coherent(smmu))
> > + val |= STRTAB_BASE_RA;
> > +
> > + writeq_relaxed(val, smmu->base + ARM_SMMU_STRTAB_BASE);
> > writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
> > }
> > @@ -4130,12 +4154,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
> > return ret;
> > /* CR1 (table and queue memory attributes) */
> > - reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
> > - FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
> > - FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
> > - FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
> > - FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
> > - FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
> > + if (smmu_coherent(smmu)) {
> > + reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
> > + FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
> > + FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
> > + FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
> > + FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
> > + FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
> > + } else {
> > + reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_OSH) |
> > + FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_NC) |
> > + FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_NC) |
> > + FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_OSH) |
> > + FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_NC) |
> > + FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_NC);
> > + }
> > writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
> > /* CR2 (random crap) */
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > index ae23aacc3840..7a5f76e165dc 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -183,6 +183,7 @@ struct arm_vsmmu;
> > #define ARM_SMMU_SH_ISH 3
> > #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
> > #define ARM_SMMU_MEMATTR_OIWB 0xf
> > +#define ARM_SMMU_MEMATTR_OINC 0x5
> > #define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
> > #define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
>
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-05 14:53 ` Jason Gunthorpe
@ 2026-01-05 16:02 ` Robin Murphy
2026-01-05 18:54 ` Jason Gunthorpe
From: Robin Murphy @ 2026-01-05 16:02 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Dawei Li, will, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable
On 2026-01-05 2:53 pm, Jason Gunthorpe wrote:
> On Mon, Jan 05, 2026 at 01:33:34PM +0000, Robin Murphy wrote:
>> The assumption is that if the SMMU is not I/O-coherent, then the Normal
>> Cacheable attribute will inherently degrade to a non-snooping (and thus
>> effectively Normal Non-Cacheable) one, as that's essentially what AXI will
>> do in practice, and thus the attribute doesn't actually matter all that much
>> in terms of functional correctness. If the SMMU _is_ capable of snooping but
>> is not described as such then frankly firmware is wrong.
>
> Sadly I am aware of people doing this.. Either a HW bug or some other
> weird issue forces the FW to set a non-coherent FW attribute even
> though the HW is partially or fully able to process cacheable AXI
> attributes.
What I've seen more often is that firmware authors ignore (or don't even
realise) I/O-coherency, then someone ends up trying to "fix" Linux with
wacky DMA API patches when things start going wrong on mis-described
hardware... Yes, they might happen to get away with it with stuff like
SMMUs or Mali GPUs where a "well-behaved" driver might have control of
the source attributes, but where AxCACHE/AxDOMAIN are hard-wired there's
really no option other than to describe the hardware correctly, hence
that should always be the primary consideration.
And as I say, if there *is* some reason to specifically avoid using
certain attributes, then deliberately describing the hardware
incorrectly, in the hope that OS drivers might be "well-behaved" in a
way that just happens to lead to them not using those attributes, is
really not a robust workaround anyway.
> It is reasonable that Linux will set the attributes properly based on
> what it is doing. Setting the wrong attributes and expecting the HW to
> ignore them seems like a hacky direction.
Oh, I'm not saying that we *shouldn't* set our attributes more exactly -
this would still be needed for doing things the "right" way too - I just
want to be very clear on the reasons why. The current assumption is not
a bug per se, and although it's indeed not 100% robust, the cases where
it doesn't hold are more often than not for the wrong reason. Therefore
I would say doing this purely for the sake of working around bad
firmware - and especially errata - is just as hacky if not more so.
> I didn't see anything in the spec that says COHACC means the memory
> attributes are ignored and forced to non-coherent, even though that is
> the current assumption of the driver.
It kinda works the other way round: COHACC==1 says that the Cacheability
and Shareability attributes *can* be configured to snoop CPU caches
(although do not have to be); therefore the "If a system does not
support IO-coherent access from the SMMU, SMMU_IDR0.COHACC must be 0"
case implies that the SMMU is incapable of snooping regardless of how
those attributes are set, thus must at least be in a different Inner
Shareability domain from the CPUs, at which point the Cacheability
domains probably aren't too meaningful either. I would consider it
unexpectedly incorrect behaviour for an SMMU reporting COHACC==0 to
actually be capable of snooping.
>> If people have a good reason for wanting to use a coherent SMMU
>> non-coherently (and/or control of allocation hints), then that should really
>> be some kind of driver-level option - it would need a bit of additional DMA
>> API work (which has been low down my to-do list for a few years now...), but
>> it's perfectly achievable, and I think it's still preferable to abusing the
>> COHACC override in firmware.
>
> IMHO, this is a different topic, and something that will probably
> become interesting this year. I'm aware of some HW/drivers that need
> optional non-coherent mappings for isochronous flows - but it is not
> the DMA API that is the main issue but the page table walks :\
Hmm, yeah, potentially configuring PTW attributes for a DMA domain is
something I hadn't even thought about - the DMA API aspect I mean is
that in general we need some sort of DMA_ATTR_NO_SNOOP when
mapping/allocating such isochronous buffers/pagetables etc., to make the
DMA layer still do the cache maintenance/non-cacheable remaps despite
dev_is_dma_coherent() being true.
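i.e. conceptually something along these lines (DMA_ATTR_NO_SNOOP is
hypothetical, it doesn't exist yet):

	/* force the non-cacheable remap/maintenance even on a coherent device */
	buf = dma_alloc_attrs(dev, size, &dma_handle, GFP_KERNEL,
			      DMA_ATTR_NO_SNOOP);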
Thanks,
Robin.
* Re: [PATCH] iommu/arm-smmu-v3: Maintain valid access attributes for non-coherent SMMU
2026-01-05 16:02 ` Robin Murphy
@ 2026-01-05 18:54 ` Jason Gunthorpe
From: Jason Gunthorpe @ 2026-01-05 18:54 UTC (permalink / raw)
To: Robin Murphy
Cc: Dawei Li, will, joro, linux-arm-kernel, iommu, linux-kernel,
set_pte_at, stable
On Mon, Jan 05, 2026 at 04:02:34PM +0000, Robin Murphy wrote:
> > It is reasonable that Linux will set the attributes properly based on
> > what it is doing. Setting the wrong attributes and expecting the HW to
> > ignore them seems like a hacky direction.
>
> Oh, I'm not saying that we *shouldn't* set our attributes more exactly -
> this would still be needed for doing things the "right" way too - I just
> want to be very clear on the reasons why.
At least I know of HW where the SMMU fetches covered by COHACC:
* Translation table walks.
* Fetches of L1STD, STE, L1CD and CD.
* Command queue, Event queue and PRI queue access.
* GERROR, CMD_SYNC, Event queue and PRI queue MSIs, if supported.
have a mixture of coherency support. The SoC has multiple fabrics, one
of them non-coherent and specifically for isochronous traffic. In HW
some SMMU sub-units (like the table walk) have been wired to the
isochronous fabric, while others are using the normal coherent fabric.
So when it comes to this statement:
If either the SMMU or system cannot *fully* support IO-coherent
access to SMMU structures/queues/translations, this reads as 0.
The HW is "partially" IO-coherent, so COHACC is 0.
It has been lucky that so far the incorrect attributes haven't caused a
problem, but the next spin of this HW may have an issue here. I'd like
to see it fixed.
> bug per se, and although it's indeed not 100% robust, the cases where it
> doesn't hold are more often than not for the wrong reason. Therefore I would
> say doing this purely for the sake of working around bad firmware - and
> especially errata - is just as hacky if not more so.
Yeah, maybe, I am also curious what is motivating Dawei to do this work..
> the DMA API aspect I mean is that in
> general we need some sort of DMA_ATTR_NO_SNOOP when mapping/allocating such
> isochronous buffers/pagetables etc., to make the DMA layer still do the
> cache maintenance/non-cacheable remaps despite dev_is_dma_coherent() being
> true.
I have a feeling this missing support underlies some of the reasons
why FW might lie and set COHACC=0 as it resolves "how does the GPU
driver cache flush things" with no effort..
Jason
^ permalink raw reply [flat|nested] 9+ messages in thread