* [PATCH 1/5] iommupt: Add the RISC-V page table format
2025-11-04 19:00 [PATCH 0/5] Convert riscv to use the generic iommu page table Jason Gunthorpe
@ 2025-11-04 19:00 ` Jason Gunthorpe
2025-11-04 23:17 ` Emil Renner Berthing
2025-11-04 19:00 ` [PATCH 2/5] iommu/riscv: Disable SADE Jason Gunthorpe
` (3 subsequent siblings)
4 siblings, 1 reply; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 19:00 UTC (permalink / raw)
To: Alexandre Ghiti, Albert Ou, iommu, Joerg Roedel, linux-riscv,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon
Cc: lihangjing, Xu Lu, patches, xieyongji
The RISC-V format is a fairly simple 5 level page table not unlike the x86
one. It has optional support for a single contiguous page size of 64k (16
x 4k).
The specification describes a 32-bit format, the general code can support
it via a #define but the iommu side implementation has been left off until
a user comes.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/generic_pt/.kunitconfig | 1 +
drivers/iommu/generic_pt/Kconfig | 11 +
drivers/iommu/generic_pt/fmt/Makefile | 2 +
drivers/iommu/generic_pt/fmt/defs_riscv.h | 29 ++
drivers/iommu/generic_pt/fmt/iommu_riscv64.c | 11 +
drivers/iommu/generic_pt/fmt/riscv.h | 313 +++++++++++++++++++
include/linux/generic_pt/common.h | 15 +
include/linux/generic_pt/iommu.h | 11 +
8 files changed, 393 insertions(+)
create mode 100644 drivers/iommu/generic_pt/fmt/defs_riscv.h
create mode 100644 drivers/iommu/generic_pt/fmt/iommu_riscv64.c
create mode 100644 drivers/iommu/generic_pt/fmt/riscv.h
diff --git a/drivers/iommu/generic_pt/.kunitconfig b/drivers/iommu/generic_pt/.kunitconfig
index 2016c5e5ac0fe9..5265d884e79cea 100644
--- a/drivers/iommu/generic_pt/.kunitconfig
+++ b/drivers/iommu/generic_pt/.kunitconfig
@@ -3,6 +3,7 @@ CONFIG_GENERIC_PT=y
CONFIG_DEBUG_GENERIC_PT=y
CONFIG_IOMMU_PT=y
CONFIG_IOMMU_PT_AMDV1=y
+CONFIG_IOMMU_PT_RISCV64=y
CONFIG_IOMMU_PT_X86_64=y
CONFIG_IOMMU_PT_KUNIT_TEST=y
diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index 6dcb771b3c582a..dd12699fcbc9c8 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -42,6 +42,16 @@ config IOMMU_PT_AMDV1
Selected automatically by an IOMMU driver that uses this format.
+config IOMMU_PT_RISCV64
+ tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39"
+ depends on !GENERIC_ATOMIC64 # for cmpxchg64
+ help
+ iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table.
+ It supports 4K/2M/1G/512G/256T page sizes and can decode a sign
+ extended portion of the 64 bit IOVA space.
+
+ Selected automatically by an IOMMU driver that uses this format.
+
config IOMMU_PT_X86_64
tristate "IOMMU page table for x86 64-bit, 4/5 levels"
depends on !GENERIC_ATOMIC64 # for cmpxchg64
@@ -56,6 +66,7 @@ config IOMMU_PT_KUNIT_TEST
tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
depends on KUNIT
depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
+ depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64
depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
default KUNIT_ALL_TESTS
help
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
index 5a3379107999f5..9c0edc4d5396b3 100644
--- a/drivers/iommu/generic_pt/fmt/Makefile
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -3,6 +3,8 @@
iommu_pt_fmt-$(CONFIG_IOMMU_PT_AMDV1) += amdv1
iommu_pt_fmt-$(CONFIG_IOMMUFD_TEST) += mock
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_RISCV64) += riscv64
+
iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64
IOMMU_PT_KUNIT_TEST :=
diff --git a/drivers/iommu/generic_pt/fmt/defs_riscv.h b/drivers/iommu/generic_pt/fmt/defs_riscv.h
new file mode 100644
index 00000000000000..cf67474d5ebaeb
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_riscv.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_RISCV_H
+#define __GENERIC_PT_FMT_DEFS_RISCV_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+#ifdef PT_RISCV_32BIT
+typedef u32 pt_riscv_entry_t;
+#define riscvpt_write_attrs riscv32pt_write_attrs
+#else
+typedef u64 pt_riscv_entry_t;
+#define riscvpt_write_attrs riscv64pt_write_attrs
+#endif
+
+typedef pt_riscv_entry_t pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct riscvpt_write_attrs {
+ pt_riscv_entry_t descriptor_bits;
+ gfp_t gfp;
+};
+#define pt_write_attrs riscvpt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_riscv64.c b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
new file mode 100644
index 00000000000000..a39fc3cca773d2
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT riscv
+#define PT_FMT_VARIANT 64
+#define PT_SUPPORTED_FEATURES \
+ (BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
+ BIT(PT_FEAT_RSICV_SVNAPOT_64K))
+
+#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/riscv.h b/drivers/iommu/generic_pt/fmt/riscv.h
new file mode 100644
index 00000000000000..1cf9082e2a4131
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/riscv.h
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ * RISC-V page table
+ *
+ * This is described in Sections:
+ * 12.3. Sv32: Page-Based 32-bit Virtual-Memory Systems
+ * 12.4. Sv39: Page-Based 39-bit Virtual-Memory System
+ * 12.5. Sv48: Page-Based 48-bit Virtual-Memory System
+ * 12.6. Sv57: Page-Based 57-bit Virtual-Memory System
+ * of the "The RISC-V Instruction Set Manual: Volume II"
+ *
+ * This includes the contiguous page extension from:
+ * Chapter 13. "Svnapot" Extension for NAPOT Translation Contiguity,
+ * Version 1.0
+ *
+ * The table format is sign extended and supports leafs in every level. The spec
+ * doesn't talk a lot about levels, but level here is the same as i=LEVELS-1 in
+ * the spec.
+ */
+#ifndef __GENERIC_PT_FMT_RISCV_H
+#define __GENERIC_PT_FMT_RISCV_H
+
+#include "defs_riscv.h"
+#include "../pt_defs.h"
+
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+enum {
+ PT_ITEM_WORD_SIZE = sizeof(pt_riscv_entry_t),
+#ifdef PT_RISCV_32BIT
+ PT_MAX_VA_ADDRESS_LG2 = 32,
+ PT_MAX_OUTPUT_ADDRESS_LG2 = 34,
+ PT_MAX_TOP_LEVEL = 1,
+#else
+ PT_MAX_VA_ADDRESS_LG2 = 57,
+ PT_MAX_OUTPUT_ADDRESS_LG2 = 56,
+ PT_MAX_TOP_LEVEL = 4,
+#endif
+ PT_GRANULE_LG2SZ = 12,
+ PT_TABLEMEM_LG2SZ = 12,
+
+ /* fsc.PPN is 44 bits wide, all PPNs are 4k aligned */
+ PT_TOP_PHYS_MASK = GENMASK_ULL(55, 12),
+};
+
+/* PTE bits */
+enum {
+ RISCVPT_V = BIT(0),
+ RISCVPT_R = BIT(1),
+ RISCVPT_W = BIT(2),
+ RISCVPT_X = BIT(3),
+ RISCVPT_U = BIT(4),
+ RISCVPT_G = BIT(5),
+ RISCVPT_A = BIT(6),
+ RISCVPT_D = BIT(7),
+ RISCVPT_RSW = GENMASK(9, 8),
+ RISCVPT_PPN32 = GENMASK(31, 10),
+
+ RISCVPT_PPN64 = GENMASK_ULL(53, 10),
+ RISCVPT_PPN64_64K = GENMASK_ULL(53, 14),
+ RISCVPT_PBMT = GENMASK_ULL(62, 61),
+ RISCVPT_N = BIT_ULL(63),
+
+ /* Svnapot encodings for ppn[0] */
+ RISCVPT_PPN64_64K_SZ = BIT(13),
+};
+
+#ifdef PT_RISCV_32BIT
+#define RISCVPT_PPN RISCVPT_PPN32
+#define pt_riscv pt_riscv_32
+#else
+#define RISCVPT_PPN RISCVPT_PPN64
+#define pt_riscv pt_riscv_64
+#endif
+
+#define common_to_riscvpt(common_ptr) \
+ container_of_const(common_ptr, struct pt_riscv, common)
+#define to_riscvpt(pts) common_to_riscvpt((pts)->range->common)
+
+static inline pt_oaddr_t riscvpt_table_pa(const struct pt_state *pts)
+{
+ return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
+}
+#define pt_table_pa riscvpt_table_pa
+
+static inline pt_oaddr_t riscvpt_entry_oa(const struct pt_state *pts)
+{
+ if (pts_feature(pts, PT_FEAT_RSICV_SVNAPOT_64K) &&
+ pts->entry & RISCVPT_N) {
+ PT_WARN_ON(pts->level != 0);
+ return oalog2_mul(FIELD_GET(RISCVPT_PPN64_64K, pts->entry),
+ ilog2(SZ_64K));
+ }
+ return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
+}
+#define pt_entry_oa riscvpt_entry_oa
+
+static inline bool riscvpt_can_have_leaf(const struct pt_state *pts)
+{
+ return true;
+}
+#define pt_can_have_leaf riscvpt_can_have_leaf
+
+/* Body in pt_fmt_defaults.h */
+static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);
+
+static inline unsigned int
+riscvpt_entry_num_contig_lg2(const struct pt_state *pts)
+{
+ if (PT_SUPPORTED_FEATURE(PT_FEAT_RSICV_SVNAPOT_64K) &&
+ pts->entry & RISCVPT_N) {
+ PT_WARN_ON(!pts_feature(pts, PT_FEAT_RSICV_SVNAPOT_64K));
+ PT_WARN_ON(pts->level);
+ return ilog2(16);
+ }
+ return ilog2(1);
+}
+#define pt_entry_num_contig_lg2 riscvpt_entry_num_contig_lg2
+
+static inline unsigned int riscvpt_num_items_lg2(const struct pt_state *pts)
+{
+ return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 riscvpt_num_items_lg2
+
+static inline unsigned short
+riscvpt_contig_count_lg2(const struct pt_state *pts)
+{
+ if (pts->level == 0 && pts_feature(pts, PT_FEAT_RSICV_SVNAPOT_64K))
+ return ilog2(16);
+ return ilog2(1);
+}
+#define pt_contig_count_lg2 riscvpt_contig_count_lg2
+
+static inline enum pt_entry_type riscvpt_load_entry_raw(struct pt_state *pts)
+{
+ const pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
+ pt_riscv_entry_t entry;
+
+ pts->entry = entry = READ_ONCE(tablep[pts->index]);
+ if (!(entry & RISCVPT_V))
+ return PT_ENTRY_EMPTY;
+ if (pts->level == 0 ||
+ ((entry & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) != 0))
+ return PT_ENTRY_OA;
+ return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw riscvpt_load_entry_raw
+
+static inline void
+riscvpt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+ unsigned int oasz_lg2,
+ const struct pt_write_attrs *attrs)
+{
+ pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
+ pt_riscv_entry_t entry;
+
+ if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
+ return;
+
+ entry = RISCVPT_V |
+ FIELD_PREP(RISCVPT_PPN, log2_div(oa, PT_GRANULE_LG2SZ)) |
+ attrs->descriptor_bits;
+
+ if (pts_feature(pts, PT_FEAT_RSICV_SVNAPOT_64K) && pts->level == 0 &&
+ oasz_lg2 != PT_GRANULE_LG2SZ) {
+ u64 *end;
+
+ entry |= RISCVPT_N | RISCVPT_PPN64_64K_SZ;
+ tablep += pts->index;
+ end = tablep + log2_div(SZ_64K, PT_GRANULE_LG2SZ);
+ for (; tablep != end; tablep++)
+ WRITE_ONCE(*tablep, entry);
+ } else {
+ /* FIXME does riscv need this to be cmpxchg? */
+ WRITE_ONCE(tablep[pts->index], entry);
+ }
+ pts->entry = entry;
+}
+#define pt_install_leaf_entry riscvpt_install_leaf_entry
+
+static inline bool riscvpt_install_table(struct pt_state *pts,
+ pt_oaddr_t table_pa,
+ const struct pt_write_attrs *attrs)
+{
+ pt_riscv_entry_t entry;
+
+ entry = RISCVPT_V |
+ FIELD_PREP(RISCVPT_PPN, log2_div(table_pa, PT_GRANULE_LG2SZ));
+ return pt_table_install64(pts, entry);
+}
+#define pt_install_table riscvpt_install_table
+
+static inline void riscvpt_attr_from_entry(const struct pt_state *pts,
+ struct pt_write_attrs *attrs)
+{
+ attrs->descriptor_bits =
+ pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U |
+ RISCVPT_G | RISCVPT_A | RISCVPT_D);
+}
+#define pt_attr_from_entry riscvpt_attr_from_entry
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_riscv_64
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+ return &container_of(iommu_table, struct pt_iommu_table, iommu)
+ ->riscv_64pt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+ return &container_of(common, struct pt_iommu_table, riscv_64pt.common)
+ ->iommu;
+}
+
+static inline int riscvpt_iommu_set_prot(struct pt_common *common,
+ struct pt_write_attrs *attrs,
+ unsigned int iommu_prot)
+{
+ u64 pte;
+
+ pte = RISCVPT_A | RISCVPT_U;
+ if (iommu_prot & IOMMU_WRITE)
+ pte |= RISCVPT_W | RISCVPT_R | RISCVPT_D;
+ if (iommu_prot & IOMMU_READ)
+ pte |= RISCVPT_R;
+ if (!(iommu_prot & IOMMU_NOEXEC))
+ pte |= RISCVPT_X;
+
+ /* Caller must specify a supported combination of flags */
+ if (unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0))
+ return -EOPNOTSUPP;
+
+ attrs->descriptor_bits = pte;
+ return 0;
+}
+#define pt_iommu_set_prot riscvpt_iommu_set_prot
+
+static inline int
+riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
+ const struct pt_iommu_riscv_64_cfg *cfg)
+{
+ struct pt_riscv *table = &iommu_table->riscv_64pt;
+
+ switch (cfg->common.hw_max_vasz_lg2) {
+ case 39:
+ pt_top_set_level(&table->common, 2);
+ break;
+ case 48:
+ pt_top_set_level(&table->common, 3);
+ break;
+ case 57:
+ pt_top_set_level(&table->common, 4);
+ break;
+ default:
+ return -EINVAL;
+ }
+ table->common.max_oasz_lg2 =
+ min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
+ return 0;
+}
+#define pt_iommu_fmt_init riscvpt_iommu_fmt_init
+
+static inline void
+riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
+ const struct pt_range *top_range,
+ struct pt_iommu_riscv_64_hw_info *info)
+{
+ phys_addr_t top_phys = virt_to_phys(top_range->top_table);
+
+ info->ppn = oalog2_div(top_phys, PT_GRANULE_LG2SZ);
+ PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK);
+
+ /*
+ * See "Table 3. Encodings of iosatp.MODE field" for DC.tx.SXL = 0:
+ * 8 = Sv39 = top level 2
+ * 9 = Sv48 = top level 3
+ * 10 = Sv57 = top level 4
+ */
+ info->fsc_iosatp_mode = top_range->top_level + 6;
+}
+#define pt_iommu_fmt_hw_info riscvpt_iommu_fmt_hw_info
+
+#if defined(GENERIC_PT_KUNIT)
+static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
+ [0] = { .common.features = BIT(PT_FEAT_RSICV_SVNAPOT_64K),
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 39 },
+ [1] = { .common.features = 0,
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 48 },
+ [2] = { .common.features = BIT(PT_FEAT_RSICV_SVNAPOT_64K),
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 57 },
+};
+#define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs
+enum {
+ KUNIT_FMT_FEATURES = BIT(PT_FEAT_RSICV_SVNAPOT_64K),
+};
+#endif
+
+#endif
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 96f8a6a7d60e10..10b8250659b98b 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -151,6 +151,21 @@ enum {
PT_FEAT_AMDV1_FORCE_COHERENCE,
};
+struct pt_riscv_32 {
+ struct pt_common common;
+};
+
+struct pt_riscv_64 {
+ struct pt_common common;
+};
+
+enum {
+ /*
+ * Support the 64k contiguous page size following the Svnapot extension.
+ */
+ PT_FEAT_RSICV_SVNAPOT_64K = PT_FEAT_FMT_START,
+};
+
struct pt_x86_64 {
struct pt_common common;
};
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index fde7ccf007c50c..afe6e581f12f96 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -255,6 +255,17 @@ IOMMU_FORMAT(amdv1, amdpt);
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);
+struct pt_iommu_riscv_64_cfg {
+ struct pt_iommu_cfg common;
+};
+
+struct pt_iommu_riscv_64_hw_info {
+ u64 ppn;
+ u8 fsc_iosatp_mode;
+};
+
+IOMMU_FORMAT(riscv_64, riscv_64pt);
+
struct pt_iommu_x86_64_cfg {
struct pt_iommu_cfg common;
};
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH 1/5] iommupt: Add the RISC-V page table format
2025-11-04 19:00 ` [PATCH 1/5] iommupt: Add the RISC-V page table format Jason Gunthorpe
@ 2025-11-04 23:17 ` Emil Renner Berthing
2025-11-06 0:24 ` Jason Gunthorpe
0 siblings, 1 reply; 8+ messages in thread
From: Emil Renner Berthing @ 2025-11-04 23:17 UTC (permalink / raw)
To: Albert Ou, Alexandre Ghiti, Jason Gunthorpe, Joerg Roedel,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon, iommu, linux-riscv
Cc: lihangjing, Xu Lu, patches, xieyongji
Quoting Jason Gunthorpe (2025-11-04 20:00:40)
> diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
> index 96f8a6a7d60e10..10b8250659b98b 100644
> --- a/include/linux/generic_pt/common.h
> +++ b/include/linux/generic_pt/common.h
> @@ -151,6 +151,21 @@ enum {
> PT_FEAT_AMDV1_FORCE_COHERENCE,
> };
>
> +struct pt_riscv_32 {
> + struct pt_common common;
> +};
> +
> +struct pt_riscv_64 {
> + struct pt_common common;
> +};
> +
> +enum {
> + /*
> + * Support the 64k contiguous page size following the Svnapot extension.
> + */
> + PT_FEAT_RSICV_SVNAPOT_64K = PT_FEAT_FMT_START,
Is RSICV (not RISCV) on purpose?
/Emil
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/5] iommupt: Add the RISC-V page table format
2025-11-04 23:17 ` Emil Renner Berthing
@ 2025-11-06 0:24 ` Jason Gunthorpe
0 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-06 0:24 UTC (permalink / raw)
To: Emil Renner Berthing
Cc: Albert Ou, Alexandre Ghiti, Joerg Roedel, Palmer Dabbelt,
Paul Walmsley, Robin Murphy, Tomasz Jeznach, Will Deacon, iommu,
linux-riscv, lihangjing, Xu Lu, patches, xieyongji
On Wed, Nov 05, 2025 at 08:17:38AM +0900, Emil Renner Berthing wrote:
> Quoting Jason Gunthorpe (2025-11-04 20:00:40)
> > diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
> > index 96f8a6a7d60e10..10b8250659b98b 100644
> > --- a/include/linux/generic_pt/common.h
> > +++ b/include/linux/generic_pt/common.h
> > @@ -151,6 +151,21 @@ enum {
> > PT_FEAT_AMDV1_FORCE_COHERENCE,
> > };
> >
> > +struct pt_riscv_32 {
> > + struct pt_common common;
> > +};
> > +
> > +struct pt_riscv_64 {
> > + struct pt_common common;
> > +};
> > +
> > +enum {
> > + /*
> > + * Support the 64k contiguous page size following the Svnapot extension.
> > + */
> > + PT_FEAT_RSICV_SVNAPOT_64K = PT_FEAT_FMT_START,
>
> Is RSICV (not RISCV) on purpose?
No it is a typo, I fixed it.
Thanks,
Jason
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/5] iommu/riscv: Disable SADE
2025-11-04 19:00 [PATCH 0/5] Convert riscv to use the generic iommu page table Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 1/5] iommupt: Add the RISC-V page table format Jason Gunthorpe
@ 2025-11-04 19:00 ` Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 3/5] iommu/riscv: Use the generic iommu page table Jason Gunthorpe
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 19:00 UTC (permalink / raw)
To: Alexandre Ghiti, Albert Ou, iommu, Joerg Roedel, linux-riscv,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon
Cc: lihangjing, Xu Lu, patches, xieyongji
In terms of the iommu subsystem the SADE/GADE feature "3.4. IOMMU updating
of PTE accessed (A) and dirty (D) updates" is called dirty tracking.
There is no reason to enable HW support for this, and the HW cost
associated with it, unless dirty tracking is actually enabled through
iommufd. It should be a dynamic feature linked to user request.
Further, without implementing the read dirty ops the whole thing is
pointless.
Do not set DC.tc.SADE just because the HW has support for dirty tracking.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/riscv/iommu.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index ebb22979075df6..3a29b31d53bbf7 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -810,7 +810,6 @@ struct riscv_iommu_domain {
struct list_head bonds;
spinlock_t lock; /* protect bonds list updates. */
int pscid;
- bool amo_enabled;
int numa_node;
unsigned int pgd_mode;
unsigned long *pgd_root;
@@ -1201,8 +1200,6 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
if (!(prot & IOMMU_WRITE))
pte_prot = _PAGE_BASE | _PAGE_READ;
- else if (domain->amo_enabled)
- pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE;
else
pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
@@ -1386,7 +1383,6 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
INIT_LIST_HEAD_RCU(&domain->bonds);
spin_lock_init(&domain->lock);
domain->numa_node = dev_to_node(iommu->dev);
- domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD);
domain->pgd_mode = pgd_mode;
domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
GFP_KERNEL_ACCOUNT, SZ_4K);
@@ -1509,8 +1505,6 @@ static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
* the device directory. Do not mark the context valid yet.
*/
tc = 0;
- if (iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD)
- tc |= RISCV_IOMMU_DC_TC_SADE;
for (i = 0; i < fwspec->num_ids; i++) {
dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);
if (!dc) {
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/5] iommu/riscv: Use the generic iommu page table
2025-11-04 19:00 [PATCH 0/5] Convert riscv to use the generic iommu page table Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 1/5] iommupt: Add the RISC-V page table format Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 2/5] iommu/riscv: Disable SADE Jason Gunthorpe
@ 2025-11-04 19:00 ` Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 4/5] iommu/riscv: Enable SVNAPOT support for contiguous ptes Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 5/5] iommu/riscv: Allow RISC_VIOMMU to COMPILE_TEST Jason Gunthorpe
4 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 19:00 UTC (permalink / raw)
To: Alexandre Ghiti, Albert Ou, iommu, Joerg Roedel, linux-riscv,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon
Cc: lihangjing, Xu Lu, patches, xieyongji
This is a fairly straightforward conversion of the RISC-V iommu driver to
use the generic iommu page table code.
Invalidation stays as it is now with the driver pretending to implement
simple range based invalidation even though the HW is more like ARM SMMUv3
than AMD where the HW implements a single-PTE based invalidation. Future
work to extend the generic invalidate mechanism to support more ARM-like
semantics would benefit this driver as well.
Delete the existing page table code.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/riscv/Kconfig | 3 +
drivers/iommu/riscv/iommu.c | 287 +++++-------------------------------
2 files changed, 39 insertions(+), 251 deletions(-)
diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
index c071816f59a67b..a329ec634cf1c5 100644
--- a/drivers/iommu/riscv/Kconfig
+++ b/drivers/iommu/riscv/Kconfig
@@ -6,6 +6,9 @@ config RISCV_IOMMU
depends on RISCV && 64BIT
default y
select IOMMU_API
+ select GENERIC_PT
+ select IOMMU_PT
+ select IOMMU_PT_RISCV64
help
Support for implementations of the RISC-V IOMMU architecture that
complements the RISC-V MMU capabilities, providing similar address
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 3a29b31d53bbf7..8fe0031f6cb665 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -21,6 +21,7 @@
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/generic_pt/iommu.h>
#include "../iommu-pages.h"
#include "iommu-bits.h"
@@ -806,14 +807,15 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
/* This struct contains protection domain specific IOMMU driver data. */
struct riscv_iommu_domain {
- struct iommu_domain domain;
+ union {
+ struct iommu_domain domain;
+ struct pt_iommu_riscv_64 riscvpt;
+ };
struct list_head bonds;
spinlock_t lock; /* protect bonds list updates. */
int pscid;
- int numa_node;
- unsigned int pgd_mode;
- unsigned long *pgd_root;
};
+PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);
#define iommu_domain_to_riscv(iommu_domain) \
container_of(iommu_domain, struct riscv_iommu_domain, domain)
@@ -1076,156 +1078,9 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
-}
-
-#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
-
-#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
-#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)
-#define _io_pte_none(pte) ((pte) == 0)
-#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
-
-static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
- unsigned long pte,
- struct iommu_pages_list *freelist)
-{
- unsigned long *ptr;
- int i;
-
- if (!_io_pte_present(pte) || _io_pte_leaf(pte))
- return;
-
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
-
- /* Recursively free all sub page table pages */
- for (i = 0; i < PTRS_PER_PTE; i++) {
- pte = READ_ONCE(ptr[i]);
- if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
- riscv_iommu_pte_free(domain, pte, freelist);
- }
-
- if (freelist)
- iommu_pages_list_add(freelist, ptr);
- else
- iommu_free_pages(ptr);
-}
-
-static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
- unsigned long iova, size_t pgsize,
- gfp_t gfp)
-{
- unsigned long *ptr = domain->pgd_root;
- unsigned long pte, old;
- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
- void *addr;
-
- do {
- const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
- /*
- * Note: returned entry might be a non-leaf if there was
- * existing mapping with smaller granularity. Up to the caller
- * to replace and invalidate.
- */
- if (((size_t)1 << shift) == pgsize)
- return ptr;
-pte_retry:
- pte = READ_ONCE(*ptr);
- /*
- * This is very likely incorrect as we should not be adding
- * new mapping with smaller granularity on top
- * of existing 2M/1G mapping. Fail.
- */
- if (_io_pte_present(pte) && _io_pte_leaf(pte))
- return NULL;
- /*
- * Non-leaf entry is missing, allocate and try to add to the
- * page table. This might race with other mappings, retry.
- */
- if (_io_pte_none(pte)) {
- addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
- SZ_4K);
- if (!addr)
- return NULL;
- old = pte;
- pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
- if (cmpxchg_relaxed(ptr, old, pte) != old) {
- iommu_free_pages(addr);
- goto pte_retry;
- }
- }
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
- } while (level-- > 0);
-
- return NULL;
-}
-
-static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
- unsigned long iova, size_t *pte_pgsize)
-{
- unsigned long *ptr = domain->pgd_root;
- unsigned long pte;
- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
-
- do {
- const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
- pte = READ_ONCE(*ptr);
- if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
- *pte_pgsize = (size_t)1 << shift;
- return ptr;
- }
- if (_io_pte_none(pte))
- return NULL;
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
- } while (level-- > 0);
-
- return NULL;
-}
-
-static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
- unsigned long iova, phys_addr_t phys,
- size_t pgsize, size_t pgcount, int prot,
- gfp_t gfp, size_t *mapped)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t size = 0;
- unsigned long *ptr;
- unsigned long pte, old, pte_prot;
- int rc = 0;
- struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-
- if (!(prot & IOMMU_WRITE))
- pte_prot = _PAGE_BASE | _PAGE_READ;
- else
- pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
-
- while (pgcount) {
- ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
- if (!ptr) {
- rc = -ENOMEM;
- break;
- }
-
- old = READ_ONCE(*ptr);
- pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
- if (cmpxchg_relaxed(ptr, old, pte) != old)
- continue;
-
- riscv_iommu_pte_free(domain, old, &freelist);
-
- size += pgsize;
- iova += pgsize;
- phys += pgsize;
- --pgcount;
- }
-
- *mapped = size;
-
- if (!iommu_pages_list_empty(&freelist)) {
+ if (iommu_pages_list_empty(&gather->freelist)) {
+ riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
+ } else {
/*
* In 1.0 spec version, the smallest scope we can use to
* invalidate all levels of page table (i.e. leaf and non-leaf)
@@ -1234,71 +1089,20 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
* capability.NL (non-leaf) IOTINVAL command.
*/
riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
- iommu_put_pages_list(&freelist);
+ iommu_put_pages_list(&gather->freelist);
}
-
- return rc;
-}
-
-static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
- unsigned long iova, size_t pgsize,
- size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t size = pgcount << __ffs(pgsize);
- unsigned long *ptr, old;
- size_t unmapped = 0;
- size_t pte_size;
-
- while (unmapped < size) {
- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
- if (!ptr)
- return unmapped;
-
- /* partial unmap is not allowed, fail. */
- if (iova & (pte_size - 1))
- return unmapped;
-
- old = READ_ONCE(*ptr);
- if (cmpxchg_relaxed(ptr, old, 0) != old)
- continue;
-
- iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
- pte_size);
-
- iova += pte_size;
- unmapped += pte_size;
- }
-
- return unmapped;
-}
-
-static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
- dma_addr_t iova)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t pte_size;
- unsigned long *ptr;
-
- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
- if (!ptr)
- return 0;
-
- return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
}
static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- const unsigned long pfn = virt_to_pfn(domain->pgd_root);
WARN_ON(!list_empty(&domain->bonds));
if ((int)domain->pscid > 0)
ida_free(&riscv_iommu_pscids, domain->pscid);
- riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
+ pt_iommu_deinit(&domain->riscvpt.iommu);
kfree(domain);
}
@@ -1323,13 +1127,16 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
struct riscv_iommu_device *iommu = dev_to_iommu(dev);
struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
+ struct pt_iommu_riscv_64_hw_info pt_info;
u64 fsc, ta;
- if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
+ pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);
+
+ if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))
return -ENODEV;
- fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
- FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+ fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |
+ FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);
ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
RISCV_IOMMU_PC_TA_V;
@@ -1344,37 +1151,32 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
}
static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
+ IOMMU_PT_DOMAIN_OPS(riscv_64),
.attach_dev = riscv_iommu_attach_paging_domain,
.free = riscv_iommu_free_paging_domain,
- .map_pages = riscv_iommu_map_pages,
- .unmap_pages = riscv_iommu_unmap_pages,
- .iova_to_phys = riscv_iommu_iova_to_phys,
.iotlb_sync = riscv_iommu_iotlb_sync,
.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
};
static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
{
+ struct pt_iommu_riscv_64_cfg cfg = {};
struct riscv_iommu_domain *domain;
struct riscv_iommu_device *iommu;
- unsigned int pgd_mode;
- dma_addr_t va_mask;
- int va_bits;
+ int ret;
iommu = dev_to_iommu(dev);
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
- va_bits = 57;
+ cfg.common.hw_max_vasz_lg2 = 57;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
- va_bits = 48;
+ cfg.common.hw_max_vasz_lg2 = 48;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
- va_bits = 39;
+ cfg.common.hw_max_vasz_lg2 = 39;
} else {
dev_err(dev, "cannot find supported page table mode\n");
return ERR_PTR(-ENODEV);
}
+ cfg.common.hw_max_oasz_lg2 = 56;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
@@ -1382,42 +1184,23 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
INIT_LIST_HEAD_RCU(&domain->bonds);
spin_lock_init(&domain->lock);
- domain->numa_node = dev_to_node(iommu->dev);
- domain->pgd_mode = pgd_mode;
- domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
- GFP_KERNEL_ACCOUNT, SZ_4K);
- if (!domain->pgd_root) {
- kfree(domain);
- return ERR_PTR(-ENOMEM);
- }
+ cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
+ BIT(PT_FEAT_FLUSH_RANGE);
+ domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
+ domain->domain.ops = &riscv_iommu_paging_domain_ops;
domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
if (domain->pscid < 0) {
- iommu_free_pages(domain->pgd_root);
- kfree(domain);
+ riscv_iommu_free_paging_domain(&domain->domain);
return ERR_PTR(-ENOMEM);
}
- /*
- * Note: RISC-V Privilege spec mandates that virtual addresses
- * need to be sign-extended, so if (VA_BITS - 1) is set, all
- * bits >= VA_BITS need to also be set or else we'll get a
- * page fault. However the code that creates the mappings
- * above us (e.g. iommu_dma_alloc_iova()) won't do that for us
- * for now, so we'll end up with invalid virtual addresses
- * to map. As a workaround until we get this sorted out
- * limit the available virtual addresses to VA_BITS - 1.
- */
- va_mask = DMA_BIT_MASK(va_bits - 1);
-
- domain->domain.geometry.aperture_start = 0;
- domain->domain.geometry.aperture_end = va_mask;
- domain->domain.geometry.force_aperture = true;
- domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
-
- domain->domain.ops = &riscv_iommu_paging_domain_ops;
-
+ ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
+ if (ret) {
+ riscv_iommu_free_paging_domain(&domain->domain);
+ return ERR_PTR(ret);
+ }
return &domain->domain;
}
@@ -1671,3 +1454,5 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
riscv_iommu_queue_disable(&iommu->cmdq);
return rc;
}
+
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/5] iommu/riscv: Enable SVNAPOT support for contiguous ptes
2025-11-04 19:00 [PATCH 0/5] Convert riscv to use the generic iommu page table Jason Gunthorpe
` (2 preceding siblings ...)
2025-11-04 19:00 ` [PATCH 3/5] iommu/riscv: Use the generic iommu page table Jason Gunthorpe
@ 2025-11-04 19:00 ` Jason Gunthorpe
2025-11-04 19:00 ` [PATCH 5/5] iommu/riscv: Allow RISCV_IOMMU to COMPILE_TEST Jason Gunthorpe
4 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 19:00 UTC (permalink / raw)
To: Alexandre Ghiti, Albert Ou, iommu, Joerg Roedel, linux-riscv,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon
Cc: lihangjing, Xu Lu, patches, xieyongji
This turns on the 64k contiguous (NAPOT) page size. The "RISC-V IOMMU
Architecture Specification" states:
6.4 IOMMU capabilities
[..]
IOMMU implementations must support the Svnapot standard extension for
NAPOT Translation Contiguity.
So just switch it on unconditionally.
Cc: Xu Lu <luxu.kernel@bytedance.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/riscv/iommu.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 8fe0031f6cb665..014de227e6123f 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1184,8 +1184,13 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
INIT_LIST_HEAD_RCU(&domain->bonds);
spin_lock_init(&domain->lock);
+ /*
+ * 6.4 IOMMU capabilities [..] IOMMU implementations must support the
+ * Svnapot standard extension for NAPOT Translation Contiguity.
+ */
cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
- BIT(PT_FEAT_FLUSH_RANGE);
+ BIT(PT_FEAT_FLUSH_RANGE) |
+ BIT(PT_FEAT_RSICV_SVNAPOT_64K);
domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
domain->domain.ops = &riscv_iommu_paging_domain_ops;
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 5/5] iommu/riscv: Allow RISCV_IOMMU to COMPILE_TEST
2025-11-04 19:00 [PATCH 0/5] Convert riscv to use the generic iommu page table Jason Gunthorpe
` (3 preceding siblings ...)
2025-11-04 19:00 ` [PATCH 4/5] iommu/riscv: Enable SVNAPOT support for contiguous ptes Jason Gunthorpe
@ 2025-11-04 19:00 ` Jason Gunthorpe
4 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 19:00 UTC (permalink / raw)
To: Alexandre Ghiti, Albert Ou, iommu, Joerg Roedel, linux-riscv,
Palmer Dabbelt, Paul Walmsley, Robin Murphy, Tomasz Jeznach,
Will Deacon
Cc: lihangjing, Xu Lu, patches, xieyongji
This driver used to use a lot of page table constants from the architecture
code which prevented COMPILE_TEST on other architectures. Now that iommupt
provides all of the constants internally there are only two small bumps
preventing COMPILE_TEST.
- Use the generic PHYS_PFN()/PFN_PHYS() macros in place of the
riscv-specific phys_to_pfn() and pfn_to_phys()
- Guard the mmiowb() barrier with #ifdef CONFIG_MMIOWB so it compiles on
architectures that do not provide it
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/riscv/Kconfig | 5 +++--
drivers/iommu/riscv/iommu-bits.h | 4 +++-
drivers/iommu/riscv/iommu.c | 4 +++-
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig
index a329ec634cf1c5..f681188d19a1bc 100644
--- a/drivers/iommu/riscv/Kconfig
+++ b/drivers/iommu/riscv/Kconfig
@@ -3,8 +3,9 @@
config RISCV_IOMMU
bool "RISC-V IOMMU Support"
- depends on RISCV && 64BIT
- default y
+ default RISCV
+ depends on (RISCV || COMPILE_TEST) && 64BIT
+ depends on GENERIC_MSI_IRQ
select IOMMU_API
select GENERIC_PT
select IOMMU_PT
diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 98daf0e1a30690..29a0040b1c32ea 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/bitfield.h>
#include <linux/bits.h>
+#include <asm/page.h>
/*
* Chapter 5: Memory Mapped register interface
@@ -718,7 +719,8 @@ static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
u64 addr)
{
- cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, phys_to_pfn(addr));
+ cmd->dword1 =
+ FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, PHYS_PFN(addr));
cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
}
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 014de227e6123f..2dd33ee778c009 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -160,7 +160,7 @@ static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {
const size_t queue_size = entry_size << (logsz + 1);
- queue->phys = pfn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
+ queue->phys = PFN_PHYS(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);
} else {
do {
@@ -436,7 +436,9 @@ static unsigned int riscv_iommu_queue_send(struct riscv_iommu_queue *queue,
* 6. Make sure the doorbell write to the device has finished before updating
* the shadow tail index in normal memory. 'fence o, w'
*/
+#ifdef CONFIG_MMIOWB
mmiowb();
+#endif
atomic_inc(&queue->tail);
/* 7. Complete submission and restore local interrupts */
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread