From: fangyu.yu@linux.alibaba.com
To: joro@8bytes.org, will@kernel.org, robin.murphy@arm.com,
pjw@kernel.org, palmer@dabbelt.com, aou@eecs.berkeley.edu,
alex@ghiti.fr, tjeznach@rivosinc.com, jgg@ziepe.ca,
kevin.tian@intel.com, baolu.lu@linux.intel.com,
vasant.hegde@amd.com, anup@brainfault.org, atish.patra@linux.dev,
skhawaja@google.com, jgg@nvidia.com
Cc: guoren@kernel.org, kvm@vger.kernel.org, iommu@lists.linux.dev,
kvm-riscv@lists.infradead.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org,
Fangyu Yu <fangyu.yu@linux.alibaba.com>
Subject: [RFC PATCH 01/11] iommupt: Add RISC-V Second-stage (iohgatp) page table support
Date: Tue, 28 Apr 2026 21:13:49 +0800 [thread overview]
Message-ID: <20260428131359.34872-2-fangyu.yu@linux.alibaba.com> (raw)
In-Reply-To: <20260428131359.34872-1-fangyu.yu@linux.alibaba.com>
From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Add support for the Sv39x4/Sv48x4/Sv57x4 second-stage page tables used by
the RISC-V IOMMU iohgatp register. In the x4 formats the root page table
is 16 KiB instead of the usual 4 KiB, which lets it cover 2 extra GPA bits
(hw_max_vasz_lg2 = 41/50/59).
Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
---
drivers/iommu/generic_pt/fmt/riscv.h | 64 +++++++++++++++++++++++++---
include/linux/generic_pt/common.h | 5 +++
include/linux/generic_pt/iommu.h | 17 +++++++-
3 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/generic_pt/fmt/riscv.h b/drivers/iommu/generic_pt/fmt/riscv.h
index a7fef6266a36..4fe645e60375 100644
--- a/drivers/iommu/generic_pt/fmt/riscv.h
+++ b/drivers/iommu/generic_pt/fmt/riscv.h
@@ -37,7 +37,16 @@ enum {
PT_MAX_OUTPUT_ADDRESS_LG2 = 34,
PT_MAX_TOP_LEVEL = 1,
#else
- PT_MAX_VA_ADDRESS_LG2 = 57,
+ /*
+ * PT_MAX_VA_ADDRESS_LG2 is the upper bound accepted by the generic
+ * pt_iommu_init() range check. It must cover both first-stage and
+ * second-stage (G-stage) modes:
+ *
+ * First-stage (fsc/iosatp): Sv39=39, Sv48=48, Sv57=57
+ * Second-stage (iohgatp): Sv39x4=41, Sv48x4=50, Sv57x4=59
+ *
+ */
+ PT_MAX_VA_ADDRESS_LG2 = 59,
PT_MAX_OUTPUT_ADDRESS_LG2 = 56,
PT_MAX_TOP_LEVEL = 4,
#endif
@@ -124,6 +133,14 @@ riscvpt_entry_num_contig_lg2(const struct pt_state *pts)
static inline unsigned int riscvpt_num_items_lg2(const struct pt_state *pts)
{
+ /*
+ * Second-stage (iohgatp) root page tables have 4x the usual number of
+ * entries (2048 = 2^11 instead of 512 = 2^9) to cover the 2 extra GPA
+ * bits in Sv39x4/Sv48x4/Sv57x4. Only the root (top) level is
+ * enlarged; all other levels remain at the standard 9-bit index width.
+ */
+ if (to_riscvpt(pts)->second_stage && pts->level == pts->range->top_level)
+ return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64)) + 2;
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
}
#define pt_num_items_lg2 riscvpt_num_items_lg2
@@ -254,6 +271,7 @@ riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
struct pt_riscv *table = &iommu_table->riscv_64pt;
switch (cfg->common.hw_max_vasz_lg2) {
+ /* First-stage (fsc/iosatp): Sv39 / Sv48 / Sv57 */
case 39:
pt_top_set_level(&table->common, 2);
break;
@@ -263,6 +281,22 @@ riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
case 57:
pt_top_set_level(&table->common, 4);
break;
+ /*
+ * Second-stage (iohgatp): Sv39x4 / Sv48x4 / Sv57x4.
+ * The top level is the same as for the first-stage counterpart.
+ */
+ case 41:
+ pt_top_set_level(&table->common, 2);
+ table->second_stage = true;
+ break;
+ case 50:
+ pt_top_set_level(&table->common, 3);
+ table->second_stage = true;
+ break;
+ case 59:
+ pt_top_set_level(&table->common, 4);
+ table->second_stage = true;
+ break;
default:
return -EINVAL;
}
@@ -283,10 +317,17 @@ riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK);
/*
- * See Table 3. Encodings of iosatp.MODE field" for DC.tx.SXL = 0:
- * 8 = Sv39 = top level 2
- * 9 = Sv38 = top level 3
- * 10 = Sv57 = top level 4
+ * Both first-stage (fsc/iosatp) and second-stage (iohgatp) share the
+ * same MODE numeric values for a given top level:
+ * top_level 2 -> MODE 8 (Sv39 / Sv39x4)
+ * top_level 3 -> MODE 9 (Sv48 / Sv48x4)
+ * top_level 4 -> MODE 10 (Sv57 / Sv57x4)
+ *
+ * The union members fsc_iosatp_mode and iohgatp_mode occupy the same
+ * byte; the caller selects the appropriate name based on domain type.
+ *
+ * See "Table 3. Encodings of iosatp.MODE field" (DC.tc.SXL = 0) and
+ * "Table 2. Encoding of iohgatp.MODE field" in the RISC-V IOMMU spec.
*/
info->fsc_iosatp_mode = top_range->top_level + 6;
}
@@ -294,6 +335,7 @@ riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
+ /* First-stage (fsc/iosatp): Sv39 / Sv48 / Sv57 */
[0] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
.common.hw_max_oasz_lg2 = 56,
.common.hw_max_vasz_lg2 = 39 },
@@ -303,6 +345,18 @@ static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
[2] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
.common.hw_max_oasz_lg2 = 56,
.common.hw_max_vasz_lg2 = 57 },
+ /*
+ * Second-stage (iohgatp): Sv39x4 / Sv48x4 / Sv57x4.
+ */
+ [3] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 41 },
+ [4] = { .common.features = 0,
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 50 },
+ [5] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
+ .common.hw_max_oasz_lg2 = 56,
+ .common.hw_max_vasz_lg2 = 59 },
};
#define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs
enum {
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index fc5d0b5edadc..e82dff33ece8 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -181,6 +181,11 @@ struct pt_riscv_32 {
struct pt_riscv_64 {
struct pt_common common;
+ /*
+ * True when this table is used for second-stage / iohgatp
+ * address translation.
+ */
+ bool second_stage;
};
enum {
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index dd0edd02a48a..f27d229ff318 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -328,7 +328,22 @@ struct pt_iommu_riscv_64_cfg {
struct pt_iommu_riscv_64_hw_info {
u64 ppn;
- u8 fsc_iosatp_mode;
+ union {
+ /*
+ * First-stage (fsc/iosatp) MODE encoding:
+ * 8 = Sv39, 9 = Sv48, 10 = Sv57
+ * Used to program DC.fsc.iosatp.MODE.
+ */
+ u8 fsc_iosatp_mode;
+ /*
+ * Second-stage (iohgatp) MODE encoding:
+ * 8 = Sv39x4, 9 = Sv48x4, 10 = Sv57x4
+ * Used to program DC.iohgatp.MODE.
+ * The numeric values are identical to fsc_iosatp_mode;
+ * the caller selects the interpretation based on domain type.
+ */
+ u8 iohgatp_mode;
+ };
};
IOMMU_FORMAT(riscv_64, riscv_64pt);
--
2.50.1
next prev parent reply other threads:[~2026-04-28 13:14 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-28 13:13 [RFC PATCH 00/11] iommu/riscv: Add hardware dirty tracking for second-stage domains fangyu.yu
2026-04-28 13:13 ` fangyu.yu [this message]
2026-04-28 13:32 ` [RFC PATCH 01/11] iommupt: Add RISC-V Second-stage (iohgatp) page table support Jason Gunthorpe
2026-04-29 1:06 ` fangyu.yu
2026-04-29 12:18 ` Jason Gunthorpe
2026-04-29 15:42 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 02/11] iommu/riscv: report iommu capabilities fangyu.yu
2026-04-28 13:33 ` Jason Gunthorpe
2026-04-29 1:15 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 03/11] iommu/riscv: use data structure instead of individual values fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 04/11] iommu/riscv: support GSCID and GVMA invalidation command fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 05/11] RISC-V: KVM: Enable KVM_VFIO interfaces on RISC-V arch fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 06/11] iommu/riscv: Add domain_alloc_paging_flags for second-stage domain fangyu.yu
2026-04-28 13:35 ` Jason Gunthorpe
2026-04-29 1:21 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 07/11] iommupt: Don't preset D when RISC-V IOMMU dirty tracking on fangyu.yu
2026-04-28 13:36 ` Jason Gunthorpe
2026-04-29 1:41 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 08/11] iommu/riscv: Add dirty tracking support for second-stage domains fangyu.yu
2026-04-28 13:38 ` Jason Gunthorpe
2026-04-29 1:46 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 09/11] iommu/riscv: Add IOTINVAL.GVMA after updating DDT/PDT entries fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 10/11] iommupt: Add RISC-V dirty tracking PTE ops fangyu.yu
2026-04-28 13:39 ` Jason Gunthorpe
2026-04-29 1:52 ` fangyu.yu
2026-04-28 13:13 ` [RFC PATCH 11/11] iommu/riscv: support nested iommu for getting iommu hardware information fangyu.yu
2026-04-28 13:39 ` Jason Gunthorpe
2026-04-29 2:37 ` fangyu.yu
2026-05-04 19:53 ` [RFC PATCH 00/11] iommu/riscv: Add hardware dirty tracking for second-stage domains Andrew Jones
2026-05-05 13:48 ` fangyu.yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260428131359.34872-2-fangyu.yu@linux.alibaba.com \
--to=fangyu.yu@linux.alibaba.com \
--cc=alex@ghiti.fr \
--cc=anup@brainfault.org \
--cc=aou@eecs.berkeley.edu \
--cc=atish.patra@linux.dev \
--cc=baolu.lu@linux.intel.com \
--cc=guoren@kernel.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm-riscv@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=pjw@kernel.org \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=tjeznach@rivosinc.com \
--cc=vasant.hegde@amd.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox