From: guoren@kernel.org
To: zhiwei_liu@linux.alibaba.com, liwei1518@gmail.com,
alistair.francis@wdc.com, seb@rivosinc.com,
tjeznach@rivosinc.com
Cc: qemu-riscv@nongnu.org, qemu-devel@nongnu.org,
"Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>,
qemu-stable@nongnu.org
Subject: [PATCH] hw/riscv/riscv-iommu: Fixup PDT Nested Walk
Date: Sun, 3 Aug 2025 00:43:38 -0400
Message-ID: <20250803044338.4098505-1-guoren@kernel.org>
From: "Guo Ren (Alibaba DAMO Academy)" <guoren@kernel.org>
The current implementation is wrong when iohgatp != Bare: the RISC-V
IOMMU specification defines the PDT as being addressed by GPA, not SPA,
so every PDT access must itself go through G-stage translation. Fix the
PDT walk so it is performed correctly when G-stage translation is
enabled.
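For reference, the stand-alone sketch below is a toy model of the G-stage
walk that the new pdt_memory_read() helper performs in front of each PDT
load: every PDT entry address is treated as a GPA and resolved through the
table rooted at iohgatp before the resulting SPA is read. It is not code
from this patch; g_stage_translate(), the flat pmem[] array, the fixed page
count and the Sv39-like three-level layout are simplifying assumptions (a
real SvXXx4 root table has two extra index bits), but the PTE validity and
misalignment checks mirror the ones added below.

  /* toy_gstage_walk.c: toy model of a G-stage (GPA -> SPA) table walk. */
  #include <inttypes.h>
  #include <stdio.h>
  #include <string.h>

  #define PAGE_SHIFT    12
  #define PAGE_SIZE     (1u << PAGE_SHIFT)
  #define PTE_V         0x1u
  #define PTE_R         0x2u
  #define PTE_W         0x4u
  #define PTE_X         0x8u
  #define PTE_PPN_SHIFT 10
  #define LEVELS        3        /* Sv39-like: 3 levels, 9 index bits each */
  #define IDX_BITS      9

  static uint8_t pmem[4 * PAGE_SIZE];      /* fake system-physical memory */

  /* Resolve a GPA to a toy SPA via the table rooted at root_ppn;
   * return -1 on any fault (invalid PTE, misaligned leaf, no leaf). */
  static int64_t g_stage_translate(uint64_t root_ppn, uint64_t gpa)
  {
      uint64_t base = root_ppn << PAGE_SHIFT;

      for (unsigned step = 0; step < LEVELS; step++) {
          unsigned shift = PAGE_SHIFT + IDX_BITS * (LEVELS - 1 - step);
          unsigned idx = (gpa >> shift) & ((1u << IDX_BITS) - 1);
          uint64_t pte, ppn;

          memcpy(&pte, &pmem[base + idx * 8], sizeof(pte));
          ppn = pte >> PTE_PPN_SHIFT;

          if (!(pte & PTE_V)) {
              return -1;                          /* invalid PTE */
          }
          if (pte & (PTE_R | PTE_W | PTE_X)) {    /* leaf PTE */
              if (ppn & ((1ull << (shift - PAGE_SHIFT)) - 1)) {
                  return -1;                      /* misaligned superpage */
              }
              return (ppn << PAGE_SHIFT) | (gpa & ((1ull << shift) - 1));
          }
          base = ppn << PAGE_SHIFT;               /* non-leaf: descend */
      }
      return -1;                                  /* ran out of levels */
  }

  int main(void)
  {
      /* Mapping: root (page 0) -> page 1 -> page 2 -> data page 3.
       * GPA 0x40201abc has VPN[2] = VPN[1] = VPN[0] = 1, offset 0xabc. */
      uint64_t gpa = 0x40201abcull, pte;

      pte = (1ull << PTE_PPN_SHIFT) | PTE_V;          /* root[1] -> page 1 */
      memcpy(&pmem[0 * PAGE_SIZE + 8], &pte, 8);
      pte = (2ull << PTE_PPN_SHIFT) | PTE_V;          /* mid[1]  -> page 2 */
      memcpy(&pmem[1 * PAGE_SIZE + 8], &pte, 8);
      pte = (3ull << PTE_PPN_SHIFT) | PTE_V | PTE_R;  /* leaf[1] -> page 3 */
      memcpy(&pmem[2 * PAGE_SIZE + 8], &pte, 8);

      printf("GPA 0x%" PRIx64 " -> SPA 0x%" PRIx64 "\n",
             gpa, (uint64_t)g_stage_translate(0, gpa));
      return 0;
  }

Compiled and run, it prints "GPA 0x40201abc -> SPA 0x3abc", i.e. page 3
plus the 0xabc page offset of the GPA.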
Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation")
Cc: qemu-stable@nongnu.org
Cc: Sebastien Boeuf <seb@rivosinc.com>
Cc: Tomasz Jeznach <tjeznach@rivosinc.com>
Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guoren@kernel.org>
---
hw/riscv/riscv-iommu.c | 148 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 146 insertions(+), 2 deletions(-)
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index 96a7fbdefcf3..c4dccc9e5c5d 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -866,6 +866,145 @@ static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
return true;
}
+/**
+ * pdt_memory_read: PDT wrapper around dma_memory_read.
+ *
+ * @s: IOMMU Device State
+ * @ctx: Device Translation Context with devid and pasid set
+ * @nested: true if @addr is a GPA to be translated through the G-stage
+ * @addr: address of the PDT entry (a GPA when @nested is true)
+ * @buf: buffer with the data transferred
+ * @len: length of the data transferred
+ * @attrs: memory transaction attributes
+ */
+static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx, bool nested,
+ dma_addr_t addr,
+ void *buf, dma_addr_t len,
+ MemTxAttrs attrs)
+{
+ uint64_t gatp_mode, pte;
+ struct {
+ unsigned char step;
+ unsigned char levels;
+ unsigned char ptidxbits;
+ unsigned char ptesize;
+ } sc;
+ MemTxResult ret;
+ dma_addr_t base = addr;
+
+ if (!nested)
+ goto out;
+
+ /* G-stage translation mode */
+ gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+
+ /* G-stage translation table root pointer */
+ base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
+
+ /* Start at step 0 */
+ sc.step = 0;
+
+ if (s->fctl & RISCV_IOMMU_FCTL_GXL) {
+ /* 32bit mode for GXL == 1 */
+ switch (gatp_mode) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 2;
+ sc.ptidxbits = 10;
+ sc.ptesize = 4;
+ break;
+ default:
+ return MEMTX_ACCESS_ERROR;
+ }
+ } else {
+ /* 64bit mode for GXL == 0 */
+ switch (gatp_mode) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 3;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 4;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 5;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ default:
+ return MEMTX_ACCESS_ERROR;
+ }
+ }
+
+ do {
+ const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
+ const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
+ (sc.levels - 1 - sc.step);
+ const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
+ const dma_addr_t pte_addr = base + idx * sc.ptesize;
+
+ /* Address range check before first level lookup */
+ if (!sc.step) {
+ const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
+ if ((addr & va_mask) != addr) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ }
+
+ /* Read page table entry */
+ if (sc.ptesize == 4) {
+ uint32_t pte32 = 0;
+ ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
+ pte = pte32;
+ } else {
+ ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
+ }
+ if (ret != MEMTX_OK)
+ return ret;
+
+ sc.step++;
+ hwaddr ppn = pte >> PTE_PPN_SHIFT;
+
+ if (!(pte & PTE_V)) {
+ return MEMTX_ACCESS_ERROR; /* Invalid PTE */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
+ return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
+ return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */
+ } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
+ return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
+ } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
+ base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
+ } else {
+ /* Leaf PTE, translation completed. */
+ base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
+ break;
+ }
+
+ if (sc.step == sc.levels) {
+ return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
+ }
+ } while (1);
+
+out:
+ return dma_memory_read(s->target_as, base, buf, len, attrs);
+}
+
/*
* RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
*
@@ -884,6 +1023,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
const size_t dc_len = sizeof(dc) >> dc_fmt;
int depth;
uint64_t de;
+ bool pdt_nested = false;
switch (mode) {
case RISCV_IOMMU_DDTP_MODE_OFF:
@@ -1029,6 +1169,10 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
}
+ /* Detect nested PDT walk */
+ pdt_nested = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD) !=
+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
+
for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
@@ -1038,7 +1182,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
*/
const int split = depth * 9 + 8;
addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
- if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ if (pdt_memory_read(s, ctx, pdt_nested, addr, &de, sizeof(de),
MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
}
@@ -1053,7 +1197,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
/* Leaf entry in PDT */
addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
- if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
+ if (pdt_memory_read(s, ctx, pdt_nested, addr, &dc.ta, sizeof(uint64_t) * 2,
MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
}
--
2.40.1