* [PATCH 1/2] riscv: ptdump: Move pagetable definitions to common header
2026-07-01 8:50 [PATCH 0/2] riscv: ptdump: Refactor for KVM gstage ptdump support Dylan.Wu
@ 2026-07-01 8:50 ` Dylan.Wu
2026-07-01 9:01 ` sashiko-bot
2026-07-01 8:50 ` [PATCH 2/2] KVM: riscv: Register ptdump with debugfs on guest creation Dylan.Wu
1 sibling, 1 reply; 5+ messages in thread
From: Dylan.Wu @ 2026-07-01 8:50 UTC (permalink / raw)
To: palmer, pjw, aou, anup
Cc: alex, atish.patra, zhouquan, linux-riscv, kvm, kvm-riscv,
linux-kernel, Dylan.Wu
Move the pagetable walking state and level definitions to a new
arch/riscv/include/asm/ptdump.h header. This allows other parts
of the kernel (like KVM) to reuse the ptdump attribute parsing
logic.
To support different pagetable types (like G-stage), the attribute
parsing logic in dump_prot() is updated to use attribute bits
defined within the pg_level structure rather than a global array.
Assisted-by: YuanSheng: deepseek-v4-pro
Co-developed-by: Quan Zhou <zhouquan@iscas.ac.cn>
Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
Signed-off-by: Dylan.Wu <fredwudi0305@gmail.com>
---
arch/riscv/include/asm/ptdump.h | 42 +++++++++++++
arch/riscv/mm/ptdump.c | 102 +++++++++++---------------------
2 files changed, 77 insertions(+), 67 deletions(-)
create mode 100644 arch/riscv/include/asm/ptdump.h
diff --git a/arch/riscv/include/asm/ptdump.h b/arch/riscv/include/asm/ptdump.h
new file mode 100644
index 000000000..9173910fa
--- /dev/null
+++ b/arch/riscv/include/asm/ptdump.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_PTDUMP_H
+#define _ASM_RISCV_PTDUMP_H
+
+#include <linux/ptdump.h>
+#include <linux/seq_file.h>
+
+/* Address marker: a named boundary in the dumped address space. */
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+/*
+ * Describes one attribute field of a page table entry: @mask selects the
+ * field, @set is the text (or printf-style format) emitted when any of the
+ * masked bits is set, and @clear is the text emitted otherwise.
+ */
+struct ptdump_prot_bits {
+ u64 mask;
+ const char *set;
+ const char *clear;
+};
+
+/*
+ * Per-level description used by the dumper: @name is printed for every range
+ * reported at this level, @bits/@num give the attribute table that
+ * dump_prot() iterates over, and @mask selects which bits of an entry are
+ * tracked as its protection value.
+ */
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ const char *name;
+ u64 mask;
+ int num;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses ptdump_pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ /* Level descriptions, indexed by the level passed to note_page() */
+ const struct ptdump_pg_level *pg_level;
+ unsigned long start_address;
+ unsigned long start_pa;
+ unsigned long last_pa;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+};
+
+/*
+ * Feed one page table entry to the dumper. @pt_st must be the ptdump member
+ * of a struct ptdump_pg_state; a negative @level contributes no protection
+ * bits.
+ */
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val);
+
+#endif /* _ASM_RISCV_PTDUMP_H */
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index f4b4a9fcb..7e55656cb 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -11,6 +11,7 @@
#include <linux/ptdump.h>
#include <linux/pgtable.h>
+#include <asm/ptdump.h>
#include <asm/kasan.h>
#define pt_dump_seq_printf(m, fmt, args...) \
@@ -25,31 +26,6 @@
seq_puts(m, fmt); \
})
-/*
- * The page dumper groups page table entries of the same type into a single
- * description. It uses pg_state to track the range information while
- * iterating over the pte entries. When the continuity is broken it then
- * dumps out a description of the range.
- */
-struct pg_state {
- struct ptdump_state ptdump;
- struct seq_file *seq;
- const struct addr_marker *marker;
- unsigned long start_address;
- unsigned long start_pa;
- unsigned long last_pa;
- int level;
- u64 current_prot;
- bool check_wx;
- unsigned long wx_pages;
-};
-
-/* Address marker */
-struct addr_marker {
- unsigned long start_address;
- const char *name;
-};
-
/* Private information for debugfs */
struct ptd_mm_info {
struct mm_struct *mm;
@@ -126,14 +102,7 @@ static struct ptd_mm_info efi_ptd_info = {
};
#endif
-/* Page Table Entry */
-struct prot_bits {
- u64 mask;
- const char *set;
- const char *clear;
-};
-
-static const struct prot_bits pte_bits[] = {
+static const struct ptdump_prot_bits pte_bits[] = {
{
#ifdef CONFIG_64BIT
.mask = _PAGE_NAPOT,
@@ -183,52 +152,48 @@ static const struct prot_bits pte_bits[] = {
}
};
-/* Page Level */
-struct pg_level {
- const char *name;
- u64 mask;
-};
-
-static struct pg_level pg_level[] = {
+/*
+ * Kernel page table levels, indexed by the level passed to note_page().
+ * Every level decodes the same pte_bits[] table. .bits/.num have to be set
+ * here because dump_prot() takes the attribute table from the level rather
+ * than from the global array; leaving them zeroed would print no attributes.
+ * Levels that are folded at build time keep reporting themselves as "PGD".
+ */
+static struct ptdump_pg_level kernel_pg_levels[] = {
{ /* pgd */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
.name = "PGD",
}, { /* p4d */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
.name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
}, { /* pud */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
}, { /* pmd */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
}, { /* pte */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
.name = "PTE",
},
};
-static void dump_prot(struct pg_state *st)
+static void dump_prot(struct ptdump_pg_state *st)
{
unsigned int i;
+ const struct ptdump_pg_level *lvl = &st->pg_level[st->level];
+ const struct ptdump_prot_bits *bits = lvl->bits;
- for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
+ for (i = 0; i < lvl->num; i++) {
char s[7];
unsigned long val;
- val = st->current_prot & pte_bits[i].mask;
+ val = st->current_prot & bits[i].mask;
if (val) {
- if (pte_bits[i].mask == _PAGE_SOFT)
- snprintf(s, sizeof(s), pte_bits[i].set, val >> 8);
+ if (bits[i].mask == _PAGE_SOFT)
+ snprintf(s, sizeof(s), bits[i].set, val >> 8);
#ifdef CONFIG_64BIT
- else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) {
+ else if (bits[i].mask == _PAGE_MTMASK_SVPBMT) {
if (val == _PAGE_NOCACHE_SVPBMT)
- snprintf(s, sizeof(s), pte_bits[i].set, "NC");
+ snprintf(s, sizeof(s), bits[i].set, "NC");
else if (val == _PAGE_IO_SVPBMT)
- snprintf(s, sizeof(s), pte_bits[i].set, "IO");
+ snprintf(s, sizeof(s), bits[i].set, "IO");
else
- snprintf(s, sizeof(s), pte_bits[i].set, "??");
+ snprintf(s, sizeof(s), bits[i].set, "??");
}
#endif
else
- strscpy(s, pte_bits[i].set);
+ strscpy(s, bits[i].set);
} else {
- strscpy(s, pte_bits[i].clear);
+ strscpy(s, bits[i].clear);
}
pt_dump_seq_printf(st->seq, " %s", s);
@@ -240,7 +205,7 @@ static void dump_prot(struct pg_state *st)
#else
#define ADDR_FORMAT "0x%08lx"
#endif
-static void dump_addr(struct pg_state *st, unsigned long addr)
+static void dump_addr(struct ptdump_pg_state *st, unsigned long addr)
{
static const char units[] = "KMGTPE";
const char *unit = units;
@@ -258,10 +223,10 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
}
pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
- pg_level[st->level].name);
+ st->pg_level[st->level].name);
}
-static void note_prot_wx(struct pg_state *st, unsigned long addr)
+static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -276,15 +241,15 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct ptdump_state *pt_st, unsigned long addr,
- int level, u64 val)
+void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, u64 val)
{
- struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
u64 prot = 0;
if (level >= 0)
- prot = val & pg_level[level].mask;
+ prot = val & st->pg_level[level].mask;
if (st->level == -1) {
st->level = level;
@@ -317,6 +282,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
st->last_pa = pa;
}
}
+EXPORT_SYMBOL_GPL(note_page);
static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
{
@@ -352,9 +318,10 @@ static void note_page_flush(struct ptdump_state *pt_st)
static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
{
- struct pg_state st = {
+ struct ptdump_pg_state st = {
.seq = s,
.marker = pinfo->markers,
+ .pg_level = kernel_pg_levels,
.level = -1,
.ptdump = {
.note_page_pte = note_page_pte,
@@ -375,12 +342,13 @@ static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
bool ptdump_check_wx(void)
{
- struct pg_state st = {
+ struct ptdump_pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{0, NULL},
{-1, NULL},
},
+ .pg_level = kernel_pg_levels,
.level = -1,
.check_wx = true,
.ptdump = {
@@ -446,12 +414,12 @@ static int __init ptdump_init(void)
kernel_ptd_info.base_addr = KERN_VIRT_START;
- pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD";
- pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD";
+ kernel_pg_levels[1].name = pgtable_l5_enabled ? "P4D" : "PGD";
+ kernel_pg_levels[2].name = pgtable_l4_enabled ? "PUD" : "PGD";
- for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
- pg_level[i].mask |= pte_bits[j].mask;
+ kernel_pg_levels[i].mask |= pte_bits[j].mask;
debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
&ptdump_fops);
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] KVM: riscv: Register ptdump with debugfs on guest creation
2026-07-01 8:50 [PATCH 0/2] riscv: ptdump: Refactor for KVM gstage ptdump support Dylan.Wu
2026-07-01 8:50 ` [PATCH 1/2] riscv: ptdump: Move pagetable definitions to common header Dylan.Wu
@ 2026-07-01 8:50 ` Dylan.Wu
2026-07-01 9:06 ` sashiko-bot
1 sibling, 1 reply; 5+ messages in thread
From: Dylan.Wu @ 2026-07-01 8:50 UTC (permalink / raw)
To: palmer, pjw, aou, anup
Cc: alex, atish.patra, zhouquan, linux-riscv, kvm, kvm-riscv,
linux-kernel, Dylan.Wu
Introduce a KVM ptdump to show a guest's G-stage page tables. This registers
a 'gstage_page_tables' file (mode 0400) under the guest's debugfs directory.
Userspace can now inspect the G-stage layout and permissions, which
is useful for architectural debugging and memory management audits.
Assisted-by: YuanSheng: deepseek-v4-pro
Co-developed-by: Quan Zhou <zhouquan@iscas.ac.cn>
Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
Signed-off-by: Dylan.Wu <fredwudi0305@gmail.com>
---
arch/riscv/include/asm/kvm_host.h | 6 +
arch/riscv/kvm/Kconfig | 15 +++
arch/riscv/kvm/Makefile | 1 +
arch/riscv/kvm/ptdump.c | 178 ++++++++++++++++++++++++++++++
arch/riscv/kvm/vm.c | 5 +
5 files changed, 205 insertions(+)
create mode 100644 arch/riscv/kvm/ptdump.c
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 60017ceec..04129c5f8 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -322,4 +322,10 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
/* Flags representing implementation specific details */
DECLARE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa);
+#ifdef CONFIG_PTDUMP_GSTAGE_DEBUGFS
+/* Add the "gstage_page_tables" file to the VM's debugfs directory. */
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
+#else
+static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
+#endif
+
#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index ec2cee0a3..0ceb4a452 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -38,3 +38,18 @@ config KVM
If unsure, say N.
endif # VIRTUALIZATION
+
+config PTDUMP_GSTAGE_DEBUGFS
+ bool "Present the gstage pagetables to debugfs"
+ depends on KVM
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on PTDUMP_DEBUGFS
+ help
+ Say Y here if you want to show the RISC-V KVM gstage guest page tables
+ layout in a debugfs file. This information is primarily useful for
+ architecture-specific kernel developers and KVM maintainers to
+ investigate memory mapping and permission issues. It is probably
+ not a good idea to enable this feature in a production kernel.
+ If in doubt, say N.
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 296c2ba05..0170c8c3b 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -42,3 +42,4 @@ kvm-y += vcpu_timer.o
kvm-y += vcpu_vector.o
kvm-y += vm.o
kvm-y += vmid.o
+kvm-$(CONFIG_PTDUMP_GSTAGE_DEBUGFS) += ptdump.o
diff --git a/arch/riscv/kvm/ptdump.c b/arch/riscv/kvm/ptdump.c
new file mode 100644
index 000000000..972d45d69
--- /dev/null
+++ b/arch/riscv/kvm/ptdump.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Debug helper used to dump the gstage pagetables of the system.
+ */
+#include <linux/debugfs.h>
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <asm/kvm_gstage.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+
+/*
+ * Attribute fields decoded for each G-stage entry, listed in the order in
+ * which they are printed.
+ */
+static const struct ptdump_prot_bits gstage_pte_bits[] = {
+ {
+ .mask = _PAGE_SOFT,
+ .set = "RSW(%d)",
+ .clear = " .. ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .set = "D",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .set = "A",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_USER,
+ .set = "U",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_EXEC,
+ .set = "X",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_WRITE,
+ .set = "W",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_READ,
+ .set = "R",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .set = "V",
+ .clear = ".",
+ }
+};
+
+/* Union of every mask in gstage_pte_bits[]; keep the two in sync. */
+#define GSTAGE_PROT_MASK	(_PAGE_SOFT | _PAGE_DIRTY | _PAGE_ACCESSED | \
+				 _PAGE_USER | _PAGE_EXEC | _PAGE_WRITE | \
+				 _PAGE_READ | _PAGE_PRESENT)
+
+#define GSTAGE_PG_LEVEL(_name) {				\
+	.bits = gstage_pte_bits,				\
+	.num = ARRAY_SIZE(gstage_pte_bits),			\
+	.mask = GSTAGE_PROT_MASK,				\
+	.name = (_name),					\
+}
+
+/*
+ * G-stage level descriptions, indexed by the level passed to note_page().
+ * The table is fully initialised at build time and shared read-only by all
+ * VMs and all readers, so nothing has to be rewritten on each read.
+ */
+static const struct ptdump_pg_level gstage_pg_levels[] = {
+	GSTAGE_PG_LEVEL("PGD"),
+	GSTAGE_PG_LEVEL("P4D"),
+	GSTAGE_PG_LEVEL("PUD"),
+	GSTAGE_PG_LEVEL("PMD"),
+	GSTAGE_PG_LEVEL("PTE"),
+};
+
+/* Index of the last (4K page) level in gstage_pg_levels[]. */
+#define GSTAGE_LEAF_LEVEL	((int)ARRAY_SIZE(gstage_pg_levels) - 1)
+
+/* Per-open state: the VM being dumped plus the parser fed by the walker. */
+struct kvm_ptdump_state {
+	struct kvm *kvm;
+	struct ptdump_pg_state parser_state;
+	struct addr_marker marker[2];
+	struct ptdump_range range[2];
+};
+
+/*
+ * kvm->mmu_lock is an rwlock only when the architecture defines
+ * KVM_HAVE_MMU_RWLOCK and a spinlock otherwise; take whichever it is.
+ */
+static void kvm_ptdump_mmu_lock(struct kvm *kvm)
+{
+#ifdef KVM_HAVE_MMU_RWLOCK
+	read_lock(&kvm->mmu_lock);
+#else
+	spin_lock(&kvm->mmu_lock);
+#endif
+}
+
+static void kvm_ptdump_mmu_unlock(struct kvm *kvm)
+{
+#ifdef KVM_HAVE_MMU_RWLOCK
+	read_unlock(&kvm->mmu_lock);
+#else
+	spin_unlock(&kvm->mmu_lock);
+#endif
+}
+
+/*
+ * Walk @nr_entries entries of the table @tbl, which sits at @level and maps
+ * addresses starting at @start_addr. Leaf, invalid and last-level entries
+ * are reported through note_page(); any other entry is followed into the
+ * next level, which always holds PTRS_PER_PTE entries.
+ */
+static void kvm_ptdump_walk_level(struct ptdump_state *pt_st,
+				  unsigned long *tbl, unsigned long nr_entries,
+				  int level, unsigned long start_addr)
+{
+	unsigned int shift = PAGE_SHIFT +
+		(GSTAGE_LEAF_LEVEL - level) * kvm_riscv_gstage_index_bits;
+	unsigned long step = 1UL << shift;
+	unsigned long addr = start_addr;
+	unsigned long i;
+
+	for (i = 0; i < nr_entries; i++, addr += step) {
+		unsigned long val = READ_ONCE(tbl[i]);
+
+		if (level == GSTAGE_LEAF_LEVEL || (val & _PAGE_LEAF) ||
+		    !(val & _PAGE_PRESENT)) {
+			note_page(pt_st, addr, level, val);
+			continue;
+		}
+
+		/* Same PFN extraction as note_page() uses for the entry. */
+		kvm_ptdump_walk_level(pt_st,
+				      phys_to_virt(PFN_PHYS(pte_pfn(__pte(val)))),
+				      PTRS_PER_PTE, level + 1, addr);
+	}
+}
+
+/*
+ * seq_file show callback: dump the whole G-stage table of the VM.
+ *
+ * NOTE(review): addresses are carried as unsigned long, so on 32-bit the
+ * part of the IPA space above 4G cannot be represented and the computed
+ * end address wraps to 0.
+ */
+static int kvm_ptdump_visitor(struct seq_file *m, void *v)
+{
+	struct kvm_ptdump_state *st = m->private;
+	struct ptdump_pg_state *parser = &st->parser_state;
+	struct kvm *kvm = st->kvm;
+	unsigned long end = 0;
+
+	parser->level = -1;
+	parser->start_address = 0;
+	parser->seq = m;
+
+	kvm_ptdump_mmu_lock(kvm);
+	/* Sample the root only under mmu_lock so it cannot go away under us. */
+	if (kvm->arch.pgd) {
+		/* The root table carries extra index bits, so it is larger. */
+		unsigned long nr_root = (unsigned long)PTRS_PER_PTE <<
+					kvm_riscv_gstage_pgd_xbits;
+		int root_level = (int)ARRAY_SIZE(gstage_pg_levels) -
+				 kvm->arch.pgd_levels;
+
+		kvm_ptdump_walk_level(&parser->ptdump,
+				      (unsigned long *)kvm->arch.pgd,
+				      nr_root, root_level, 0);
+		end = nr_root << (PAGE_SHIFT +
+				  (GSTAGE_LEAF_LEVEL - root_level) *
+				  kvm_riscv_gstage_index_bits);
+	}
+	kvm_ptdump_mmu_unlock(kvm);
+
+	/* Terminate the last run at the end of the walked IPA range. */
+	note_page(&parser->ptdump, end, -1, 0);
+	return 0;
+}
+
+/*
+ * Open handler: pin the VM, allocate the per-open dump state and hand it to
+ * single_open(). Both the allocation and the VM reference are dropped again
+ * if anything fails.
+ */
+static int kvm_ptdump_open(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+	struct kvm_ptdump_state *state;
+	int err;
+
+	if (!kvm_get_kvm_safe(kvm))
+		return -ENOENT;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state) {
+		err = -ENOMEM;
+		goto out_put_kvm;
+	}
+
+	state->kvm = kvm;
+
+	/* One named region covering everything up to the end marker. */
+	state->marker[0].start_address = 0;
+	state->marker[0].name = "Guest IPA";
+	state->marker[1].start_address = -1UL;
+
+	state->range[0].start = 0;
+	state->range[0].end = -1UL;
+
+	state->parser_state.ptdump.range = state->range;
+	state->parser_state.pg_level = gstage_pg_levels;
+	state->parser_state.marker = state->marker;
+
+	err = single_open(file, kvm_ptdump_visitor, state);
+	if (!err)
+		return 0;
+
+	kfree(state);
+out_put_kvm:
+	kvm_put_kvm(kvm);
+	return err;
+}
+
+/* Release handler: free the per-open state and drop the VM reference. */
+static int kvm_ptdump_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct kvm_ptdump_state *state = m->private;
+	struct kvm *vm = state->kvm;
+
+	kfree(state);
+	kvm_put_kvm(vm);
+
+	return single_release(inode, file);
+}
+
+/*
+ * File operations of "gstage_page_tables": a seq_file whose open and release
+ * handlers take and drop a reference on the VM.
+ */
+static const struct file_operations kvm_gstage_fops = {
+ .owner = THIS_MODULE,
+ .open = kvm_ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_ptdump_release,
+};
+
+/*
+ * Create the owner-read-only "gstage_page_tables" file in @kvm's debugfs
+ * directory; @kvm itself is stored as the file's private data.
+ */
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm)
+{
+ debugfs_create_file("gstage_page_tables", 0400, kvm->debugfs_dentry, kvm,
+ &kvm_gstage_fops);
+}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index a9f083fee..464ad2eaf 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -269,3 +269,8 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
return -EINVAL;
}
+
+/*
+ * Add the G-stage page table dump to the VM's debugfs directory. This is a
+ * no-op when CONFIG_PTDUMP_GSTAGE_DEBUGFS is not set.
+ */
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+ kvm_s2_ptdump_create_debugfs(kvm);
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread