* [PATCH v8 1/6] KVM: arm64: Move pagetable definitions to common header
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 2/6] arm64: ptdump: Expose the attribute parsing functionality Sebastian Ene
` (4 subsequent siblings)
5 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
In preparation for using the stage-2 definitions in ptdump, move some of
these macros into the common header.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/kvm_pgtable.h | 42 ++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/pgtable.c | 42 ----------------------------
2 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 19278dfe7978..03f4c3d7839c 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -59,6 +59,48 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 9e2bbee77491..c3e9d77bba23 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
-
-#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
-
-#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
-
-#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
- KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
- KVM_PTE_LEAF_ATTR_HI_S2_XN)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
-#define KVM_MAX_OWNER_ID 1
-
-/*
- * Used to indicate a pte for which a 'break-before-make' sequence is in
- * progress.
- */
-#define KVM_INVALID_PTE_LOCKED BIT(10)
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 2/6] arm64: ptdump: Expose the attribute parsing functionality
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 1/6] KVM: arm64: Move pagetable definitions to common header Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure Sebastian Ene
` (3 subsequent siblings)
5 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
Reuse the descriptor parsing functionality to keep the same output format
as the original ptdump code. In order for this to happen, move the state
tracking objects into a common header.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 41 +++++++++++++++++++++++-
arch/arm64/mm/ptdump.c | 55 +++++++--------------------------
2 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 5b1701c76d1c..bd5d3ee3e8dc 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -9,6 +9,7 @@
#include <linux/mm_types.h>
#include <linux/seq_file.h>
+#include <linux/ptdump.h>
struct addr_marker {
unsigned long start_address;
@@ -21,14 +22,52 @@ struct ptdump_info {
unsigned long base_addr;
};
+struct ptdump_prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ char name[4];
+ int num;
+ u64 mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ const struct mm_struct *mm;
+ unsigned long start_address;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
-#endif
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+#else
+static inline void note_page(void *pt_st, unsigned long addr,
+ int level, u64 val) { }
#endif /* CONFIG_PTDUMP_CORE */
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 6986827e0d64..404751fd30fe 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -38,33 +38,7 @@
seq_printf(m, fmt); \
})
-/*
- * The page dumper groups page table entries of the same type into a single
- * description. It uses pg_state to track the range information while
- * iterating over the pte entries. When the continuity is broken it then
- * dumps out a description of the range.
- */
-struct pg_state {
- struct ptdump_state ptdump;
- struct seq_file *seq;
- const struct addr_marker *marker;
- const struct mm_struct *mm;
- unsigned long start_address;
- int level;
- u64 current_prot;
- bool check_wx;
- unsigned long wx_pages;
- unsigned long uxn_pages;
-};
-
-struct prot_bits {
- u64 mask;
- u64 val;
- const char *set;
- const char *clear;
-};
-
-static const struct prot_bits pte_bits[] = {
+static const struct ptdump_prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
@@ -143,14 +117,7 @@ static const struct prot_bits pte_bits[] = {
}
};
-struct pg_level {
- const struct prot_bits *bits;
- char name[4];
- int num;
- u64 mask;
-};
-
-static struct pg_level pg_level[] __ro_after_init = {
+static struct ptdump_pg_level pg_level[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
@@ -174,7 +141,7 @@ static struct pg_level pg_level[] __ro_after_init = {
},
};
-static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+static void dump_prot(struct ptdump_pg_state *st, const struct ptdump_prot_bits *bits,
size_t num)
{
unsigned i;
@@ -192,7 +159,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
}
}
-static void note_prot_uxn(struct pg_state *st, unsigned long addr)
+static void note_prot_uxn(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -206,7 +173,7 @@ static void note_prot_uxn(struct pg_state *st, unsigned long addr)
st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_prot_wx(struct pg_state *st, unsigned long addr)
+static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -221,10 +188,10 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val)
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val)
{
- struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
static const char units[] = "KMGTPE";
u64 prot = 0;
@@ -286,12 +253,12 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
- struct pg_state st;
+ struct ptdump_pg_state st;
if (info->base_addr < TASK_SIZE_64)
end = TASK_SIZE_64;
- st = (struct pg_state){
+ st = (struct ptdump_pg_state){
.seq = s,
.marker = info->markers,
.mm = info->mm,
@@ -324,7 +291,7 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = {
bool ptdump_check_wx(void)
{
- struct pg_state st = {
+ struct ptdump_pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{ 0, NULL},
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 1/6] KVM: arm64: Move pagetable definitions to common header Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 2/6] arm64: ptdump: Expose the attribute parsing functionality Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
2024-08-20 13:49 ` Marc Zyngier
2024-08-16 12:39 ` [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation Sebastian Ene
` (2 subsequent siblings)
5 siblings, 1 reply; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
Printing the descriptor attributes requires accessing a mask which has a
different set of attributes for stage-2. In preparation for adding support
for the stage-2 pagetables dumping, use the mask from the local context
and not from the globally defined pg_level array. Store a pointer to
the pg_level array in the ptdump state structure. This will allow us to
extract the mask which is wrapped in the pg_level array and use it for
descriptor parsing in the note_page.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 1 +
arch/arm64/mm/ptdump.c | 13 ++++++++-----
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index bd5d3ee3e8dc..71a7ed01153a 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -44,6 +44,7 @@ struct ptdump_pg_level {
*/
struct ptdump_pg_state {
struct ptdump_state ptdump;
+ struct ptdump_pg_level *pg_level;
struct seq_file *seq;
const struct addr_marker *marker;
const struct mm_struct *mm;
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 404751fd30fe..ca53ef274a8b 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = {
}
};
-static struct ptdump_pg_level pg_level[] __ro_after_init = {
+static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
@@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
u64 val)
{
struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
+ struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
u64 prot = 0;
@@ -262,6 +263,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
.seq = s,
.marker = info->markers,
.mm = info->mm,
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
.note_page = note_page,
@@ -279,10 +281,10 @@ static void __init ptdump_initialize(void)
{
unsigned i, j;
- for (i = 0; i < ARRAY_SIZE(pg_level); i++)
- if (pg_level[i].bits)
- for (j = 0; j < pg_level[i].num; j++)
- pg_level[i].mask |= pg_level[i].bits[j].mask;
+ for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
+ if (kernel_pg_levels[i].bits)
+ for (j = 0; j < kernel_pg_levels[i].num; j++)
+ kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
@@ -297,6 +299,7 @@ bool ptdump_check_wx(void)
{ 0, NULL},
{ -1, NULL},
},
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.check_wx = true,
.ptdump = {
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure
2024-08-16 12:39 ` [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure Sebastian Ene
@ 2024-08-20 13:49 ` Marc Zyngier
2024-08-20 14:13 ` Sebastian Ene
0 siblings, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2024-08-20 13:49 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Fri, 16 Aug 2024 13:39:03 +0100,
Sebastian Ene <sebastianene@google.com> wrote:
>
> Printing the descriptor attributes requires accessing a mask which has a
> different set of attributes for stage-2. In preparation for adding support
> for the stage-2 pagetables dumping, use the mask from the local context
> and not from the globally defined pg_level array. Store a pointer to
> the pg_level array in the ptdump state structure. This will allow us to
> extract the mask which is wrapped in the pg_level array and use it for
> descriptor parsing in the note_page.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/include/asm/ptdump.h | 1 +
> arch/arm64/mm/ptdump.c | 13 ++++++++-----
> 2 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> index bd5d3ee3e8dc..71a7ed01153a 100644
> --- a/arch/arm64/include/asm/ptdump.h
> +++ b/arch/arm64/include/asm/ptdump.h
> @@ -44,6 +44,7 @@ struct ptdump_pg_level {
> */
> struct ptdump_pg_state {
> struct ptdump_state ptdump;
> + struct ptdump_pg_level *pg_level;
> struct seq_file *seq;
> const struct addr_marker *marker;
> const struct mm_struct *mm;
> diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> index 404751fd30fe..ca53ef274a8b 100644
> --- a/arch/arm64/mm/ptdump.c
> +++ b/arch/arm64/mm/ptdump.c
> @@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = {
> }
> };
>
> -static struct ptdump_pg_level pg_level[] __ro_after_init = {
> +static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
Do you actually need this sort of renaming? Given that it is static,
this looks like some slightly abusive repainting which isn't warranted
here.
I also didn't understand the commit message: you're not tracking any
mask here, but a page table level. You are also not using it for
anything yet, see below.
> { /* pgd */
> .name = "PGD",
> .bits = pte_bits,
> @@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
> u64 val)
> {
> struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
> + struct ptdump_pg_level *pg_level = st->pg_level;
This is what I mean. What is this pg_level used for?
> static const char units[] = "KMGTPE";
> u64 prot = 0;
>
> @@ -262,6 +263,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> .seq = s,
> .marker = info->markers,
> .mm = info->mm,
> + .pg_level = &kernel_pg_levels[0],
> .level = -1,
> .ptdump = {
> .note_page = note_page,
> @@ -279,10 +281,10 @@ static void __init ptdump_initialize(void)
> {
> unsigned i, j;
>
> - for (i = 0; i < ARRAY_SIZE(pg_level); i++)
> - if (pg_level[i].bits)
> - for (j = 0; j < pg_level[i].num; j++)
> - pg_level[i].mask |= pg_level[i].bits[j].mask;
> + for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
> + if (kernel_pg_levels[i].bits)
> + for (j = 0; j < kernel_pg_levels[i].num; j++)
> + kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
> }
>
> static struct ptdump_info kernel_ptdump_info __ro_after_init = {
> @@ -297,6 +299,7 @@ bool ptdump_check_wx(void)
> { 0, NULL},
> { -1, NULL},
> },
> + .pg_level = &kernel_pg_levels[0],
> .level = -1,
> .check_wx = true,
> .ptdump = {
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure
2024-08-20 13:49 ` Marc Zyngier
@ 2024-08-20 14:13 ` Sebastian Ene
2024-08-20 14:25 ` Marc Zyngier
0 siblings, 1 reply; 19+ messages in thread
From: Sebastian Ene @ 2024-08-20 14:13 UTC (permalink / raw)
To: Marc Zyngier
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, Aug 20, 2024 at 02:49:04PM +0100, Marc Zyngier wrote:
> On Fri, 16 Aug 2024 13:39:03 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > Printing the descriptor attributes requires accessing a mask which has a
> > different set of attributes for stage-2. In preparation for adding support
> > for the stage-2 pagetables dumping, use the mask from the local context
> > and not from the globally defined pg_level array. Store a pointer to
> > the pg_level array in the ptdump state structure. This will allow us to
> > extract the mask which is wrapped in the pg_level array and use it for
> > descriptor parsing in the note_page.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/include/asm/ptdump.h | 1 +
> > arch/arm64/mm/ptdump.c | 13 ++++++++-----
> > 2 files changed, 9 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> > index bd5d3ee3e8dc..71a7ed01153a 100644
> > --- a/arch/arm64/include/asm/ptdump.h
> > +++ b/arch/arm64/include/asm/ptdump.h
> > @@ -44,6 +44,7 @@ struct ptdump_pg_level {
> > */
> > struct ptdump_pg_state {
> > struct ptdump_state ptdump;
> > + struct ptdump_pg_level *pg_level;
> > struct seq_file *seq;
> > const struct addr_marker *marker;
> > const struct mm_struct *mm;
> > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > index 404751fd30fe..ca53ef274a8b 100644
> > --- a/arch/arm64/mm/ptdump.c
> > +++ b/arch/arm64/mm/ptdump.c
> > @@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = {
> > }
> > };
> >
> > -static struct ptdump_pg_level pg_level[] __ro_after_init = {
> > +static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
Hi Marc,
> Do you actually need this sort of renaming? Given that it is static,
> this looks like some slightly abusive repainting which isn't warranted
> here.
I applied Will's suggestion from
https://lore.kernel.org/all/20240705111229.GB9231@willie-the-truck/
>
>
> I also didn't understand the commit message: you're not tracking any
> mask here, but a page table level. You are also not using it for
> anything yet, see below.
and I missed updating the commit message.
>
>
> > { /* pgd */
> > .name = "PGD",
> > .bits = pte_bits,
> > @@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
> > u64 val)
> > {
> > struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
> > + struct ptdump_pg_level *pg_level = st->pg_level;
>
> This is what I mean. What is this pg_level used for?
I make use of it to extract the name based on the level. The suggestion
that Will made allowed me to keep the code with fewer changes.
Thanks,
Seb
>
> > static const char units[] = "KMGTPE";
> > u64 prot = 0;
> >
> > @@ -262,6 +263,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> > .seq = s,
> > .marker = info->markers,
> > .mm = info->mm,
> > + .pg_level = &kernel_pg_levels[0],
> > .level = -1,
> > .ptdump = {
> > .note_page = note_page,
> > @@ -279,10 +281,10 @@ static void __init ptdump_initialize(void)
> > {
> > unsigned i, j;
> >
> > - for (i = 0; i < ARRAY_SIZE(pg_level); i++)
> > - if (pg_level[i].bits)
> > - for (j = 0; j < pg_level[i].num; j++)
> > - pg_level[i].mask |= pg_level[i].bits[j].mask;
> > + for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
> > + if (kernel_pg_levels[i].bits)
> > + for (j = 0; j < kernel_pg_levels[i].num; j++)
> > + kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
> > }
> >
> > static struct ptdump_info kernel_ptdump_info __ro_after_init = {
> > @@ -297,6 +299,7 @@ bool ptdump_check_wx(void)
> > { 0, NULL},
> > { -1, NULL},
> > },
> > + .pg_level = &kernel_pg_levels[0],
> > .level = -1,
> > .check_wx = true,
> > .ptdump = {
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure
2024-08-20 14:13 ` Sebastian Ene
@ 2024-08-20 14:25 ` Marc Zyngier
2024-08-20 14:39 ` Sebastian Ene
0 siblings, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2024-08-20 14:25 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, 20 Aug 2024 15:13:27 +0100,
Sebastian Ene <sebastianene@google.com> wrote:
>
> On Tue, Aug 20, 2024 at 02:49:04PM +0100, Marc Zyngier wrote:
> > On Fri, 16 Aug 2024 13:39:03 +0100,
> > Sebastian Ene <sebastianene@google.com> wrote:
> > >
> > > Printing the descriptor attributes requires accessing a mask which has a
> > > different set of attributes for stage-2. In preparation for adding support
> > > for the stage-2 pagetables dumping, use the mask from the local context
> > > and not from the globally defined pg_level array. Store a pointer to
> > > the pg_level array in the ptdump state structure. This will allow us to
> > > extract the mask which is wrapped in the pg_level array and use it for
> > > descriptor parsing in the note_page.
> > >
> > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > ---
> > > arch/arm64/include/asm/ptdump.h | 1 +
> > > arch/arm64/mm/ptdump.c | 13 ++++++++-----
> > > 2 files changed, 9 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> > > index bd5d3ee3e8dc..71a7ed01153a 100644
> > > --- a/arch/arm64/include/asm/ptdump.h
> > > +++ b/arch/arm64/include/asm/ptdump.h
> > > @@ -44,6 +44,7 @@ struct ptdump_pg_level {
> > > */
> > > struct ptdump_pg_state {
> > > struct ptdump_state ptdump;
> > > + struct ptdump_pg_level *pg_level;
> > > struct seq_file *seq;
> > > const struct addr_marker *marker;
> > > const struct mm_struct *mm;
> > > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > > index 404751fd30fe..ca53ef274a8b 100644
> > > --- a/arch/arm64/mm/ptdump.c
> > > +++ b/arch/arm64/mm/ptdump.c
> > > @@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = {
> > > }
> > > };
> > >
> > > -static struct ptdump_pg_level pg_level[] __ro_after_init = {
> > > +static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
>
> Hi Marc,
>
>
> > Do you actually need this sort of renaming? Given that it is static,
> > this looks like some slightly abusive repainting which isn't warranted
> > here.
>
> I applied Will's suggestion from
> https://lore.kernel.org/all/20240705111229.GB9231@willie-the-truck/
> >
> >
> > I also didn't understand the commit message: you're not tracking any
> > mask here, but a page table level. You are also not using it for
> > anything yet, see below.
>
> and I missed updating the commit message.
>
> >
> >
> > > { /* pgd */
> > > .name = "PGD",
> > > .bits = pte_bits,
> > > @@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
> > > u64 val)
> > > {
> > > struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
> > > + struct ptdump_pg_level *pg_level = st->pg_level;
> >
> > This is what I mean. What is this pg_level used for?
>
> I make use of it to extract the name based on the level. The suggestion
> that Will made allowed me to keep the code with less changes.
Err, I see that now. It'd be good if you described what actually
happens, because it is almost impossible to understand it from reading
the patch.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure
2024-08-20 14:25 ` Marc Zyngier
@ 2024-08-20 14:39 ` Sebastian Ene
0 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-20 14:39 UTC (permalink / raw)
To: Marc Zyngier
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, Aug 20, 2024 at 03:25:13PM +0100, Marc Zyngier wrote:
> On Tue, 20 Aug 2024 15:13:27 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > On Tue, Aug 20, 2024 at 02:49:04PM +0100, Marc Zyngier wrote:
> > > On Fri, 16 Aug 2024 13:39:03 +0100,
> > > Sebastian Ene <sebastianene@google.com> wrote:
> > > >
> > > > Printing the descriptor attributes requires accessing a mask which has a
> > > > different set of attributes for stage-2. In preparation for adding support
> > > > for the stage-2 pagetables dumping, use the mask from the local context
> > > > and not from the globally defined pg_level array. Store a pointer to
> > > > the pg_level array in the ptdump state structure. This will allow us to
> > > > extract the mask which is wrapped in the pg_level array and use it for
> > > > descriptor parsing in the note_page.
> > > >
> > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > ---
> > > > arch/arm64/include/asm/ptdump.h | 1 +
> > > > arch/arm64/mm/ptdump.c | 13 ++++++++-----
> > > > 2 files changed, 9 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> > > > index bd5d3ee3e8dc..71a7ed01153a 100644
> > > > --- a/arch/arm64/include/asm/ptdump.h
> > > > +++ b/arch/arm64/include/asm/ptdump.h
> > > > @@ -44,6 +44,7 @@ struct ptdump_pg_level {
> > > > */
> > > > struct ptdump_pg_state {
> > > > struct ptdump_state ptdump;
> > > > + struct ptdump_pg_level *pg_level;
> > > > struct seq_file *seq;
> > > > const struct addr_marker *marker;
> > > > const struct mm_struct *mm;
> > > > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > > > index 404751fd30fe..ca53ef274a8b 100644
> > > > --- a/arch/arm64/mm/ptdump.c
> > > > +++ b/arch/arm64/mm/ptdump.c
> > > > @@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = {
> > > > }
> > > > };
> > > >
> > > > -static struct ptdump_pg_level pg_level[] __ro_after_init = {
> > > > +static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
> >
> > Hi Marc,
> >
> >
> > > Do you actually need this sort of renaming? Given that it is static,
> > > this looks like some slightly abusive repainting which isn't warranted
> > > here.
> >
> > I applied Will's suggestion from
> > https://lore.kernel.org/all/20240705111229.GB9231@willie-the-truck/
> > >
> > >
> > > I also didn't understand the commit message: you're not tracking any
> > > mask here, but a page table level. You are also not using it for
> > > anything yet, see below.
> >
> > and I missed updating the commit message.
> >
> > >
> > >
> > > > { /* pgd */
> > > > .name = "PGD",
> > > > .bits = pte_bits,
> > > > @@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
> > > > u64 val)
> > > > {
> > > > struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
> > > > + struct ptdump_pg_level *pg_level = st->pg_level;
> > >
> > > This is what I mean. What is this pg_level used for?
> >
> > I make use of it to extract the name based on the level. The suggestion
> > that Will made allowed me to keep the code with less changes.
>
> Err, I see that now. It'd be good if you described what actually
> happens, because it is almost impossible to understand it from reading
> the patch.
Yes, let me re-write the commit message in the next version,
Thanks,
Seb
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (2 preceding siblings ...)
2024-08-16 12:39 ` [PATCH v8 3/6] arm64: ptdump: Use the mask from the state structure Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
2024-08-20 14:06 ` Marc Zyngier
2024-08-16 12:39 ` [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes Sebastian Ene
2024-08-16 12:39 ` [PATCH v8 6/6] KVM: arm64: Expose guest stage-2 pagetable config to debugfs Sebastian Ene
5 siblings, 1 reply; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
While arch/*/mm/ptdump handles the kernel pagetable dumping code,
introduce KVM/ptdump which shows the guest stage-2 pagetables. The
separation is necessary because most of the definitions from the
stage-2 pagetable reside in the KVM path and we will be invoking
functionality **specific** to KVM.
When a guest is created, register a new file entry under the guest
debugfs dir which allows userspace to show the contents of the guest
stage-2 pagetables when accessed.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/Kconfig | 14 ++++++
arch/arm64/kvm/Makefile | 1 +
arch/arm64/kvm/arm.c | 2 +
arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
5 files changed, 128 insertions(+)
create mode 100644 arch/arm64/kvm/kvm_ptdump.h
create mode 100644 arch/arm64/kvm/ptdump.c
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..fcc41e58ede6 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on KVM
+ select PTDUMP_CORE
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..e4233b323a73 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
+kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9bef7638342e..60fed2146763 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -45,6 +45,7 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include <kvm_ptdump.h>
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
@@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
kvm_sys_regs_create_debugfs(kvm);
+ kvm_ptdump_guest_register(kvm);
}
static void kvm_destroy_mpidr_data(struct kvm *kvm)
diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
new file mode 100644
index 000000000000..0a62b0e2908c
--- /dev/null
+++ b/arch/arm64/kvm/kvm_ptdump.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+
+#ifndef __KVM_PTDUMP_H
+#define __KVM_PTDUMP_H
+
+#include <linux/kvm_host.h>
+#include <asm/ptdump.h>
+
+
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_ptdump_guest_register(struct kvm *kvm);
+#else
+static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
+#endif /* __KVM_PTDUMP_H */
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
new file mode 100644
index 000000000000..52483d56be2e
--- /dev/null
+++ b/arch/arm64/kvm/ptdump.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Debug helper used to dump the stage-2 pagetables of the system and their
+ * associated permissions.
+ *
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+
+#include <asm/kvm_pgtable.h>
+#include <kvm_ptdump.h>
+
+
+static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct ptdump_pg_state *st = ctx->arg;
+ struct ptdump_state *pt_st = &st->ptdump;
+
+ note_page(pt_st, ctx->addr, ctx->level, ctx->old);
+ return 0;
+}
+
+static int kvm_ptdump_show_common(struct seq_file *m,
+ struct kvm_pgtable *pgtable,
+ struct ptdump_pg_state *parser_state)
+{
+ struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
+ .cb = kvm_ptdump_visitor,
+ .arg = parser_state,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ parser_state->level = -1;
+ parser_state->start_address = 0;
+
+ return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
+}
+
+static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
+{
+ struct kvm *kvm = m->private;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct ptdump_pg_state parser_state = {0};
+ int ret;
+
+ write_lock(&kvm->mmu_lock);
+ ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
+ write_unlock(&kvm->mmu_lock);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
+ if (ret < 0)
+ kvm_put_kvm(kvm);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+
+ kvm_put_kvm(kvm);
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_ptdump_guest_fops = {
+ .open = kvm_ptdump_guest_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_ptdump_guest_close,
+};
+
+void kvm_ptdump_guest_register(struct kvm *kvm)
+{
+ debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_ptdump_guest_fops);
+}
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation
2024-08-16 12:39 ` [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation Sebastian Ene
@ 2024-08-20 14:06 ` Marc Zyngier
2024-08-23 10:45 ` Sebastian Ene
2024-08-23 10:53 ` Sebastian Ene
0 siblings, 2 replies; 19+ messages in thread
From: Marc Zyngier @ 2024-08-20 14:06 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Fri, 16 Aug 2024 13:39:04 +0100,
Sebastian Ene <sebastianene@google.com> wrote:
>
> While arch/*/mm/ptdump handles the kernel pagetable dumping code,
> introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> separation is necessary because most of the definitions from the
> stage-2 pagetable reside in the KVM path and we will be invoking
> functionality **specific** to KVM.
Drop the ** emphasis.
>
> When a guest is created, register a new file entry under the guest
> debugfs dir which allows userspace to show the contents of the guest
> stage-2 pagetables when accessed.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/kvm/Kconfig | 14 ++++++
> arch/arm64/kvm/Makefile | 1 +
> arch/arm64/kvm/arm.c | 2 +
> arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> 5 files changed, 128 insertions(+)
> create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> create mode 100644 arch/arm64/kvm/ptdump.c
>
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 8304eb342be9..fcc41e58ede6 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
>
> If unsure, or not using protected nVHE (pKVM), say N.
>
> +config PTDUMP_STAGE2_DEBUGFS
> + bool "Present the stage-2 pagetables to debugfs"
> + depends on KVM
> + select PTDUMP_CORE
This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
constraints:
depends on DEBUG_KERNEL
depends on DEBUG_FS
depends on GENERIC_PTDUMP
select PTDUMP_CORE
I don't see why the Stage-2 version should have anything different.
> + default n
> + help
> + Say Y here if you want to show the stage-2 kernel pagetables
> + layout in a debugfs file. This information is only useful for kernel developers
> + who are working in architecture specific areas of the kernel.
> + It is probably not a good idea to enable this feature in a production
> + kernel.
nit: try to keep the formatting within 80 columns.
More importantly, I find it very strange to expose the configuration
option so early in the series, while the support code isn't there yet.
You can perfectly introduce code that is conditional on a config
option and only add it at the end.
> +
> + If in doubt, say N.
> +
> endif # VIRTUALIZATION
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 86a629aaf0a1..e4233b323a73 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>
> kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
>
> always-y := hyp_constants.h hyp-constants.s
>
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9bef7638342e..60fed2146763 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -45,6 +45,7 @@
> #include <kvm/arm_hypercalls.h>
> #include <kvm/arm_pmu.h>
> #include <kvm/arm_psci.h>
> +#include <kvm_ptdump.h>
>
> static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>
> @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> {
> kvm_sys_regs_create_debugfs(kvm);
> + kvm_ptdump_guest_register(kvm);
Consider using a name that is homogeneous with what we already have
(kvm_s2_ptdump_create_debugfs?).
> }
>
> static void kvm_destroy_mpidr_data(struct kvm *kvm)
> diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> new file mode 100644
> index 000000000000..0a62b0e2908c
> --- /dev/null
> +++ b/arch/arm64/kvm/kvm_ptdump.h
> @@ -0,0 +1,20 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) Google, 2024
> + * Author: Sebastian Ene <sebastianene@google.com>
> + */
> +
> +#ifndef __KVM_PTDUMP_H
> +#define __KVM_PTDUMP_H
> +
> +#include <linux/kvm_host.h>
> +#include <asm/ptdump.h>
> +
> +
> +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> +void kvm_ptdump_guest_register(struct kvm *kvm);
> +#else
> +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> +
> +#endif /* __KVM_PTDUMP_H */
Please don't add new include files that contain so little stuff. These
things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> new file mode 100644
> index 000000000000..52483d56be2e
> --- /dev/null
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Debug helper used to dump the stage-2 pagetables of the system and their
> + * associated permissions.
> + *
> + * Copyright (C) Google, 2024
> + * Author: Sebastian Ene <sebastianene@google.com>
> + */
> +#include <linux/debugfs.h>
> +#include <linux/kvm_host.h>
> +#include <linux/seq_file.h>
> +
> +#include <asm/kvm_pgtable.h>
> +#include <kvm_ptdump.h>
> +
> +
> +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> + enum kvm_pgtable_walk_flags visit)
> +{
> + struct ptdump_pg_state *st = ctx->arg;
> + struct ptdump_state *pt_st = &st->ptdump;
> +
> + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> + return 0;
> +}
> +
> +static int kvm_ptdump_show_common(struct seq_file *m,
What does "common" mean here? You have exactly *one* caller, so why
isn't that inlined in kvm_ptdump_guest_show()?
> + struct kvm_pgtable *pgtable,
> + struct ptdump_pg_state *parser_state)
> +{
> + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> + .cb = kvm_ptdump_visitor,
> + .arg = parser_state,
> + .flags = KVM_PGTABLE_WALK_LEAF,
> + };
> +
> + parser_state->level = -1;
> + parser_state->start_address = 0;
> +
> + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> +}
> +
> +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> +{
> + struct kvm *kvm = m->private;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct ptdump_pg_state parser_state = {0};
nit: the common idiom is "parser_state = {}".
> + int ret;
> +
> + write_lock(&kvm->mmu_lock);
> + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> + write_unlock(&kvm->mmu_lock);
> +
> + return ret;
> +}
> +
> +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> +{
> + struct kvm *kvm = m->i_private;
> + int ret;
> +
> + if (!kvm_get_kvm_safe(kvm))
> + return -ENOENT;
> +
> + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> + if (ret < 0)
> + kvm_put_kvm(kvm);
> +
> + return ret;
> +}
> +
> +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> +{
> + struct kvm *kvm = m->i_private;
> +
> + kvm_put_kvm(kvm);
> + return single_release(m, file);
> +}
> +
> +static const struct file_operations kvm_ptdump_guest_fops = {
> + .open = kvm_ptdump_guest_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = kvm_ptdump_guest_close,
> +};
> +
> +void kvm_ptdump_guest_register(struct kvm *kvm)
> +{
> + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> + kvm, &kvm_ptdump_guest_fops);
> +}
> --
> 2.46.0.184.g6999bdac58-goog
Overall, I have a hard time understanding what this does. It walks the
page tables, but doesn't do anything useful.
I have the feeling that this patch would be better squashed together
with patch #5, since it significantly reworks what patch #4 does.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation
2024-08-20 14:06 ` Marc Zyngier
@ 2024-08-23 10:45 ` Sebastian Ene
2024-08-23 10:53 ` Sebastian Ene
1 sibling, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-23 10:45 UTC (permalink / raw)
To: Marc Zyngier
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, Aug 20, 2024 at 03:06:45PM +0100, Marc Zyngier wrote:
Hello Marc,
> On Fri, 16 Aug 2024 13:39:04 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > While arch/*/mm/ptdump handles the kernel pagetable dumping code,
> > introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> > separation is necessary because most of the definitions from the
> > stage-2 pagetable reside in the KVM path and we will be invoking
> > functionality **specific** to KVM.
>
> Drop the ** emphasis.
I will drop this.
>
> >
> > When a guest is created, register a new file entry under the guest
> > debugfs dir which allows userspace to show the contents of the guest
> > stage-2 pagetables when accessed.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/Kconfig | 14 ++++++
> > arch/arm64/kvm/Makefile | 1 +
> > arch/arm64/kvm/arm.c | 2 +
> > arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> > arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> > 5 files changed, 128 insertions(+)
> > create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> > create mode 100644 arch/arm64/kvm/ptdump.c
> >
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 8304eb342be9..fcc41e58ede6 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
> >
> > If unsure, or not using protected nVHE (pKVM), say N.
> >
> > +config PTDUMP_STAGE2_DEBUGFS
> > + bool "Present the stage-2 pagetables to debugfs"
> > + depends on KVM
> > + select PTDUMP_CORE
>
> This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
> constraints:
>
> depends on DEBUG_KERNEL
> depends on DEBUG_FS
> depends on GENERIC_PTDUMP
> select PTDUMP_CORE
>
> I don't see why the Stage-2 version should have anything different.
>
> > + default n
> > + help
> > + Say Y here if you want to show the stage-2 kernel pagetables
> > + layout in a debugfs file. This information is only useful for kernel developers
> > + who are working in architecture specific areas of the kernel.
> > + It is probably not a good idea to enable this feature in a production
> > + kernel.
>
> nit: try to keep the formatting within 80 columns.
Let me fix the formatting and add the other depends on rules.
>
> More importantly, I find it very strange to expose the configuration
> option so early in the series, while the support code isn't there yet.
> You can perfectly introduce code that is conditional on a config
> option and only add it at the end.
I will move the configuration option as a separate patch in the last
part of the series.
>
> > +
> > + If in doubt, say N.
> > +
> > endif # VIRTUALIZATION
> > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> > index 86a629aaf0a1..e4233b323a73 100644
> > --- a/arch/arm64/kvm/Makefile
> > +++ b/arch/arm64/kvm/Makefile
> > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
> >
> > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
> >
> > always-y := hyp_constants.h hyp-constants.s
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9bef7638342e..60fed2146763 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -45,6 +45,7 @@
> > #include <kvm/arm_hypercalls.h>
> > #include <kvm/arm_pmu.h>
> > #include <kvm/arm_psci.h>
> > +#include <kvm_ptdump.h>
> >
> > static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> >
> > @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> > void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> > {
> > kvm_sys_regs_create_debugfs(kvm);
> > + kvm_ptdump_guest_register(kvm);
>
> Consider using a name that is homogeneous with what we already have
> (kvm_s2_ptdump_create_debugfs?).
>
> > }
> >
> > static void kvm_destroy_mpidr_data(struct kvm *kvm)
> > diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> > new file mode 100644
> > index 000000000000..0a62b0e2908c
> > --- /dev/null
> > +++ b/arch/arm64/kvm/kvm_ptdump.h
> > @@ -0,0 +1,20 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +
> > +#ifndef __KVM_PTDUMP_H
> > +#define __KVM_PTDUMP_H
> > +
> > +#include <linux/kvm_host.h>
> > +#include <asm/ptdump.h>
> > +
> > +
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +void kvm_ptdump_guest_register(struct kvm *kvm);
> > +#else
> > +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > +#endif /* __KVM_PTDUMP_H */
>
> Please don't add new include files that contain so little stuff. These
> things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
>
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > new file mode 100644
> > index 000000000000..52483d56be2e
> > --- /dev/null
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Debug helper used to dump the stage-2 pagetables of the system and their
> > + * associated permissions.
> > + *
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +#include <linux/debugfs.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/seq_file.h>
> > +
> > +#include <asm/kvm_pgtable.h>
> > +#include <kvm_ptdump.h>
> > +
> > +
> > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > + enum kvm_pgtable_walk_flags visit)
> > +{
> > + struct ptdump_pg_state *st = ctx->arg;
> > + struct ptdump_state *pt_st = &st->ptdump;
> > +
> > + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> > + return 0;
> > +}
> > +
> > +static int kvm_ptdump_show_common(struct seq_file *m,
>
> What does "common" mean here? You have exactly *one* caller, so why
> isn't that inlined in kvm_ptdump_guest_show()?
>
> > + struct kvm_pgtable *pgtable,
> > + struct ptdump_pg_state *parser_state)
> > +{
> > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> > + .cb = kvm_ptdump_visitor,
> > + .arg = parser_state,
> > + .flags = KVM_PGTABLE_WALK_LEAF,
> > + };
> > +
> > + parser_state->level = -1;
> > + parser_state->start_address = 0;
> > +
> > + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > +}
> > +
> > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > +{
> > + struct kvm *kvm = m->private;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct ptdump_pg_state parser_state = {0};
>
> nit: the common idiom is "parser_state = {}".
>
> > + int ret;
> > +
> > + write_lock(&kvm->mmu_lock);
> > + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> > + write_unlock(&kvm->mmu_lock);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > + int ret;
> > +
> > + if (!kvm_get_kvm_safe(kvm))
> > + return -ENOENT;
> > +
> > + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > + if (ret < 0)
> > + kvm_put_kvm(kvm);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > +
> > + kvm_put_kvm(kvm);
> > + return single_release(m, file);
> > +}
> > +
> > +static const struct file_operations kvm_ptdump_guest_fops = {
> > + .open = kvm_ptdump_guest_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = kvm_ptdump_guest_close,
> > +};
> > +
> > +void kvm_ptdump_guest_register(struct kvm *kvm)
> > +{
> > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> > + kvm, &kvm_ptdump_guest_fops);
> > +}
> > --
> > 2.46.0.184.g6999bdac58-goog
>
> Overall, I have a hard time understanding what this does. It walks the
> page tables, but doesn't do anything useful.
>
> I have the feeling that this patch would be better squashed together
> with patch #5, since it significantly reworks what patch #4 does.
>
Right, I think there are a few patches that should be squashed to
prevent this "empty" functionality.
Thanks for the feedback,
Seb
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation
2024-08-20 14:06 ` Marc Zyngier
2024-08-23 10:45 ` Sebastian Ene
@ 2024-08-23 10:53 ` Sebastian Ene
1 sibling, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-23 10:53 UTC (permalink / raw)
To: Marc Zyngier
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, Aug 20, 2024 at 03:06:45PM +0100, Marc Zyngier wrote:
> On Fri, 16 Aug 2024 13:39:04 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > While arch/*/mm/ptdump handles the kernel pagetable dumping code,
> > introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> > separation is necessary because most of the definitions from the
> > stage-2 pagetable reside in the KVM path and we will be invoking
> > functionality **specific** to KVM.
>
> Drop the ** emphasis.
>
> >
> > When a guest is created, register a new file entry under the guest
> > debugfs dir which allows userspace to show the contents of the guest
> > stage-2 pagetables when accessed.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/Kconfig | 14 ++++++
> > arch/arm64/kvm/Makefile | 1 +
> > arch/arm64/kvm/arm.c | 2 +
> > arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> > arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> > 5 files changed, 128 insertions(+)
> > create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> > create mode 100644 arch/arm64/kvm/ptdump.c
> >
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 8304eb342be9..fcc41e58ede6 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
> >
> > If unsure, or not using protected nVHE (pKVM), say N.
> >
> > +config PTDUMP_STAGE2_DEBUGFS
> > + bool "Present the stage-2 pagetables to debugfs"
> > + depends on KVM
> > + select PTDUMP_CORE
>
> This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
> constraints:
>
> depends on DEBUG_KERNEL
> depends on DEBUG_FS
> depends on GENERIC_PTDUMP
> select PTDUMP_CORE
>
> I don't see why the Stage-2 version should have anything different.
>
> > + default n
> > + help
> > + Say Y here if you want to show the stage-2 kernel pagetables
> > + layout in a debugfs file. This information is only useful for kernel developers
> > + who are working in architecture specific areas of the kernel.
> > + It is probably not a good idea to enable this feature in a production
> > + kernel.
>
> nit: try to keep the formatting within 80 columns.
>
> More importantly, I find it very strange to expose the configuration
> option so early in the series, while the support code isn't there yet.
> You can perfectly introduce code that is conditional on a config
> option and only add it at the end.
>
> > +
> > + If in doubt, say N.
> > +
> > endif # VIRTUALIZATION
> > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> > index 86a629aaf0a1..e4233b323a73 100644
> > --- a/arch/arm64/kvm/Makefile
> > +++ b/arch/arm64/kvm/Makefile
> > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
> >
> > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
> >
> > always-y := hyp_constants.h hyp-constants.s
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9bef7638342e..60fed2146763 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -45,6 +45,7 @@
> > #include <kvm/arm_hypercalls.h>
> > #include <kvm/arm_pmu.h>
> > #include <kvm/arm_psci.h>
> > +#include <kvm_ptdump.h>
> >
> > static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> >
> > @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> > void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> > {
> > kvm_sys_regs_create_debugfs(kvm);
> > + kvm_ptdump_guest_register(kvm);
>
> Consider using a name that is homogeneous with what we already have
> (kvm_s2_ptdump_create_debugfs?).
>
> > }
> >
> > static void kvm_destroy_mpidr_data(struct kvm *kvm)
> > diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> > new file mode 100644
> > index 000000000000..0a62b0e2908c
> > --- /dev/null
> > +++ b/arch/arm64/kvm/kvm_ptdump.h
> > @@ -0,0 +1,20 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +
> > +#ifndef __KVM_PTDUMP_H
> > +#define __KVM_PTDUMP_H
> > +
> > +#include <linux/kvm_host.h>
> > +#include <asm/ptdump.h>
> > +
> > +
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +void kvm_ptdump_guest_register(struct kvm *kvm);
> > +#else
> > +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > +#endif /* __KVM_PTDUMP_H */
>
> Please don't add new include files that contain so little stuff. These
> things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
>
I will drop this in this case.
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > new file mode 100644
> > index 000000000000..52483d56be2e
> > --- /dev/null
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Debug helper used to dump the stage-2 pagetables of the system and their
> > + * associated permissions.
> > + *
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +#include <linux/debugfs.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/seq_file.h>
> > +
> > +#include <asm/kvm_pgtable.h>
> > +#include <kvm_ptdump.h>
> > +
> > +
> > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > + enum kvm_pgtable_walk_flags visit)
> > +{
> > + struct ptdump_pg_state *st = ctx->arg;
> > + struct ptdump_state *pt_st = &st->ptdump;
> > +
> > + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> > + return 0;
> > +}
> > +
> > +static int kvm_ptdump_show_common(struct seq_file *m,
>
> What does "common" mean here? You have exactly *one* caller, so why
> isn't that inlined in kvm_ptdump_guest_show()?
>
Right, this first part of the series only adds support for non-protected
VMs, so there is only one caller. A later series, which I haven't sent yet,
adds support for protected mode, for both the host and the guest. I will
keep it in kvm_ptdump_guest_show() for now.
> > + struct kvm_pgtable *pgtable,
> > + struct ptdump_pg_state *parser_state)
> > +{
> > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> > + .cb = kvm_ptdump_visitor,
> > + .arg = parser_state,
> > + .flags = KVM_PGTABLE_WALK_LEAF,
> > + };
> > +
> > + parser_state->level = -1;
> > + parser_state->start_address = 0;
> > +
> > + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > +}
> > +
> > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > +{
> > + struct kvm *kvm = m->private;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct ptdump_pg_state parser_state = {0};
>
> nit: the common idiom is "parser_state = {}".
>
> > + int ret;
> > +
> > + write_lock(&kvm->mmu_lock);
> > + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> > + write_unlock(&kvm->mmu_lock);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > + int ret;
> > +
> > + if (!kvm_get_kvm_safe(kvm))
> > + return -ENOENT;
> > +
> > + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > + if (ret < 0)
> > + kvm_put_kvm(kvm);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > +
> > + kvm_put_kvm(kvm);
> > + return single_release(m, file);
> > +}
> > +
> > +static const struct file_operations kvm_ptdump_guest_fops = {
> > + .open = kvm_ptdump_guest_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = kvm_ptdump_guest_close,
> > +};
> > +
> > +void kvm_ptdump_guest_register(struct kvm *kvm)
> > +{
> > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> > + kvm, &kvm_ptdump_guest_fops);
> > +}
> > --
> > 2.46.0.184.g6999bdac58-goog
>
> Overall, I have a hard time understanding what this does. It walks the
> page tables, but doesn't do anything useful.
>
> I have the feeling that this patch would be better squashed together
> with patch #5, since it significantly reworks what patch #4 does.
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (3 preceding siblings ...)
2024-08-16 12:39 ` [PATCH v8 4/6] KVM: arm64: Register ptdump with debugfs on guest creation Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
2024-08-19 10:28 ` Vincent Donnefort
2024-08-20 14:20 ` Marc Zyngier
2024-08-16 12:39 ` [PATCH v8 6/6] KVM: arm64: Expose guest stage-2 pagetable config to debugfs Sebastian Ene
5 siblings, 2 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
Define a set of attributes used by the ptdump parser to display the
properties of a guest memory region covered by a pagetable descriptor.
Build a description of the pagetable levels and initialize the parser
with this configuration.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/ptdump.c | 135 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 129 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index 52483d56be2e..79be07ec3c3c 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -14,6 +14,51 @@
#include <kvm_ptdump.h>
+#define MARKERS_LEN (2)
+#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
+
+struct kvm_ptdump_guest_state {
+ struct kvm *kvm;
+ struct ptdump_pg_state parser_state;
+ struct addr_marker ipa_marker[MARKERS_LEN];
+ struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
+ struct ptdump_range range[MARKERS_LEN];
+};
+
+static const struct ptdump_prot_bits stage2_pte_bits[] = {
+ {
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .set = "R",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .set = "W",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "X",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT | PTE_VALID,
+ .val = PTE_VALID,
+ .set = "BLK",
+ .clear = " ",
+ },
+};
+
static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
@@ -40,15 +85,81 @@ static int kvm_ptdump_show_common(struct seq_file *m,
return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
}
+static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
+{
+ static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
+ u32 i = 0;
+ u64 mask = 0;
+
+ if (start_lvl > 2) {
+ pr_err("invalid start_lvl %u\n", start_lvl);
+ return -EINVAL;
+ }
+
+ if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
+ mask |= stage2_pte_bits[i].mask;
+
+ for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ strscpy(level[i].name, level_names[i], sizeof(level[i].name));
+
+ level[i].num = ARRAY_SIZE(stage2_pte_bits);
+ level[i].bits = stage2_pte_bits;
+ level[i].mask = mask;
+ }
+
+ if (start_lvl > 0)
+ strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
+
+ return 0;
+}
+
+static struct kvm_ptdump_guest_state
+*kvm_ptdump_parser_init(struct kvm *kvm)
+{
+ struct kvm_ptdump_guest_state *st;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgtable = mmu->pgt;
+ int ret;
+
+ st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
+ if (!st)
+ return NULL;
+
+ ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
+ if (ret) {
+ kfree(st);
+ return ERR_PTR(ret);
+ }
+
+ st->ipa_marker[0].name = "Guest IPA";
+ st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
+ st->range[0].end = BIT(pgtable->ia_bits);
+
+ st->kvm = kvm;
+ st->parser_state = (struct ptdump_pg_state) {
+ .marker = &st->ipa_marker[0],
+ .level = -1,
+ .pg_level = &st->level[0],
+ .ptdump.range = &st->range[0],
+ };
+
+ return st;
+}
+
static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
{
- struct kvm *kvm = m->private;
+ struct kvm_ptdump_guest_state *st = m->private;
+ struct kvm *kvm = st->kvm;
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
- struct ptdump_pg_state parser_state = {0};
int ret;
+ st->parser_state.seq = m;
+
write_lock(&kvm->mmu_lock);
- ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
+ ret = kvm_ptdump_show_common(m, mmu->pgt, &st->parser_state);
write_unlock(&kvm->mmu_lock);
return ret;
@@ -57,22 +168,34 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
+ struct kvm_ptdump_guest_state *st;
int ret;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
- ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
- if (ret < 0)
- kvm_put_kvm(kvm);
+ st = kvm_ptdump_parser_init(kvm);
+ if (IS_ERR(st)) {
+ ret = PTR_ERR(st);
+ goto free_with_kvm_ref;
+ }
+
+ ret = single_open(file, kvm_ptdump_guest_show, st);
+ if (!ret)
+ return 0;
+ kfree(st);
+free_with_kvm_ref:
+ kvm_put_kvm(kvm);
return ret;
}
static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
+ void *st = ((struct seq_file *)file->private_data)->private;
+ kfree(st);
kvm_put_kvm(kvm);
return single_release(m, file);
}
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-16 12:39 ` [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes Sebastian Ene
@ 2024-08-19 10:28 ` Vincent Donnefort
2024-08-19 12:18 ` Sebastian Ene
2024-08-20 14:20 ` Marc Zyngier
1 sibling, 1 reply; 19+ messages in thread
From: Vincent Donnefort @ 2024-08-19 10:28 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
[...]
> +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
> +{
> + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> + u32 i = 0;
> + u64 mask = 0;
> +
> + if (start_lvl > 2) {
> + pr_err("invalid start_lvl %u\n", start_lvl);
> + return -EINVAL;
> + }
It looks like a duplicate of the if below.
> +
> + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
> + return -EINVAL;
> +
> + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> + mask |= stage2_pte_bits[i].mask;
> +
> + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
> + strscpy(level[i].name, level_names[i], sizeof(level[i].name));
> +
> + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> + level[i].bits = stage2_pte_bits;
> + level[i].mask = mask;
> + }
> +
> + if (start_lvl > 0)
> + strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
> +
> + return 0;
> +}
> +
> +static struct kvm_ptdump_guest_state
> +*kvm_ptdump_parser_init(struct kvm *kvm)
nit: I guess it's more a "create" than an "init" as we allocate the memory.
> +{
> + struct kvm_ptdump_guest_state *st;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct kvm_pgtable *pgtable = mmu->pgt;
> + int ret;
> +
> + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> + if (!st)
> + return NULL;
return ERR_PTR(-ENOMEM); ?
> +
> + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> + if (ret) {
> + kfree(st);
> + return ERR_PTR(ret);
> + }
> +
> + st->ipa_marker[0].name = "Guest IPA";
> + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
> + st->range[0].end = BIT(pgtable->ia_bits);
> +
> + st->kvm = kvm;
> + st->parser_state = (struct ptdump_pg_state) {
> + .marker = &st->ipa_marker[0],
> + .level = -1,
> + .pg_level = &st->level[0],
> + .ptdump.range = &st->range[0],
> + };
> +
> + return st;
> +}
> +
[...]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-19 10:28 ` Vincent Donnefort
@ 2024-08-19 12:18 ` Sebastian Ene
0 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-19 12:18 UTC (permalink / raw)
To: Vincent Donnefort
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Mon, Aug 19, 2024 at 11:28:19AM +0100, Vincent Donnefort wrote:
>
> [...]
>
> > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
> > +{
> > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> > + u32 i = 0;
> > + u64 mask = 0;
> > +
> > + if (start_lvl > 2) {
> > + pr_err("invalid start_lvl %u\n", start_lvl);
> > + return -EINVAL;
> > + }
>
> It looks like a duplicate of the if below.
I will drop this.
>
> > +
> > + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
> > + return -EINVAL;
> > +
> > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> > + mask |= stage2_pte_bits[i].mask;
> > +
> > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
> > + strscpy(level[i].name, level_names[i], sizeof(level[i].name));
> > +
> > + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> > + level[i].bits = stage2_pte_bits;
> > + level[i].mask = mask;
> > + }
> > +
> > + if (start_lvl > 0)
> > + strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
> > +
> > + return 0;
> > +}
> > +
> > +static struct kvm_ptdump_guest_state
> > +*kvm_ptdump_parser_init(struct kvm *kvm)
>
> nit: I guess it's more a "create" than an "init" as we allocate the memory.
Yes, that should work as well.
>
> > +{
> > + struct kvm_ptdump_guest_state *st;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct kvm_pgtable *pgtable = mmu->pgt;
> > + int ret;
> > +
> > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> > + if (!st)
> > + return NULL;
>
> return ERR_PTR(-ENOMEM); ?
Yep, we can encode the ENOMEM in the ptr.
>
> > +
> > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> > + if (ret) {
> > + kfree(st);
> > + return ERR_PTR(ret);
> > + }
> > +
> > + st->ipa_marker[0].name = "Guest IPA";
> > + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
> > + st->range[0].end = BIT(pgtable->ia_bits);
> > +
> > + st->kvm = kvm;
> > + st->parser_state = (struct ptdump_pg_state) {
> > + .marker = &st->ipa_marker[0],
> > + .level = -1,
> > + .pg_level = &st->level[0],
> > + .ptdump.range = &st->range[0],
> > + };
> > +
> > + return st;
> > +}
> > +
>
> [...]
Thanks,
Seb
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-16 12:39 ` [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes Sebastian Ene
2024-08-19 10:28 ` Vincent Donnefort
@ 2024-08-20 14:20 ` Marc Zyngier
2024-08-22 16:15 ` Marc Zyngier
1 sibling, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2024-08-20 14:20 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Fri, 16 Aug 2024 13:39:05 +0100,
Sebastian Ene <sebastianene@google.com> wrote:
>
> Define a set of attributes used by the ptdump parser to display the
> properties of a guest memory region covered by a pagetable descriptor.
> Build a description of the pagetable levels and initialize the parser
> with this configuration.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/kvm/ptdump.c | 135 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 129 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> index 52483d56be2e..79be07ec3c3c 100644
> --- a/arch/arm64/kvm/ptdump.c
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -14,6 +14,51 @@
> #include <kvm_ptdump.h>
>
>
> +#define MARKERS_LEN (2)
> +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> +
> +struct kvm_ptdump_guest_state {
> + struct kvm *kvm;
> + struct ptdump_pg_state parser_state;
> + struct addr_marker ipa_marker[MARKERS_LEN];
> + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
> + struct ptdump_range range[MARKERS_LEN];
> +};
> +
> +static const struct ptdump_prot_bits stage2_pte_bits[] = {
> + {
> + .mask = PTE_VALID,
> + .val = PTE_VALID,
> + .set = " ",
> + .clear = "F",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .set = "R",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .set = "W",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .val = PTE_VALID,
> + .set = " ",
> + .clear = "X",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .set = "AF",
> + .clear = " ",
> + }, {
> + .mask = PTE_TABLE_BIT | PTE_VALID,
> + .val = PTE_VALID,
> + .set = "BLK",
> + .clear = " ",
> + },
> +};
> +
> static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> enum kvm_pgtable_walk_flags visit)
> {
> @@ -40,15 +85,81 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> }
>
> +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
> +{
> + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
How about 5 level page tables, which we support since v6.8? The
architecture uses a SL=-1 in this case, and I have the feeling this is
going to expose in a lovely way, given that you use a u32 for
start_level... :-/
I also question the use of these names, which have no relation with
what the architecture describes (news flash, arm64 isn't just another
x86 implementation ;-).
I'd rather this displays the actual level, which is something anyone
can understand.
I think this needs some surgery to handle FEAT_LVA2 properly.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-20 14:20 ` Marc Zyngier
@ 2024-08-22 16:15 ` Marc Zyngier
2024-08-23 5:21 ` Sebastian Ene
0 siblings, 1 reply; 19+ messages in thread
From: Marc Zyngier @ 2024-08-22 16:15 UTC (permalink / raw)
To: Sebastian Ene
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Tue, 20 Aug 2024 15:20:25 +0100,
Marc Zyngier <maz@kernel.org> wrote:
>
> On Fri, 16 Aug 2024 13:39:05 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > Define a set of attributes used by the ptdump parser to display the
> > properties of a guest memory region covered by a pagetable descriptor.
> > Build a description of the pagetable levels and initialize the parser
> > with this configuration.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/ptdump.c | 135 ++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 129 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > index 52483d56be2e..79be07ec3c3c 100644
> > --- a/arch/arm64/kvm/ptdump.c
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -14,6 +14,51 @@
> > #include <kvm_ptdump.h>
> >
> >
> > +#define MARKERS_LEN (2)
> > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > +
> > +struct kvm_ptdump_guest_state {
> > + struct kvm *kvm;
> > + struct ptdump_pg_state parser_state;
> > + struct addr_marker ipa_marker[MARKERS_LEN];
> > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > + struct ptdump_range range[MARKERS_LEN];
> > +};
> > +
> > +static const struct ptdump_prot_bits stage2_pte_bits[] = {
> > + {
> > + .mask = PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "F",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .set = "R",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .set = "W",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "X",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .set = "AF",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_TABLE_BIT | PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = "BLK",
> > + .clear = " ",
> > + },
> > +};
> > +
> > static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > enum kvm_pgtable_walk_flags visit)
> > {
> > @@ -40,15 +85,81 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> > return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > }
> >
> > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
> > +{
> > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
>
> How about 5 level page tables, which we support since v6.8? The
> architecture uses a SL=-1 in this case, and I have the feeling this is
> going to expose in a lovely way, given that you use a u32 for
> start_level... :-/
Talking to Oliver, I just had an epiphany: we never have a 5th level
with KVM, because we always use concatenated page tables at the start
level. So the depth is still 4 levels, but we get up to 16 pages at
level 0, and that's how we expand the IPA space from 48 to 52 bits.
So by the look of it, your code should still be OK with only 4 levels.
Apologies for leading you in the wrong direction.
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes
2024-08-22 16:15 ` Marc Zyngier
@ 2024-08-23 5:21 ` Sebastian Ene
0 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-23 5:21 UTC (permalink / raw)
To: Marc Zyngier
Cc: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, oliver.upton, rananta,
ryan.roberts, shahuang, suzuki.poulose, will, yuzenghui, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team
On Thu, Aug 22, 2024 at 05:15:54PM +0100, 'Marc Zyngier' via kernel-team wrote:
> On Tue, 20 Aug 2024 15:20:25 +0100,
> Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Fri, 16 Aug 2024 13:39:05 +0100,
> > Sebastian Ene <sebastianene@google.com> wrote:
> > >
> > > Define a set of attributes used by the ptdump parser to display the
> > > properties of a guest memory region covered by a pagetable descriptor.
> > > Build a description of the pagetable levels and initialize the parser
> > > with this configuration.
> > >
> > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > ---
> > > arch/arm64/kvm/ptdump.c | 135 ++++++++++++++++++++++++++++++++++++++--
> > > 1 file changed, 129 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > > index 52483d56be2e..79be07ec3c3c 100644
> > > --- a/arch/arm64/kvm/ptdump.c
> > > +++ b/arch/arm64/kvm/ptdump.c
> > > @@ -14,6 +14,51 @@
> > > #include <kvm_ptdump.h>
> > >
> > >
> > > +#define MARKERS_LEN (2)
> > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > > +
> > > +struct kvm_ptdump_guest_state {
> > > + struct kvm *kvm;
> > > + struct ptdump_pg_state parser_state;
> > > + struct addr_marker ipa_marker[MARKERS_LEN];
> > > + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > > + struct ptdump_range range[MARKERS_LEN];
> > > +};
> > > +
> > > +static const struct ptdump_prot_bits stage2_pte_bits[] = {
> > > + {
> > > + .mask = PTE_VALID,
> > > + .val = PTE_VALID,
> > > + .set = " ",
> > > + .clear = "F",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > + .set = "R",
> > > + .clear = " ",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > > + .set = "W",
> > > + .clear = " ",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > > + .val = PTE_VALID,
> > > + .set = " ",
> > > + .clear = "X",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > > + .set = "AF",
> > > + .clear = " ",
> > > + }, {
> > > + .mask = PTE_TABLE_BIT | PTE_VALID,
> > > + .val = PTE_VALID,
> > > + .set = "BLK",
> > > + .clear = " ",
> > > + },
> > > +};
> > > +
> > > static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > > enum kvm_pgtable_walk_flags visit)
> > > {
> > > @@ -40,15 +85,81 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> > > return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > > }
> > >
> > > +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
> > > +{
> > > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> >
> > How about 5 level page tables, which we support since v6.8? The
> > architecture uses a SL=-1 in this case, and I have the feeling this is
> > going to expose in a lovely way, given that you use a u32 for
> > start_level... :-/
>
Hello Marc,
> Talking to Oliver, I just had an epiphany: we never have a 5th level
> with KVM, because we always use concatenated page tables at the start
> level. So the depth is still 4 levels, but we get up to 16 pages at
> level 0, and that's how we expand the IPA space from 48 to 52 bits.
>
Thanks for letting me know. Then it should be fine keeping that as an
unsigned integer.
> So by the look of it, your code should still be OK with only 4 levels.
>
Thanks,
Seb
> Apologies for leading you in the wrong direction.
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v8 6/6] KVM: arm64: Expose guest stage-2 pagetable config to debugfs
2024-08-16 12:39 [PATCH v8 0/6] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (4 preceding siblings ...)
2024-08-16 12:39 ` [PATCH v8 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes Sebastian Ene
@ 2024-08-16 12:39 ` Sebastian Ene
5 siblings, 0 replies; 19+ messages in thread
From: Sebastian Ene @ 2024-08-16 12:39 UTC (permalink / raw)
To: akpm, alexghiti, ankita, ardb, catalin.marinas, christophe.leroy,
james.morse, vdonnefort, mark.rutland, maz, oliver.upton, rananta,
ryan.roberts, sebastianene, shahuang, suzuki.poulose, will,
yuzenghui
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team
Make the start level and the IPA bits properties available in the
virtual machine debugfs directory. Make sure that the KVM structure
doesn't disappear behind our back and keep a reference to the KVM struct
while these files are open.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/ptdump.c | 48 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index 79be07ec3c3c..b0d55ea3fb2a 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -207,8 +207,56 @@ static const struct file_operations kvm_ptdump_guest_fops = {
.release = kvm_ptdump_guest_close,
};
+static int kvm_pgtable_debugfs_show(struct seq_file *m, void *unused)
+{
+ const struct file *file = m->file;
+ struct kvm_pgtable *pgtable = m->private;
+
+ if (!strcmp(file_dentry(file)->d_iname, "ipa_range"))
+ seq_printf(m, "%2u\n", pgtable->ia_bits);
+ else if (!strcmp(file_dentry(file)->d_iname, "stage2_levels"))
+ seq_printf(m, "%1d\n", KVM_PGTABLE_LAST_LEVEL - pgtable->start_level + 1);
+ return 0;
+}
+
+static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ struct kvm_pgtable *pgtable;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ pgtable = kvm->arch.mmu.pgt;
+
+ ret = single_open(file, kvm_pgtable_debugfs_show, pgtable);
+ if (ret < 0)
+ kvm_put_kvm(kvm);
+ return ret;
+}
+
+static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+
+ kvm_put_kvm(kvm);
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_pgtable_debugfs_fops = {
+ .open = kvm_pgtable_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_pgtable_debugfs_close,
+};
+
void kvm_ptdump_guest_register(struct kvm *kvm)
{
debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
kvm, &kvm_ptdump_guest_fops);
+ debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm,
+ &kvm_pgtable_debugfs_fops);
+ debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_pgtable_debugfs_fops);
}
--
2.46.0.184.g6999bdac58-goog
^ permalink raw reply related [flat|nested] 19+ messages in thread