* [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-21 14:12 ` Vincent Donnefort
2023-11-15 17:16 ` [PATCH v3 02/10] arm64: ptdump: Use the mask from the state structure Sebastian Ene
` (9 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Introduce a new HVC that allows the caller to snap shoot the stage-2
pagetables under NVHE debug configuration. The caller specifies the
location where the pagetables are copied and must ensure that the memory
is accessible by the hypervisor. The memory where the pagetables are
copied has to be allocated by the caller and shared with the hypervisor.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/include/asm/kvm_pgtable.h | 36 +++++++
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 20 ++++
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 102 ++++++++++++++++++
arch/arm64/kvm/hyp/pgtable.c | 56 ++++++++++
6 files changed, 216 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24b5e6b23417..99145a24c0f6 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_copy_host_stage2,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d3e354bb8351..be615700f8ac 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -10,6 +10,7 @@
#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
+#include <asm/kvm_host.h>
#define KVM_PGTABLE_MAX_LEVELS 4U
@@ -351,6 +352,21 @@ struct kvm_pgtable {
kvm_pgtable_force_pte_cb_t force_pte_cb;
};
+/**
+ * struct kvm_pgtable_snapshot - Snapshot page-table wrapper.
+ * @pgtable: The page-table configuration.
+ * @mc: Memcache used for pagetable pages allocation.
+ * @pgd_hva: Host virtual address of a physically contiguous buffer
+ * used for storing the PGD.
+ * @pgd_len: The size of the physically contiguous buffer in bytes.
+ */
+struct kvm_pgtable_snapshot {
+ struct kvm_pgtable pgtable;
+ struct kvm_hyp_memcache mc;
+ void *pgd_hva;
+ size_t pgd_len;
+};
+
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -756,4 +772,24 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
*/
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size);
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/**
+ * kvm_pgtable_stage2_copy() - Snapshot the pagetable
+ *
+ * @to_pgt: Destination pagetable
+ * @from_pgt: Source pagetable. The caller must lock the pagetables first
+ * @mc: The memcache where we allocate the destination pagetables from
+ */
+int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc);
+#else
+static inline int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc)
+{
+ return -EPERM;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */
#endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0972faccc2af..9cfb35d68850 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -69,6 +69,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
+int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed87..98646cc67497 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -314,6 +314,25 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
}
+static void handle___pkvm_copy_host_stage2(struct kvm_cpu_context *host_ctxt)
+{
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ int ret = -EPERM;
+ DECLARE_REG(struct kvm_pgtable_snapshot *, snapshot, host_ctxt, 1);
+ kvm_pteref_t pgd;
+
+ snapshot = kern_hyp_va(snapshot);
+ ret = __pkvm_host_stage2_prepare_copy(snapshot);
+ if (!ret) {
+ pgd = snapshot->pgtable.pgd;
+ snapshot->pgtable.pgd = (kvm_pteref_t)__hyp_pa(pgd);
+ }
+ cpu_reg(host_ctxt, 1) = ret;
+#else
+ cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
+#endif
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -348,6 +367,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
+ HANDLE_FUNC(__pkvm_copy_host_stage2),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 8d0a5834e883..1c3ab5ac9110 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -266,6 +266,108 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
return 0;
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static struct hyp_pool snapshot_pool = {0};
+static DEFINE_HYP_SPINLOCK(snapshot_pool_lock);
+
+static void *snapshot_zalloc_pages_exact(size_t size)
+{
+ void *addr = hyp_alloc_pages(&snapshot_pool, get_order(size));
+
+ hyp_split_page(hyp_virt_to_page(addr));
+
+ /*
+ * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
+ * so there should be no need to free any of the tail pages to make the
+ * allocation exact.
+ */
+ WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+ return addr;
+}
+
+static void snapshot_get_page(void *addr)
+{
+ hyp_get_page(&snapshot_pool, addr);
+}
+
+static void *snapshot_zalloc_page(void *mc)
+{
+ struct hyp_page *p;
+ void *addr;
+
+ addr = hyp_alloc_pages(&snapshot_pool, 0);
+ if (addr)
+ return addr;
+
+ addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
+ if (!addr)
+ return addr;
+
+ memset(addr, 0, PAGE_SIZE);
+ p = hyp_virt_to_page(addr);
+ memset(p, 0, sizeof(*p));
+ p->refcount = 1;
+
+ return addr;
+}
+
+static void snapshot_s2_free_pages_exact(void *addr, unsigned long size)
+{
+ u8 order = get_order(size);
+ unsigned int i;
+ struct hyp_page *p;
+
+ for (i = 0; i < (1 << order); i++) {
+ p = hyp_virt_to_page(addr + (i * PAGE_SIZE));
+ hyp_page_ref_dec_and_test(p);
+ }
+}
+
+int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot)
+{
+ size_t required_pgd_len;
+ struct kvm_pgtable_mm_ops mm_ops = {0};
+ struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
+ struct kvm_pgtable *to_pgt, *from_pgt = &host_mmu.pgt;
+ struct kvm_hyp_memcache *memcache = &snapshot->mc;
+ int ret;
+ void *pgd;
+
+ required_pgd_len = kvm_pgtable_stage2_pgd_size(mmu->vtcr);
+ if (snapshot->pgd_len < required_pgd_len)
+ return -ENOMEM;
+
+ to_pgt = &snapshot->pgtable;
+ pgd = kern_hyp_va(snapshot->pgd_hva);
+
+ hyp_spin_lock(&snapshot_pool_lock);
+ hyp_pool_init(&snapshot_pool, hyp_virt_to_pfn(pgd),
+ required_pgd_len / PAGE_SIZE, 0);
+
+ mm_ops.zalloc_pages_exact = snapshot_zalloc_pages_exact;
+ mm_ops.zalloc_page = snapshot_zalloc_page;
+ mm_ops.free_pages_exact = snapshot_s2_free_pages_exact;
+ mm_ops.get_page = snapshot_get_page;
+ mm_ops.phys_to_virt = hyp_phys_to_virt;
+ mm_ops.virt_to_phys = hyp_virt_to_phys;
+ mm_ops.page_count = hyp_page_count;
+
+ to_pgt->ia_bits = from_pgt->ia_bits;
+ to_pgt->start_level = from_pgt->start_level;
+ to_pgt->flags = from_pgt->flags;
+ to_pgt->mm_ops = &mm_ops;
+
+ host_lock_component();
+ ret = kvm_pgtable_stage2_copy(to_pgt, from_pgt, memcache);
+ host_unlock_component();
+
+ hyp_spin_unlock(&snapshot_pool_lock);
+
+ return ret;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */
+
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
void *addr;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 1966fdee740e..46b15d74118f 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1598,3 +1598,59 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
WARN_ON(mm_ops->page_count(pgtable) != 1);
mm_ops->put_page(pgtable);
}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static int stage2_copy_walker(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+ void *copy_table, *original_addr;
+ kvm_pte_t new = ctx->old;
+
+ if (!stage2_pte_is_counted(ctx->old))
+ return 0;
+
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ copy_table = mm_ops->zalloc_page(ctx->arg);
+ if (!copy_table)
+ return -ENOMEM;
+
+ original_addr = kvm_pte_follow(ctx->old, mm_ops);
+
+ memcpy(copy_table, original_addr, PAGE_SIZE);
+ new = kvm_init_table_pte(copy_table, mm_ops);
+ }
+
+ *ctx->ptep = new;
+
+ return 0;
+}
+
+int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc)
+{
+ int ret;
+ size_t pgd_sz;
+ struct kvm_pgtable_mm_ops *mm_ops = to_pgt->mm_ops;
+ struct kvm_pgtable_walker walker = {
+ .cb = stage2_copy_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF |
+ KVM_PGTABLE_WALK_TABLE_PRE,
+ .arg = mc
+ };
+
+ pgd_sz = kvm_pgd_pages(to_pgt->ia_bits, to_pgt->start_level) *
+ PAGE_SIZE;
+ to_pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
+ if (!to_pgt->pgd)
+ return -ENOMEM;
+
+ memcpy(to_pgt->pgd, from_pgt->pgd, pgd_sz);
+
+ ret = kvm_pgtable_walk(to_pgt, 0, BIT(to_pgt->ia_bits), &walker);
+ mm_ops->free_pages_exact(to_pgt->pgd, pgd_sz);
+
+ return ret;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables
2023-11-15 17:16 ` [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables Sebastian Ene
@ 2023-11-21 14:12 ` Vincent Donnefort
2023-11-23 14:40 ` Sebastian Ene
0 siblings, 1 reply; 22+ messages in thread
From: Vincent Donnefort @ 2023-11-21 14:12 UTC (permalink / raw)
To: Sebastian Ene
Cc: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team, qperret, smostafa
On Wed, Nov 15, 2023 at 05:16:31PM +0000, Sebastian Ene wrote:
> Introduce a new HVC that allows the caller to snap shoot the stage-2
"snapshot" same in the title of this patch.
> pagetables under NVHE debug configuration. The caller specifies the
> location where the pagetables are copied and must ensure that the memory
> is accessible by the hypervisor. The memory where the pagetables are
> copied has to be allocated by the caller and shared with the hypervisor.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/include/asm/kvm_asm.h | 1 +
> arch/arm64/include/asm/kvm_pgtable.h | 36 +++++++
> arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 20 ++++
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 102 ++++++++++++++++++
> arch/arm64/kvm/hyp/pgtable.c | 56 ++++++++++
> 6 files changed, 216 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 24b5e6b23417..99145a24c0f6 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
> __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
> __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
> __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
> + __KVM_HOST_SMCCC_FUNC___pkvm_copy_host_stage2,
> };
>
> #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index d3e354bb8351..be615700f8ac 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -10,6 +10,7 @@
> #include <linux/bits.h>
> #include <linux/kvm_host.h>
> #include <linux/types.h>
> +#include <asm/kvm_host.h>
>
> #define KVM_PGTABLE_MAX_LEVELS 4U
>
> @@ -351,6 +352,21 @@ struct kvm_pgtable {
> kvm_pgtable_force_pte_cb_t force_pte_cb;
> };
>
> +/**
> + * struct kvm_pgtable_snapshot - Snapshot page-table wrapper.
> + * @pgtable: The page-table configuration.
> + * @mc: Memcache used for pagetable pages allocation.
> + * @pgd_hva: Host virtual address of a physically contiguous buffer
> + * used for storing the PGD.
> + * @pgd_len: The size of the phyisically contiguous buffer in bytes.
> + */
> +struct kvm_pgtable_snapshot {
> + struct kvm_pgtable pgtable;
> + struct kvm_hyp_memcache mc;
> + void *pgd_hva;
> + size_t pgd_len;
> +};
> +
> /**
> * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
> * @pgt: Uninitialised page-table structure to initialise.
> @@ -756,4 +772,24 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
> */
> void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> phys_addr_t addr, size_t size);
> +
> +#ifdef CONFIG_NVHE_EL2_DEBUG
> +/**
> + * kvm_pgtable_stage2_copy() - Snapshot the pagetable
> + *
> + * @to_pgt: Destination pagetable
> + * @from_pgt: Source pagetable. The caller must lock the pagetables first
> + * @mc: The memcache where we allocate the destination pagetables from
> + */
> +int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> + const struct kvm_pgtable *from_pgt,
> + void *mc);
> +#else
> +static inline int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> + const struct kvm_pgtable *from_pgt,
> + void *mc)
> +{
> + return -EPERM;
> +}
> +#endif /* CONFIG_NVHE_EL2_DEBUG */
> #endif /* __ARM64_KVM_PGTABLE_H__ */
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> index 0972faccc2af..9cfb35d68850 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> @@ -69,6 +69,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
> int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
> int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
> int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
> +int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot);
nit: the struct being a snapshot and that function doing the actual snapshot,
maybe __pkvm_host_stage2_snapshot() would be a better name?
>
> bool addr_is_memory(phys_addr_t phys);
> int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index 2385fd03ed87..98646cc67497 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -314,6 +314,25 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
> cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
> }
>
> +static void handle___pkvm_copy_host_stage2(struct kvm_cpu_context *host_ctxt)
> +{
> +#ifdef CONFIG_NVHE_EL2_DEBUG
> + int ret = -EPERM;
> + DECLARE_REG(struct kvm_pgtable_snapshot *, snapshot, host_ctxt, 1);
> + kvm_pteref_t pgd;
> +
> + snapshot = kern_hyp_va(snapshot);
> + ret = __pkvm_host_stage2_prepare_copy(snapshot);
> + if (!ret) {
> + pgd = snapshot->pgtable.pgd;
> + snapshot->pgtable.pgd = (kvm_pteref_t)__hyp_pa(pgd);
> + }
> + cpu_reg(host_ctxt, 1) = ret;
> +#else
> + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
> +#endif
> +}
> +
> typedef void (*hcall_t)(struct kvm_cpu_context *);
>
> #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> @@ -348,6 +367,7 @@ static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__pkvm_init_vm),
> HANDLE_FUNC(__pkvm_init_vcpu),
> HANDLE_FUNC(__pkvm_teardown_vm),
> + HANDLE_FUNC(__pkvm_copy_host_stage2),
> };
>
> static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 8d0a5834e883..1c3ab5ac9110 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -266,6 +266,108 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
> return 0;
> }
>
> +#ifdef CONFIG_NVHE_EL2_DEBUG
> +static struct hyp_pool snapshot_pool = {0};
> +static DEFINE_HYP_SPINLOCK(snapshot_pool_lock);
> +
> +static void *snapshot_zalloc_pages_exact(size_t size)
> +{
> + void *addr = hyp_alloc_pages(&snapshot_pool, get_order(size));
> +
> + hyp_split_page(hyp_virt_to_page(addr));
> +
> + /*
> + * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
> + * so there should be no need to free any of the tail pages to make the
> + * allocation exact.
> + */
> + WARN_ON(size != (PAGE_SIZE << get_order(size)));
> +
> + return addr;
> +}
> +
> +static void snapshot_get_page(void *addr)
> +{
> + hyp_get_page(&snapshot_pool, addr);
> +}
> +
> +static void *snapshot_zalloc_page(void *mc)
> +{
> + struct hyp_page *p;
> + void *addr;
> +
> + addr = hyp_alloc_pages(&snapshot_pool, 0);
> + if (addr)
> + return addr;
> +
> + addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
> + if (!addr)
> + return addr;
> +
> + memset(addr, 0, PAGE_SIZE);
> + p = hyp_virt_to_page(addr);
> + memset(p, 0, sizeof(*p));
Why do you need to erase this entry of the vmemmap? That sounds odd.
> + p->refcount = 1;
> +
> + return addr;
> +}
> +
> +static void snapshot_s2_free_pages_exact(void *addr, unsigned long size)
> +{
> + u8 order = get_order(size);
> + unsigned int i;
> + struct hyp_page *p;
> +
> + for (i = 0; i < (1 << order); i++) {
> + p = hyp_virt_to_page(addr + (i * PAGE_SIZE));
> + hyp_page_ref_dec_and_test(p);
hyp_page_ref_dec() would be enough here.
> + }
> +}
> +
> +int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot)
> +{
> + size_t required_pgd_len;
> + struct kvm_pgtable_mm_ops mm_ops = {0};
> + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
> + struct kvm_pgtable *to_pgt, *from_pgt = &host_mmu.pgt;
> + struct kvm_hyp_memcache *memcache = &snapshot->mc;
> + int ret;
> + void *pgd;
The shared memory between host and hyp should probably be pinned here even if it
is behind a DEBUG option. That would also ensure that the memory has actually
been shared beforehand and we'll not crash while running that.
Pinning that memory would also make sure that the right amount has been shared
for the pgd and we'll not overflow.
> +
> + required_pgd_len = kvm_pgtable_stage2_pgd_size(mmu->vtcr);
> + if (snapshot->pgd_len < required_pgd_len)
> + return -ENOMEM;
> +
> + to_pgt = &snapshot->pgtable;
> + pgd = kern_hyp_va(snapshot->pgd_hva);
> +
> + hyp_spin_lock(&snapshot_pool_lock);
> + hyp_pool_init(&snapshot_pool, hyp_virt_to_pfn(pgd),
> + required_pgd_len / PAGE_SIZE, 0);
> +
> + mm_ops.zalloc_pages_exact = snapshot_zalloc_pages_exact;
> + mm_ops.zalloc_page = snapshot_zalloc_page;
> + mm_ops.free_pages_exact = snapshot_s2_free_pages_exact;
> + mm_ops.get_page = snapshot_get_page;
> + mm_ops.phys_to_virt = hyp_phys_to_virt;
> + mm_ops.virt_to_phys = hyp_virt_to_phys;
> + mm_ops.page_count = hyp_page_count;
It looks a bit odd to me to have to handle the refcount while the lifespan of
that newly created page-table is only the lifespan of that function, where
everything is behind a lock.
Is the need for the pool to handle the physically contiguous mem for the PGD? If
that is the case, maybe you could avoid the pool by just passing as a separated
argument the memory for the PGD and in another one the memcache for the other
levels?
Especially IIUC, the pool would be used here only for the PGD allocation, as you
add only the size of the PGD to the pool?
>+
> + to_pgt->ia_bits = from_pgt->ia_bits;
> + to_pgt->start_level = from_pgt->start_level;
> + to_pgt->flags = from_pgt->flags;
> + to_pgt->mm_ops = &mm_ops;
> +
> + host_lock_component();
> + ret = kvm_pgtable_stage2_copy(to_pgt, from_pgt, memcache);
> + host_unlock_component();
> +
> + hyp_spin_unlock(&snapshot_pool_lock);
> +
> + return ret;
> +}
> +#endif /* CONFIG_NVHE_EL2_DEBUG */
> +
> void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
> {
> void *addr;
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 1966fdee740e..46b15d74118f 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -1598,3 +1598,59 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
> WARN_ON(mm_ops->page_count(pgtable) != 1);
> mm_ops->put_page(pgtable);
> }
> +
> +#ifdef CONFIG_NVHE_EL2_DEBUG
> +static int stage2_copy_walker(const struct kvm_pgtable_visit_ctx *ctx,
> + enum kvm_pgtable_walk_flags visit)
> +{
> + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
> + void *copy_table, *original_addr;
> + kvm_pte_t new = ctx->old;
> +
> + if (!stage2_pte_is_counted(ctx->old))
> + return 0;
> +
> + if (kvm_pte_table(ctx->old, ctx->level)) {
> + copy_table = mm_ops->zalloc_page(ctx->arg);
> + if (!copy_table)
> + return -ENOMEM;
> +
> + original_addr = kvm_pte_follow(ctx->old, mm_ops);
> +
> + memcpy(copy_table, original_addr, PAGE_SIZE);
> + new = kvm_init_table_pte(copy_table, mm_ops);
> + }
> +
> + *ctx->ptep = new;
> +
> + return 0;
> +}
> +
> +int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> + const struct kvm_pgtable *from_pgt,
> + void *mc)
> +{
> + int ret;
> + size_t pgd_sz;
> + struct kvm_pgtable_mm_ops *mm_ops = to_pgt->mm_ops;
> + struct kvm_pgtable_walker walker = {
> + .cb = stage2_copy_walker,
> + .flags = KVM_PGTABLE_WALK_LEAF |
> + KVM_PGTABLE_WALK_TABLE_PRE,
Do you need the WALK_LEAF? Entire tables are copied and you don't need to change
anything at the leaf level. Actually, stage2_copy_walker() writes new =
ctx->old in that case, doesn't it?
> + .arg = mc
> + };
> +
> + pgd_sz = kvm_pgd_pages(to_pgt->ia_bits, to_pgt->start_level) *
> + PAGE_SIZE;
> + to_pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
> + if (!to_pgt->pgd)
> + return -ENOMEM;
> +
> + memcpy(to_pgt->pgd, from_pgt->pgd, pgd_sz);
> +
> + ret = kvm_pgtable_walk(to_pgt, 0, BIT(to_pgt->ia_bits), &walker);
> + mm_ops->free_pages_exact(to_pgt->pgd, pgd_sz);
> +
> + return ret;
> +}
> +#endif /* CONFIG_NVHE_EL2_DEBUG */
> --
> 2.43.0.rc0.421.g78406f8d94-goog
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables
2023-11-21 14:12 ` Vincent Donnefort
@ 2023-11-23 14:40 ` Sebastian Ene
0 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-23 14:40 UTC (permalink / raw)
To: Vincent Donnefort
Cc: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team, qperret, smostafa
On Tue, Nov 21, 2023 at 02:12:50PM +0000, Vincent Donnefort wrote:
Hi,
> On Wed, Nov 15, 2023 at 05:16:31PM +0000, Sebastian Ene wrote:
> > Introduce a new HVC that allows the caller to snap shoot the stage-2
>
> "snapshot" same in the title of this patch.
>
I will update the name, thanks.
> > pagetables under NVHE debug configuration. The caller specifies the
> > location where the pagetables are copied and must ensure that the memory
> > is accessible by the hypervisor. The memory where the pagetables are
> > copied has to be allocated by the caller and shared with the hypervisor.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/include/asm/kvm_asm.h | 1 +
> > arch/arm64/include/asm/kvm_pgtable.h | 36 +++++++
> > arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
> > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 20 ++++
> > arch/arm64/kvm/hyp/nvhe/mem_protect.c | 102 ++++++++++++++++++
> > arch/arm64/kvm/hyp/pgtable.c | 56 ++++++++++
> > 6 files changed, 216 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index 24b5e6b23417..99145a24c0f6 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
> > __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
> > __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
> > __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
> > + __KVM_HOST_SMCCC_FUNC___pkvm_copy_host_stage2,
> > };
> >
> > #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > index d3e354bb8351..be615700f8ac 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -10,6 +10,7 @@
> > #include <linux/bits.h>
> > #include <linux/kvm_host.h>
> > #include <linux/types.h>
> > +#include <asm/kvm_host.h>
> >
> > #define KVM_PGTABLE_MAX_LEVELS 4U
> >
> > @@ -351,6 +352,21 @@ struct kvm_pgtable {
> > kvm_pgtable_force_pte_cb_t force_pte_cb;
> > };
> >
> > +/**
> > + * struct kvm_pgtable_snapshot - Snapshot page-table wrapper.
> > + * @pgtable: The page-table configuration.
> > + * @mc: Memcache used for pagetable pages allocation.
> > + * @pgd_hva: Host virtual address of a physically contiguous buffer
> > + * used for storing the PGD.
> > + * @pgd_len: The size of the phyisically contiguous buffer in bytes.
> > + */
> > +struct kvm_pgtable_snapshot {
> > + struct kvm_pgtable pgtable;
> > + struct kvm_hyp_memcache mc;
> > + void *pgd_hva;
> > + size_t pgd_len;
> > +};
> > +
> > /**
> > * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
> > * @pgt: Uninitialised page-table structure to initialise.
> > @@ -756,4 +772,24 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
> > */
> > void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> > phys_addr_t addr, size_t size);
> > +
> > +#ifdef CONFIG_NVHE_EL2_DEBUG
> > +/**
> > + * kvm_pgtable_stage2_copy() - Snapshot the pagetable
> > + *
> > + * @to_pgt: Destination pagetable
> > + * @from_pgt: Source pagetable. The caller must lock the pagetables first
> > + * @mc: The memcache where we allocate the destination pagetables from
> > + */
> > +int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> > + const struct kvm_pgtable *from_pgt,
> > + void *mc);
> > +#else
> > +static inline int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> > + const struct kvm_pgtable *from_pgt,
> > + void *mc)
> > +{
> > + return -EPERM;
> > +}
> > +#endif /* CONFIG_NVHE_EL2_DEBUG */
> > #endif /* __ARM64_KVM_PGTABLE_H__ */
> > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > index 0972faccc2af..9cfb35d68850 100644
> > --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> > @@ -69,6 +69,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
> > int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
> > int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
> > int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
> > +int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot);
>
> nit: the struct being a snapshot and that function doing the actual snapshot,
> maybe __pkvm_host_stage2_snapshot() would be a better name?
>
Sure, we can use __pkvm_host_stage2_snapshot.
> >
> > bool addr_is_memory(phys_addr_t phys);
> > int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > index 2385fd03ed87..98646cc67497 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > @@ -314,6 +314,25 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
> > cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
> > }
> >
> > +static void handle___pkvm_copy_host_stage2(struct kvm_cpu_context *host_ctxt)
> > +{
> > +#ifdef CONFIG_NVHE_EL2_DEBUG
> > + int ret = -EPERM;
> > + DECLARE_REG(struct kvm_pgtable_snapshot *, snapshot, host_ctxt, 1);
> > + kvm_pteref_t pgd;
> > +
> > + snapshot = kern_hyp_va(snapshot);
> > + ret = __pkvm_host_stage2_prepare_copy(snapshot);
> > + if (!ret) {
> > + pgd = snapshot->pgtable.pgd;
> > + snapshot->pgtable.pgd = (kvm_pteref_t)__hyp_pa(pgd);
> > + }
> > + cpu_reg(host_ctxt, 1) = ret;
> > +#else
> > + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
> > +#endif
> > +}
> > +
> > typedef void (*hcall_t)(struct kvm_cpu_context *);
> >
> > #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> > @@ -348,6 +367,7 @@ static const hcall_t host_hcall[] = {
> > HANDLE_FUNC(__pkvm_init_vm),
> > HANDLE_FUNC(__pkvm_init_vcpu),
> > HANDLE_FUNC(__pkvm_teardown_vm),
> > + HANDLE_FUNC(__pkvm_copy_host_stage2),
> > };
> >
> > static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > index 8d0a5834e883..1c3ab5ac9110 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > @@ -266,6 +266,108 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
> > return 0;
> > }
> >
> > +#ifdef CONFIG_NVHE_EL2_DEBUG
> > +static struct hyp_pool snapshot_pool = {0};
> > +static DEFINE_HYP_SPINLOCK(snapshot_pool_lock);
> > +
> > +static void *snapshot_zalloc_pages_exact(size_t size)
> > +{
> > + void *addr = hyp_alloc_pages(&snapshot_pool, get_order(size));
> > +
> > + hyp_split_page(hyp_virt_to_page(addr));
> > +
> > + /*
> > + * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
> > + * so there should be no need to free any of the tail pages to make the
> > + * allocation exact.
> > + */
> > + WARN_ON(size != (PAGE_SIZE << get_order(size)));
> > +
> > + return addr;
> > +}
> > +
> > +static void snapshot_get_page(void *addr)
> > +{
> > + hyp_get_page(&snapshot_pool, addr);
> > +}
> > +
> > +static void *snapshot_zalloc_page(void *mc)
> > +{
> > + struct hyp_page *p;
> > + void *addr;
> > +
> > + addr = hyp_alloc_pages(&snapshot_pool, 0);
> > + if (addr)
> > + return addr;
> > +
> > + addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
> > + if (!addr)
> > + return addr;
> > +
> > + memset(addr, 0, PAGE_SIZE);
> > + p = hyp_virt_to_page(addr);
> > + memset(p, 0, sizeof(*p));
>
> Why do you need to rease this entry of the vmemmap? That sounds odd.
>
I guess I did it in the same way as the guest does for
`guest_s2_zalloc_page`. Do you see any issues with this?
> > + p->refcount = 1;
> > +
> > + return addr;
> > +}
> > +
> > +static void snapshot_s2_free_pages_exact(void *addr, unsigned long size)
> > +{
> > + u8 order = get_order(size);
> > + unsigned int i;
> > + struct hyp_page *p;
> > +
> > + for (i = 0; i < (1 << order); i++) {
> > + p = hyp_virt_to_page(addr + (i * PAGE_SIZE));
> > + hyp_page_ref_dec_and_test(p);
>
> hyp_page_ref_dec() would be enough here.
>
Yes, we can use hyp_page_ref_dec here, thanks.
> > + }
> > +}
> > +
> > +int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot)
> > +{
> > + size_t required_pgd_len;
> > + struct kvm_pgtable_mm_ops mm_ops = {0};
> > + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
> > + struct kvm_pgtable *to_pgt, *from_pgt = &host_mmu.pgt;
> > + struct kvm_hyp_memcache *memcache = &snapshot->mc;
> > + int ret;
> > + void *pgd;
>
> The shared memory between host and hyp should probably be pinned here even if it
> is behind a DEBUG option. That would also ensure that the memory has actually
> been shared beforehand and we'll not crash while running that.
>
> Pinning that memory would also make sure that the right amount has been shared
> for the pgd and we'll not overflow.
>
After having the discussion offline I think it's better to change this
to the donation API from host -> hypervisor as the host doesn't need
shared access while the snapshot is being taken.
> > +
> > + required_pgd_len = kvm_pgtable_stage2_pgd_size(mmu->vtcr);
> > + if (snapshot->pgd_len < required_pgd_len)
> > + return -ENOMEM;
> > +
> > + to_pgt = &snapshot->pgtable;
> > + pgd = kern_hyp_va(snapshot->pgd_hva);
> > +
> > + hyp_spin_lock(&snapshot_pool_lock);
> > + hyp_pool_init(&snapshot_pool, hyp_virt_to_pfn(pgd),
> > + required_pgd_len / PAGE_SIZE, 0);
> > +
> > + mm_ops.zalloc_pages_exact = snapshot_zalloc_pages_exact;
> > + mm_ops.zalloc_page = snapshot_zalloc_page;
> > + mm_ops.free_pages_exact = snapshot_s2_free_pages_exact;
> > + mm_ops.get_page = snapshot_get_page;
> > + mm_ops.phys_to_virt = hyp_phys_to_virt;
> > + mm_ops.virt_to_phys = hyp_virt_to_phys;
> > + mm_ops.page_count = hyp_page_count;
>
> It looks a bit odd to me to have to handle the refcount while the lifespan of
> that newly created page-table is only the lifespan of that function, where
> everything is behind a lock.
>
I double-checked on this and I don't think we need it, so it can be
dropped, thanks.
> Is the need for the pool to handle the physically contiguous mem for the PGD? If
> that is the case, maybe you could avoid the pool by just passing as a separated
> argument the memory for the PGD and in another one the memcache for the other
> levels?
>
I found it convenient to reuse it as we are doing the same thing when we
build the host stage-2.
> Especially IIUC, the pool would be used here only for the PGD allocation, as you
> add only the size of the PGD to the pool?
>
Yes, we only populate the pool with memory for the PGD. Actually I
wonder if we can just allocate everything from the pool and drop the
memcache.
> >+
> > + to_pgt->ia_bits = from_pgt->ia_bits;
> > + to_pgt->start_level = from_pgt->start_level;
> > + to_pgt->flags = from_pgt->flags;
> > + to_pgt->mm_ops = &mm_ops;
> > +
> > + host_lock_component();
> > + ret = kvm_pgtable_stage2_copy(to_pgt, from_pgt, memcache);
> > + host_unlock_component();
> > +
> > + hyp_spin_unlock(&snapshot_pool_lock);
> > +
> > + return ret;
> > +}
> > +#endif /* CONFIG_NVHE_EL2_DEBUG */
> > +
> > void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
> > {
> > void *addr;
> > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > index 1966fdee740e..46b15d74118f 100644
> > --- a/arch/arm64/kvm/hyp/pgtable.c
> > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > @@ -1598,3 +1598,59 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
> > WARN_ON(mm_ops->page_count(pgtable) != 1);
> > mm_ops->put_page(pgtable);
> > }
> > +
> > +#ifdef CONFIG_NVHE_EL2_DEBUG
> > +static int stage2_copy_walker(const struct kvm_pgtable_visit_ctx *ctx,
> > + enum kvm_pgtable_walk_flags visit)
> > +{
> > + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
> > + void *copy_table, *original_addr;
> > + kvm_pte_t new = ctx->old;
> > +
> > + if (!stage2_pte_is_counted(ctx->old))
> > + return 0;
> > +
> > + if (kvm_pte_table(ctx->old, ctx->level)) {
> > + copy_table = mm_ops->zalloc_page(ctx->arg);
> > + if (!copy_table)
> > + return -ENOMEM;
> > +
> > + original_addr = kvm_pte_follow(ctx->old, mm_ops);
> > +
> > + memcpy(copy_table, original_addr, PAGE_SIZE);
> > + new = kvm_init_table_pte(copy_table, mm_ops);
> > + }
> > +
> > + *ctx->ptep = new;
> > +
> > + return 0;
> > +}
> > +
> > +int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
> > + const struct kvm_pgtable *from_pgt,
> > + void *mc)
> > +{
> > + int ret;
> > + size_t pgd_sz;
> > + struct kvm_pgtable_mm_ops *mm_ops = to_pgt->mm_ops;
> > + struct kvm_pgtable_walker walker = {
> > + .cb = stage2_copy_walker,
> > + .flags = KVM_PGTABLE_WALK_LEAF |
> > + KVM_PGTABLE_WALK_TABLE_PRE,
>
> Do you need the WALK_LEAF? Entire tables are copied and you don't need to change
> anything at the leaf level. Actually, stage2_copy_walker() writes new =
> ctx->old in that case, doesn't it?
>
We need it because we invoke the callback on the _PRE flag if it is a
table and we invoke it on the _LEAF if it is a block mapping.
> > + .arg = mc
> > + };
> > +
> > + pgd_sz = kvm_pgd_pages(to_pgt->ia_bits, to_pgt->start_level) *
> > + PAGE_SIZE;
> > + to_pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
> > + if (!to_pgt->pgd)
> > + return -ENOMEM;
> > +
> > + memcpy(to_pgt->pgd, from_pgt->pgd, pgd_sz);
> > +
> > + ret = kvm_pgtable_walk(to_pgt, 0, BIT(to_pgt->ia_bits), &walker);
> > + mm_ops->free_pages_exact(to_pgt->pgd, pgd_sz);
>
> > +
> > + return ret;
> > +}
> > +#endif /* CONFIG_NVHE_EL2_DEBUG */
> > --
> > 2.43.0.rc0.421.g78406f8d94-goog
> >
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3 02/10] arm64: ptdump: Use the mask from the state structure
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 03/10] arm64: ptdump: Add the walker function to the ptdump info structure Sebastian Ene
` (8 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Printing the descriptor attributes requires accessing a mask which has a
different set of attributes for stage-2. In preparation for adding support
for the stage-2 pagetables dumping, use the mask from the local context
and not from the globally defined pg_level array. Store a pointer to
the pg_level array in the ptdump state structure. This will allow us to
extract the mask which is wrapped in the pg_level array and use it for
descriptor parsing in the note_page.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/mm/ptdump.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index e305b6593c4e..8761a70f916f 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -75,6 +75,7 @@ static struct addr_marker address_markers[] = {
struct pg_state {
struct ptdump_state ptdump;
struct seq_file *seq;
+ struct pg_level *pg_level;
const struct addr_marker *marker;
unsigned long start_address;
int level;
@@ -252,11 +253,12 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ struct pg_level *pg_info = st->pg_level;
static const char units[] = "KMGTPE";
u64 prot = 0;
if (level >= 0)
- prot = val & pg_level[level].mask;
+ prot = val & pg_info[level].mask;
if (st->level == -1) {
st->level = level;
@@ -282,10 +284,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
unit++;
}
pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
- pg_level[st->level].name);
- if (st->current_prot && pg_level[st->level].bits)
- dump_prot(st, pg_level[st->level].bits,
- pg_level[st->level].num);
+ pg_info[st->level].name);
+ if (st->current_prot && pg_info[st->level].bits)
+ dump_prot(st, pg_info[st->level].bits,
+ pg_info[st->level].num);
pt_dump_seq_puts(st->seq, "\n");
if (addr >= st->marker[1].start_address) {
@@ -316,6 +318,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
st = (struct pg_state){
.seq = s,
.marker = info->markers,
+ .pg_level = &pg_level[0],
.level = -1,
.ptdump = {
.note_page = note_page,
@@ -353,6 +356,7 @@ void ptdump_check_wx(void)
{ 0, NULL},
{ -1, NULL},
},
+ .pg_level = &pg_level[0],
.level = -1,
.check_wx = true,
.ptdump = {
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v3 03/10] arm64: ptdump: Add the walker function to the ptdump info structure
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 01/10] KVM: arm64: Add snap shooting the host stage-2 pagetables Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 02/10] arm64: ptdump: Use the mask from the state structure Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 04/10] KVM: arm64: Move pagetable definitions to common header Sebastian Ene
` (7 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Stage-2 needs a dedicated walk function to be able to parse concatenated
pagetables. The ptdump info structure is used to hold different
configuration options for the walker. This structure is registered with
the debugfs entry and is stored in the argument for the debugfs file.
Hence, in preparation for parsing the stage-2 pagetables add the walk
function as an argument for the debugfs file.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 1 +
arch/arm64/mm/ptdump.c | 1 +
arch/arm64/mm/ptdump_debugfs.c | 3 ++-
3 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 581caac525b0..1f6e0aabf16a 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -19,6 +19,7 @@ struct ptdump_info {
struct mm_struct *mm;
const struct addr_marker *markers;
unsigned long base_addr;
+ void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 8761a70f916f..d531e24ea0b2 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -346,6 +346,7 @@ static struct ptdump_info kernel_ptdump_info = {
.mm = &init_mm,
.markers = address_markers,
.base_addr = PAGE_OFFSET,
+ .ptdump_walk = &ptdump_walk,
};
void ptdump_check_wx(void)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..7564519db1e6 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -10,7 +10,8 @@ static int ptdump_show(struct seq_file *m, void *v)
struct ptdump_info *info = m->private;
get_online_mems();
- ptdump_walk(m, info);
+ if (info->ptdump_walk)
+ info->ptdump_walk(m, info);
put_online_mems();
return 0;
}
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v3 04/10] KVM: arm64: Move pagetable definitions to common header
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (2 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 03/10] arm64: ptdump: Add the walker function to the ptdump info structure Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 05/10] arm64: ptdump: Add hooks on debugfs file operations Sebastian Ene
` (6 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
In preparation for using the stage-2 definitions in ptdump, move some of
these macros in the common header.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/kvm_pgtable.h | 42 ++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/pgtable.c | 42 ----------------------------
2 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index be615700f8ac..913f34d75b29 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -45,6 +45,48 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 46b15d74118f..65643320a6bb 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
-
-#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
-
-#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
-
-#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
- KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
- KVM_PTE_LEAF_ATTR_HI_S2_XN)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
-#define KVM_MAX_OWNER_ID 1
-
-/*
- * Used to indicate a pte for which a 'break-before-make' sequence is in
- * progress.
- */
-#define KVM_INVALID_PTE_LOCKED BIT(10)
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v3 05/10] arm64: ptdump: Add hooks on debugfs file operations
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (3 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 04/10] KVM: arm64: Move pagetable definitions to common header Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-22 14:36 ` Vincent Donnefort
2023-11-15 17:16 ` [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables Sebastian Ene
` (5 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Introduce callbacks invoked when the debugfs entry is accessed from
userspace. These hooks will allow us to allocate and prepare the memory
resources used by ptdump when the debugfs file is opened/closed.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 7 +++++
arch/arm64/mm/ptdump_debugfs.c | 53 +++++++++++++++++++++++++++++++--
2 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 1f6e0aabf16a..9b2bebfcefbe 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -20,9 +20,16 @@ struct ptdump_info {
const struct addr_marker *markers;
unsigned long base_addr;
void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
+ int (*ptdump_prepare_walk)(void *file_priv);
+ void (*ptdump_end_walk)(void *file_priv);
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+
+struct ptdump_info_file_priv {
+ struct ptdump_info info;
+ void *file_priv;
+};
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 7564519db1e6..3bf5de51e8c3 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -7,7 +7,8 @@
static int ptdump_show(struct seq_file *m, void *v)
{
- struct ptdump_info *info = m->private;
+ struct ptdump_info_file_priv *f_priv = m->private;
+ struct ptdump_info *info = &f_priv->info;
get_online_mems();
if (info->ptdump_walk)
@@ -15,7 +16,55 @@ static int ptdump_show(struct seq_file *m, void *v)
put_online_mems();
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct ptdump_info *info = inode->i_private;
+ struct ptdump_info_file_priv *f_priv;
+
+ f_priv = kzalloc(sizeof(struct ptdump_info_file_priv), GFP_KERNEL);
+ if (!f_priv)
+ return -ENOMEM;
+
+ memcpy(&f_priv->info, info, sizeof(*info));
+
+ ret = single_open(file, ptdump_show, f_priv);
+ if (ret) {
+ kfree(f_priv);
+ return ret;
+ }
+
+ if (info->ptdump_prepare_walk) {
+ ret = info->ptdump_prepare_walk(f_priv);
+ if (ret)
+ kfree(f_priv);
+ }
+
+ return ret;
+}
+
+static int ptdump_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *f = file->private_data;
+ struct ptdump_info_file_priv *f_priv = f->private;
+ struct ptdump_info *info = &f_priv->info;
+
+ if (info->ptdump_end_walk)
+ info->ptdump_end_walk(f_priv);
+
+ kfree(f_priv);
+
+ return single_release(inode, file);
+}
+
+static const struct file_operations ptdump_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ptdump_release,
+};
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v3 05/10] arm64: ptdump: Add hooks on debugfs file operations
2023-11-15 17:16 ` [PATCH v3 05/10] arm64: ptdump: Add hooks on debugfs file operations Sebastian Ene
@ 2023-11-22 14:36 ` Vincent Donnefort
0 siblings, 0 replies; 22+ messages in thread
From: Vincent Donnefort @ 2023-11-22 14:36 UTC (permalink / raw)
To: Sebastian Ene
Cc: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team, qperret, smostafa
On Wed, Nov 15, 2023 at 05:16:35PM +0000, Sebastian Ene wrote:
> Introduce callbacks invoked when the debugfs entry is accessed from
> userspace. These hooks will allow us to allocate and prepare the memory
> resources used by ptdump when the debugfs file is opened/closed.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/include/asm/ptdump.h | 7 +++++
> arch/arm64/mm/ptdump_debugfs.c | 53 +++++++++++++++++++++++++++++++--
> 2 files changed, 58 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> index 1f6e0aabf16a..9b2bebfcefbe 100644
> --- a/arch/arm64/include/asm/ptdump.h
> +++ b/arch/arm64/include/asm/ptdump.h
> @@ -20,9 +20,16 @@ struct ptdump_info {
> const struct addr_marker *markers;
> unsigned long base_addr;
> void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
> + int (*ptdump_prepare_walk)(void *file_priv);
> + void (*ptdump_end_walk)(void *file_priv);
> };
>
> void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
> +
> +struct ptdump_info_file_priv {
> + struct ptdump_info info;
> + void *file_priv;
> +};
> #ifdef CONFIG_PTDUMP_DEBUGFS
> #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
> void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
> diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
> index 7564519db1e6..3bf5de51e8c3 100644
> --- a/arch/arm64/mm/ptdump_debugfs.c
> +++ b/arch/arm64/mm/ptdump_debugfs.c
> @@ -7,7 +7,8 @@
>
> static int ptdump_show(struct seq_file *m, void *v)
> {
> - struct ptdump_info *info = m->private;
> + struct ptdump_info_file_priv *f_priv = m->private;
> + struct ptdump_info *info = &f_priv->info;
>
> get_online_mems();
> if (info->ptdump_walk)
> @@ -15,7 +16,55 @@ static int ptdump_show(struct seq_file *m, void *v)
> put_online_mems();
> return 0;
> }
> -DEFINE_SHOW_ATTRIBUTE(ptdump);
> +
> +static int ptdump_open(struct inode *inode, struct file *file)
> +{
> + int ret;
> + struct ptdump_info *info = inode->i_private;
> + struct ptdump_info_file_priv *f_priv;
> +
> + f_priv = kzalloc(sizeof(struct ptdump_info_file_priv), GFP_KERNEL);
> + if (!f_priv)
> + return -ENOMEM;
> +
> + memcpy(&f_priv->info, info, sizeof(*info));
That doesn't look right. Why would reading the file need a copy of that?
Also, that is a lot of "priv" it's hard to understand which is which.
I suppose you need have the description of the pgtable which is created at the
same time as the debugfs entry is registered.
And you have (or not) the snapshot of the pgtable, which is created only on the
file open.
> +
> + ret = single_open(file, ptdump_show, f_priv);
> + if (ret) {
> + kfree(f_priv);
> + return ret;
> + }
> +
> + if (info->ptdump_prepare_walk) {
> + ret = info->ptdump_prepare_walk(f_priv);
> + if (ret)
> + kfree(f_priv);
> + }
> +
> + return ret;
> +}
> +
[...]
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (4 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 05/10] arm64: ptdump: Add hooks on debugfs file operations Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-21 17:13 ` Vincent Donnefort
2023-11-15 17:16 ` [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot Sebastian Ene
` (4 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Initialize the structures used to keep the state of the host stage-2 ptdump
walker when pKVM is enabled. Create a new debugfs entry for the host
stage-2 pagetables and hook the callbacks invoked when the entry is
accessed. When the debugfs file is opened, allocate memory resources which
will be shared with the hypervisor for saving the pagetable snapshot.
On close, release the associated memory and unshare it from the
hypervisor.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 12 +++
arch/arm64/kvm/Kconfig | 13 +++
arch/arm64/kvm/arm.c | 2 +
arch/arm64/mm/ptdump.c | 168 ++++++++++++++++++++++++++++++++
arch/arm64/mm/ptdump_debugfs.c | 8 +-
5 files changed, 202 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 9b2bebfcefbe..de5a5a0c0ecf 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -22,6 +22,7 @@ struct ptdump_info {
void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
int (*ptdump_prepare_walk)(void *file_priv);
void (*ptdump_end_walk)(void *file_priv);
+ size_t mc_len;
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
@@ -33,13 +34,24 @@ struct ptdump_info_file_priv {
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
+ struct dentry *d_entry);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
+static inline void ptdump_debugfs_kvm_register(struct ptdump_info *info,
+ const char *name,
+ struct dentry *d_entry) { }
#endif
void ptdump_check_wx(void);
#endif /* CONFIG_PTDUMP_CORE */
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void ptdump_register_host_stage2(void);
+#else
+static inline void ptdump_register_host_stage2(void) { }
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_check_wx()
#else
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 83c1e09be42e..cf5b7f06b152 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -71,4 +71,17 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on NVHE_EL2_DEBUG && PTDUMP_DEBUGFS && KVM
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e5f75f1f1085..987683650576 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -28,6 +28,7 @@
#include <linux/uaccess.h>
#include <asm/ptrace.h>
+#include <asm/ptdump.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -2592,6 +2593,7 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ ptdump_register_host_stage2();
kvm_arm_initialised = true;
return 0;
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index d531e24ea0b2..0b4cb54e43ff 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -24,6 +24,9 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptdump.h>
+#include <asm/kvm_pkvm.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/kvm_host.h>
enum address_markers_idx {
@@ -378,6 +381,170 @@ void ptdump_check_wx(void)
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+static struct ptdump_info stage2_kernel_ptdump_info;
+
+static phys_addr_t ptdump_host_pa(void *addr)
+{
+ return __pa(addr);
+}
+
+static void *ptdump_host_va(phys_addr_t phys)
+{
+ return __va(phys);
+}
+
+static size_t stage2_get_pgd_len(void)
+{
+ u64 mmfr0, mmfr1, vtcr;
+ u32 phys_shift = get_kvm_ipa_limit();
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+ return kvm_pgtable_stage2_pgd_size(vtcr);
+}
+
+static int stage2_ptdump_prepare_walk(void *file_priv)
+{
+ struct ptdump_info_file_priv *f_priv = file_priv;
+ struct ptdump_info *info = &f_priv->info;
+ struct kvm_pgtable_snapshot *snapshot;
+ int ret, pgd_index, mc_index, pgd_pages_sz;
+ void *page_hva;
+ phys_addr_t pgd;
+
+ snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!snapshot)
+ return -ENOMEM;
+
+ memset(snapshot, 0, PAGE_SIZE);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
+ if (ret)
+ goto free_snapshot;
+
+ snapshot->pgd_len = stage2_get_pgd_len();
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
+ GFP_KERNEL_ACCOUNT);
+ if (!snapshot->pgd_hva) {
+ ret = -ENOMEM;
+ goto unshare_snapshot;
+ }
+
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret)
+ goto unshare_pgd_pages;
+ }
+
+ for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
+ page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!page_hva) {
+ ret = -ENOMEM;
+ goto free_memcache_pages;
+ }
+
+ push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret) {
+ pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ goto free_memcache_pages;
+ }
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
+ if (ret)
+ goto free_memcache_pages;
+
+ pgd = (phys_addr_t)snapshot->pgtable.pgd;
+ snapshot->pgtable.pgd = phys_to_virt(pgd);
+ f_priv->file_priv = snapshot;
+ return 0;
+
+free_memcache_pages:
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+unshare_pgd_pages:
+ pgd_index = pgd_index - 1;
+ for (; pgd_index >= 0; pgd_index--) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+unshare_snapshot:
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+free_snapshot:
+ free_pages_exact(snapshot, PAGE_SIZE);
+ f_priv->file_priv = NULL;
+ return ret;
+}
+
+static void stage2_ptdump_end_walk(void *file_priv)
+{
+ struct ptdump_info_file_priv *f_priv = file_priv;
+ struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
+ void *page_hva;
+ int pgd_index, ret, pgd_pages_sz;
+
+ if (!snapshot)
+ return;
+
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+ free_pages_exact(snapshot, PAGE_SIZE);
+ f_priv->file_priv = NULL;
+}
+
+void ptdump_register_host_stage2(void)
+{
+ if (!is_protected_kvm_enabled())
+ return;
+
+ stage2_kernel_ptdump_info = (struct ptdump_info) {
+ .mc_len = host_s2_pgtable_pages(),
+ .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
+ .ptdump_end_walk = stage2_ptdump_end_walk,
+ };
+
+ ptdump_debugfs_kvm_register(&stage2_kernel_ptdump_info,
+ "host_stage2_page_tables",
+ kvm_debugfs_dir);
+}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
@@ -386,6 +553,7 @@ static int __init ptdump_init(void)
#endif
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
+
return 0;
}
device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 3bf5de51e8c3..4821dbef784c 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -68,5 +68,11 @@ static const struct file_operations ptdump_fops = {
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
- debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ ptdump_debugfs_kvm_register(info, name, NULL);
+}
+
+void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
+ struct dentry *d_entry)
+{
+ debugfs_create_file(name, 0400, d_entry, info, &ptdump_fops);
}
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables
2023-11-15 17:16 ` [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables Sebastian Ene
@ 2023-11-21 17:13 ` Vincent Donnefort
2023-11-23 14:48 ` Sebastian Ene
0 siblings, 1 reply; 22+ messages in thread
From: Vincent Donnefort @ 2023-11-21 17:13 UTC (permalink / raw)
To: Sebastian Ene
Cc: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team, qperret, smostafa
On Wed, Nov 15, 2023 at 05:16:36PM +0000, Sebastian Ene wrote:
> Initialize the structures used to keep the state of the host stage-2 ptdump
> walker when pKVM is enabled. Create a new debugfs entry for the host
> stage-2 pagetables and hook the callbacks invoked when the entry is
> accessed. When the debugfs file is opened, allocate memory resources which
> will be shared with the hypervisor for saving the pagetable snapshot.
> On close release the associated memory and we unshare it from the
> hypervisor.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/include/asm/ptdump.h | 12 +++
> arch/arm64/kvm/Kconfig | 13 +++
> arch/arm64/kvm/arm.c | 2 +
> arch/arm64/mm/ptdump.c | 168 ++++++++++++++++++++++++++++++++
> arch/arm64/mm/ptdump_debugfs.c | 8 +-
> 5 files changed, 202 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> index 9b2bebfcefbe..de5a5a0c0ecf 100644
> --- a/arch/arm64/include/asm/ptdump.h
> +++ b/arch/arm64/include/asm/ptdump.h
> @@ -22,6 +22,7 @@ struct ptdump_info {
> void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
> int (*ptdump_prepare_walk)(void *file_priv);
> void (*ptdump_end_walk)(void *file_priv);
> + size_t mc_len;
> };
>
> void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
> @@ -33,13 +34,24 @@ struct ptdump_info_file_priv {
> #ifdef CONFIG_PTDUMP_DEBUGFS
> #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
> void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
> +void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
> + struct dentry *d_entry);
> #else
> static inline void ptdump_debugfs_register(struct ptdump_info *info,
> const char *name) { }
> +static inline void ptdump_debugfs_kvm_register(struct ptdump_info *info,
> + const char *name,
> + struct dentry *d_entry) { }
> #endif
> void ptdump_check_wx(void);
> #endif /* CONFIG_PTDUMP_CORE */
>
> +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> +void ptdump_register_host_stage2(void);
> +#else
> +static inline void ptdump_register_host_stage2(void) { }
> +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> +
> #ifdef CONFIG_DEBUG_WX
> #define debug_checkwx() ptdump_check_wx()
> #else
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 83c1e09be42e..cf5b7f06b152 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -71,4 +71,17 @@ config PROTECTED_NVHE_STACKTRACE
>
> If unsure, or not using protected nVHE (pKVM), say N.
>
> +config PTDUMP_STAGE2_DEBUGFS
> + bool "Present the stage-2 pagetables to debugfs"
> + depends on NVHE_EL2_DEBUG && PTDUMP_DEBUGFS && KVM
> + default n
> + help
> + Say Y here if you want to show the stage-2 kernel pagetables
> + layout in a debugfs file. This information is only useful for kernel developers
> + who are working in architecture specific areas of the kernel.
> + It is probably not a good idea to enable this feature in a production
> + kernel.
> +
> + If in doubt, say N.
> +
> endif # VIRTUALIZATION
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index e5f75f1f1085..987683650576 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -28,6 +28,7 @@
>
> #include <linux/uaccess.h>
> #include <asm/ptrace.h>
> +#include <asm/ptdump.h>
> #include <asm/mman.h>
> #include <asm/tlbflush.h>
> #include <asm/cacheflush.h>
> @@ -2592,6 +2593,7 @@ static __init int kvm_arm_init(void)
> if (err)
> goto out_subs;
>
> + ptdump_register_host_stage2();
> kvm_arm_initialised = true;
>
> return 0;
> diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> index d531e24ea0b2..0b4cb54e43ff 100644
> --- a/arch/arm64/mm/ptdump.c
> +++ b/arch/arm64/mm/ptdump.c
> @@ -24,6 +24,9 @@
> #include <asm/memory.h>
> #include <asm/pgtable-hwdef.h>
> #include <asm/ptdump.h>
> +#include <asm/kvm_pkvm.h>
> +#include <asm/kvm_pgtable.h>
> +#include <asm/kvm_host.h>
>
>
> enum address_markers_idx {
> @@ -378,6 +381,170 @@ void ptdump_check_wx(void)
> pr_info("Checked W+X mappings: passed, no W+X pages found\n");
> }
>
> +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> +static struct ptdump_info stage2_kernel_ptdump_info;
> +
> +static phys_addr_t ptdump_host_pa(void *addr)
> +{
> + return __pa(addr);
> +}
> +
> +static void *ptdump_host_va(phys_addr_t phys)
> +{
> + return __va(phys);
> +}
> +
> +static size_t stage2_get_pgd_len(void)
> +{
> + u64 mmfr0, mmfr1, vtcr;
> + u32 phys_shift = get_kvm_ipa_limit();
> +
> + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> + vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> +
> + return kvm_pgtable_stage2_pgd_size(vtcr);
That's a lot of conversions to go from the kvm_ipa_limit to VTCR and
VTCR back to ia_bits and the start level, but avoiding that would mean
rewriting pieces of pgtable.c there. :-\
> +}
> +
> +static int stage2_ptdump_prepare_walk(void *file_priv)
> +{
> + struct ptdump_info_file_priv *f_priv = file_priv;
> + struct ptdump_info *info = &f_priv->info;
> + struct kvm_pgtable_snapshot *snapshot;
> + int ret, pgd_index, mc_index, pgd_pages_sz;
> + void *page_hva;
> + phys_addr_t pgd;
> +
> + snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
> + if (!snapshot)
> + return -ENOMEM;
For a single page, __get_free_page is enough.
> +
> + memset(snapshot, 0, PAGE_SIZE);
> + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
> + if (ret)
> + goto free_snapshot;
It'd probably be better to not share anything here, and let the hypervisor do
host_donate_hyp() and hyp_donate_host() before returning from the HVC. This
way the hypervisor will protect itself.
> +
> + snapshot->pgd_len = stage2_get_pgd_len();
> + pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
> + snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
> + GFP_KERNEL_ACCOUNT);
> + if (!snapshot->pgd_hva) {
> + ret = -ENOMEM;
> + goto unshare_snapshot;
> + }
> +
> + for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
> + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
> + virt_to_pfn(page_hva));
> + if (ret)
> + goto unshare_pgd_pages;
> + }
> +
> + for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
> + page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
ditto.
> + if (!page_hva) {
> + ret = -ENOMEM;
> + goto free_memcache_pages;
> + }
> +
> + push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
> + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
> + virt_to_pfn(page_hva));
> + if (ret) {
> + pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> + free_pages_exact(page_hva, PAGE_SIZE);
> + goto free_memcache_pages;
> + }
Maybe for the page-table pages, it'd be better to let the hyp do the
host_donate_hyp() / hyp_donate_host()? It might be easier than sharing + pinning.
> + }
> +
> + ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
> + if (ret)
> + goto free_memcache_pages;
> +
> + pgd = (phys_addr_t)snapshot->pgtable.pgd;
> + snapshot->pgtable.pgd = phys_to_virt(pgd);
> + f_priv->file_priv = snapshot;
> + return 0;
> +
> +free_memcache_pages:
> + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> + while (page_hva) {
> + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(page_hva));
> + WARN_ON(ret);
> + free_pages_exact(page_hva, PAGE_SIZE);
> + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> + }
> +unshare_pgd_pages:
> + pgd_index = pgd_index - 1;
> + for (; pgd_index >= 0; pgd_index--) {
> + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(page_hva));
> + WARN_ON(ret);
> + }
> + free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
> +unshare_snapshot:
> + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(snapshot)));
> +free_snapshot:
> + free_pages_exact(snapshot, PAGE_SIZE);
> + f_priv->file_priv = NULL;
> + return ret;
Couldn't this path be merged with stage2_ptdump_end_walk()?
> +}
> +
> +static void stage2_ptdump_end_walk(void *file_priv)
> +{
> + struct ptdump_info_file_priv *f_priv = file_priv;
> + struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
> + void *page_hva;
> + int pgd_index, ret, pgd_pages_sz;
> +
> + if (!snapshot)
> + return;
> +
> + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> + while (page_hva) {
> + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(page_hva));
> + WARN_ON(ret);
> + free_pages_exact(page_hva, PAGE_SIZE);
> + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> + }
> +
> + pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
> + for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
> + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(page_hva));
> + WARN_ON(ret);
> + }
> +
> + free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
> + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> + virt_to_pfn(snapshot)));
> + free_pages_exact(snapshot, PAGE_SIZE);
> + f_priv->file_priv = NULL;
> +}
> +
> +void ptdump_register_host_stage2(void)
> +{
> + if (!is_protected_kvm_enabled())
> + return;
> +
> + stage2_kernel_ptdump_info = (struct ptdump_info) {
> + .mc_len = host_s2_pgtable_pages(),
> + .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
> + .ptdump_end_walk = stage2_ptdump_end_walk,
> + };
> +
> + ptdump_debugfs_kvm_register(&stage2_kernel_ptdump_info,
> + "host_stage2_page_tables",
> + kvm_debugfs_dir);
> +}
> +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> +
> static int __init ptdump_init(void)
> {
> address_markers[PAGE_END_NR].start_address = PAGE_END;
> @@ -386,6 +553,7 @@ static int __init ptdump_init(void)
> #endif
> ptdump_initialize();
> ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
> +
Not needed.
> return 0;
> }
> device_initcall(ptdump_init);
> diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
> index 3bf5de51e8c3..4821dbef784c 100644
> --- a/arch/arm64/mm/ptdump_debugfs.c
> +++ b/arch/arm64/mm/ptdump_debugfs.c
> @@ -68,5 +68,11 @@ static const struct file_operations ptdump_fops = {
>
> void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
> {
> - debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
> + ptdump_debugfs_kvm_register(info, name, NULL);
Not really related to KVM; the only difference is whether a dentry is passed.
How about a single (non-__init) function?
> +}
> +
> +void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
> + struct dentry *d_entry)
> +{
> + debugfs_create_file(name, 0400, d_entry, info, &ptdump_fops);
> }
> --
> 2.43.0.rc0.421.g78406f8d94-goog
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables
2023-11-21 17:13 ` Vincent Donnefort
@ 2023-11-23 14:48 ` Sebastian Ene
0 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-23 14:48 UTC (permalink / raw)
To: Vincent Donnefort
Cc: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz, kvmarm,
linux-arm-kernel, linux-kernel, kernel-team, qperret, smostafa
On Tue, Nov 21, 2023 at 05:13:41PM +0000, Vincent Donnefort wrote:
> On Wed, Nov 15, 2023 at 05:16:36PM +0000, Sebastian Ene wrote:
Hi,
> > Initialize the structures used to keep the state of the host stage-2 ptdump
> > walker when pKVM is enabled. Create a new debugfs entry for the host
> > stage-2 pagetables and hook the callbacks invoked when the entry is
> > accessed. When the debugfs file is opened, allocate memory resources which
> > will be shared with the hypervisor for saving the pagetable snapshot.
> > On close release the associated memory and we unshare it from the
> > hypervisor.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/include/asm/ptdump.h | 12 +++
> > arch/arm64/kvm/Kconfig | 13 +++
> > arch/arm64/kvm/arm.c | 2 +
> > arch/arm64/mm/ptdump.c | 168 ++++++++++++++++++++++++++++++++
> > arch/arm64/mm/ptdump_debugfs.c | 8 +-
> > 5 files changed, 202 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> > index 9b2bebfcefbe..de5a5a0c0ecf 100644
> > --- a/arch/arm64/include/asm/ptdump.h
> > +++ b/arch/arm64/include/asm/ptdump.h
> > @@ -22,6 +22,7 @@ struct ptdump_info {
> > void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
> > int (*ptdump_prepare_walk)(void *file_priv);
> > void (*ptdump_end_walk)(void *file_priv);
> > + size_t mc_len;
> > };
> >
> > void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
> > @@ -33,13 +34,24 @@ struct ptdump_info_file_priv {
> > #ifdef CONFIG_PTDUMP_DEBUGFS
> > #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
> > void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
> > +void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
> > + struct dentry *d_entry);
> > #else
> > static inline void ptdump_debugfs_register(struct ptdump_info *info,
> > const char *name) { }
> > +static inline void ptdump_debugfs_kvm_register(struct ptdump_info *info,
> > + const char *name,
> > + struct dentry *d_entry) { }
> > #endif
> > void ptdump_check_wx(void);
> > #endif /* CONFIG_PTDUMP_CORE */
> >
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +void ptdump_register_host_stage2(void);
> > +#else
> > +static inline void ptdump_register_host_stage2(void) { }
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > #ifdef CONFIG_DEBUG_WX
> > #define debug_checkwx() ptdump_check_wx()
> > #else
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 83c1e09be42e..cf5b7f06b152 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -71,4 +71,17 @@ config PROTECTED_NVHE_STACKTRACE
> >
> > If unsure, or not using protected nVHE (pKVM), say N.
> >
> > +config PTDUMP_STAGE2_DEBUGFS
> > + bool "Present the stage-2 pagetables to debugfs"
> > + depends on NVHE_EL2_DEBUG && PTDUMP_DEBUGFS && KVM
> > + default n
> > + help
> > + Say Y here if you want to show the stage-2 kernel pagetables
> > + layout in a debugfs file. This information is only useful for kernel developers
> > + who are working in architecture specific areas of the kernel.
> > + It is probably not a good idea to enable this feature in a production
> > + kernel.
> > +
> > + If in doubt, say N.
> > +
> > endif # VIRTUALIZATION
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index e5f75f1f1085..987683650576 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -28,6 +28,7 @@
> >
> > #include <linux/uaccess.h>
> > #include <asm/ptrace.h>
> > +#include <asm/ptdump.h>
> > #include <asm/mman.h>
> > #include <asm/tlbflush.h>
> > #include <asm/cacheflush.h>
> > @@ -2592,6 +2593,7 @@ static __init int kvm_arm_init(void)
> > if (err)
> > goto out_subs;
> >
> > + ptdump_register_host_stage2();
> > kvm_arm_initialised = true;
> >
> > return 0;
> > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > index d531e24ea0b2..0b4cb54e43ff 100644
> > --- a/arch/arm64/mm/ptdump.c
> > +++ b/arch/arm64/mm/ptdump.c
> > @@ -24,6 +24,9 @@
> > #include <asm/memory.h>
> > #include <asm/pgtable-hwdef.h>
> > #include <asm/ptdump.h>
> > +#include <asm/kvm_pkvm.h>
> > +#include <asm/kvm_pgtable.h>
> > +#include <asm/kvm_host.h>
> >
> >
> > enum address_markers_idx {
> > @@ -378,6 +381,170 @@ void ptdump_check_wx(void)
> > pr_info("Checked W+X mappings: passed, no W+X pages found\n");
> > }
> >
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +static struct ptdump_info stage2_kernel_ptdump_info;
> > +
> > +static phys_addr_t ptdump_host_pa(void *addr)
> > +{
> > + return __pa(addr);
> > +}
> > +
> > +static void *ptdump_host_va(phys_addr_t phys)
> > +{
> > + return __va(phys);
> > +}
> > +
> > +static size_t stage2_get_pgd_len(void)
> > +{
> > + u64 mmfr0, mmfr1, vtcr;
> > + u32 phys_shift = get_kvm_ipa_limit();
> > +
> > + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> > + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> > + vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> > +
> > + return kvm_pgtable_stage2_pgd_size(vtcr);
>
> That's a lot of conversions to go from the kvm_ipa_limit to VTCR and
> VTCR back to ia_bits and the start level, but that would mean rewrite pieces of
> pgtable.c there. :-\
>
Right, I think with Oliver's suggestion we will no longer have to move
these bits around and the code will be self-contained under /kvm.
> > +}
> > +
> > +static int stage2_ptdump_prepare_walk(void *file_priv)
> > +{
> > + struct ptdump_info_file_priv *f_priv = file_priv;
> > + struct ptdump_info *info = &f_priv->info;
> > + struct kvm_pgtable_snapshot *snapshot;
> > + int ret, pgd_index, mc_index, pgd_pages_sz;
> > + void *page_hva;
> > + phys_addr_t pgd;
> > +
> > + snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
> > + if (!snapshot)
> > + return -ENOMEM;
>
> For a single page, __get_free_page is enough.
>
I can use this, thanks.
> > +
> > + memset(snapshot, 0, PAGE_SIZE);
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
> > + if (ret)
> > + goto free_snapshot;
>
> It'd probably be better to not share anything here, and let the hypervisor do
> host_donate_hyp() and hyp_donate_host() before returning back from the HVC. This
> way the hypervisor will protect itself.
>
Right, as we took this discussion offline, I will update this and use
the *donate* API.
> > +
> > + snapshot->pgd_len = stage2_get_pgd_len();
> > + pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
> > + snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
> > + GFP_KERNEL_ACCOUNT);
> > + if (!snapshot->pgd_hva) {
> > + ret = -ENOMEM;
> > + goto unshare_snapshot;
> > + }
> > +
> > + for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
> > + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
> > + virt_to_pfn(page_hva));
> > + if (ret)
> > + goto unshare_pgd_pages;
> > + }
> > +
> > + for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
> > + page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
>
> ditto.
>
Ack.
> > + if (!page_hva) {
> > + ret = -ENOMEM;
> > + goto free_memcache_pages;
> > + }
> > +
> > + push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
> > + virt_to_pfn(page_hva));
> > + if (ret) {
> > + pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> > + free_pages_exact(page_hva, PAGE_SIZE);
> > + goto free_memcache_pages;
> > + }
>
> Maybe for the page-table pages, it'd be better to let the hyp do the
> host_donate_hyp() / hyp_donate_host()? It might be easier than sharing + pinning.
>
> > + }
> > +
> > + ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
> > + if (ret)
> > + goto free_memcache_pages;
> > +
> > + pgd = (phys_addr_t)snapshot->pgtable.pgd;
> > + snapshot->pgtable.pgd = phys_to_virt(pgd);
> > + f_priv->file_priv = snapshot;
> > + return 0;
> > +
> > +free_memcache_pages:
> > + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> > + while (page_hva) {
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(page_hva));
> > + WARN_ON(ret);
> > + free_pages_exact(page_hva, PAGE_SIZE);
> > + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> > + }
> > +unshare_pgd_pages:
> > + pgd_index = pgd_index - 1;
> > + for (; pgd_index >= 0; pgd_index--) {
> > + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(page_hva));
> > + WARN_ON(ret);
> > + }
> > + free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
> > +unshare_snapshot:
> > + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(snapshot)));
> > +free_snapshot:
> > + free_pages_exact(snapshot, PAGE_SIZE);
> > + f_priv->file_priv = NULL;
> > + return ret;
>
> Couldn't this path be merged with stage2_ptdump_end_walk()?
>
I think it should be doable.
> > +}
> > +
> > +static void stage2_ptdump_end_walk(void *file_priv)
> > +{
> > + struct ptdump_info_file_priv *f_priv = file_priv;
> > + struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
> > + void *page_hva;
> > + int pgd_index, ret, pgd_pages_sz;
> > +
> > + if (!snapshot)
> > + return;
> > +
> > + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> > + while (page_hva) {
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(page_hva));
> > + WARN_ON(ret);
> > + free_pages_exact(page_hva, PAGE_SIZE);
> > + page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
> > + }
> > +
> > + pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
> > + for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
> > + page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(page_hva));
> > + WARN_ON(ret);
> > + }
> > +
> > + free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
> > + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
> > + virt_to_pfn(snapshot)));
> > + free_pages_exact(snapshot, PAGE_SIZE);
> > + f_priv->file_priv = NULL;
> > +}
> > +
> > +void ptdump_register_host_stage2(void)
> > +{
> > + if (!is_protected_kvm_enabled())
> > + return;
> > +
> > + stage2_kernel_ptdump_info = (struct ptdump_info) {
> > + .mc_len = host_s2_pgtable_pages(),
> > + .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
> > + .ptdump_end_walk = stage2_ptdump_end_walk,
> > + };
> > +
> > + ptdump_debugfs_kvm_register(&stage2_kernel_ptdump_info,
> > + "host_stage2_page_tables",
> > + kvm_debugfs_dir);
> > +}
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > static int __init ptdump_init(void)
> > {
> > address_markers[PAGE_END_NR].start_address = PAGE_END;
> > @@ -386,6 +553,7 @@ static int __init ptdump_init(void)
> > #endif
> > ptdump_initialize();
> > ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
> > +
>
> Not needed.
>
Will remove this, checkpatch didn't seem to complain about it.
> > return 0;
> > }
> > device_initcall(ptdump_init);
> > diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
> > index 3bf5de51e8c3..4821dbef784c 100644
> > --- a/arch/arm64/mm/ptdump_debugfs.c
> > +++ b/arch/arm64/mm/ptdump_debugfs.c
> > @@ -68,5 +68,11 @@ static const struct file_operations ptdump_fops = {
> >
> > void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
> > {
> > - debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
> > + ptdump_debugfs_kvm_register(info, name, NULL);
>
> Not really related to kvm, the only difference is passing or not a dentry.
>
> How about a single (non __init) function?
>
I don't think it works because you have to keep the signature of the
original function. This 'ptdump_debugfs_register' is also called from
non-arch driver code.
> > +}
> > +
> > +void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
> > + struct dentry *d_entry)
> > +{
> > + debugfs_create_file(name, 0400, d_entry, info, &ptdump_fops);
> > }
> > --
> > 2.43.0.rc0.421.g78406f8d94-goog
> >
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (5 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 21:57 ` kernel test robot
2023-11-18 22:39 ` kernel test robot
2023-11-15 17:16 ` [PATCH v3 08/10] arm64: ptdump: Interpret memory attributes based on runtime configuration Sebastian Ene
` (3 subsequent siblings)
10 siblings, 2 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Add a walker function which configures ptdump to parse the page-tables
from the snapshot. Define the attributes used by the stage-2 parser and
build an array which describes the name of each pagetable level.
Walk the entire address space configured in the pagetable and parse the
attribute descriptors.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/mm/ptdump.c | 157 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 157 insertions(+)
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 0b4cb54e43ff..9f88542d5312 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -86,6 +86,7 @@ struct pg_state {
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
+ struct ptdump_info_file_priv *f_priv;
};
struct prot_bits {
@@ -174,6 +175,66 @@ static const struct prot_bits pte_bits[] = {
}
};
+static const struct prot_bits stage2_pte_bits[] = {
+ {
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .set = "XN",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
+ .set = "R",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
+ .set = "W",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_NG,
+ .val = PTE_NG,
+ .set = "FnXS",
+ .clear = " ",
+ }, {
+ .mask = PTE_CONT,
+ .val = PTE_CONT,
+ .set = "CON",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT,
+ .val = PTE_TABLE_BIT,
+ .set = " ",
+ .clear = "BLK",
+ }, {
+ .mask = KVM_PGTABLE_PROT_SW0,
+ .val = KVM_PGTABLE_PROT_SW0,
+ .set = "SW0", /* PKVM_PAGE_SHARED_OWNED */
+ }, {
+ .mask = KVM_PGTABLE_PROT_SW1,
+ .val = KVM_PGTABLE_PROT_SW1,
+ .set = "SW1", /* PKVM_PAGE_SHARED_BORROWED */
+ }, {
+ .mask = KVM_PGTABLE_PROT_SW2,
+ .val = KVM_PGTABLE_PROT_SW2,
+ .set = "SW2",
+ }, {
+ .mask = KVM_PGTABLE_PROT_SW3,
+ .val = KVM_PGTABLE_PROT_SW3,
+ .set = "SW3",
+ },
+};
+
struct pg_level {
const struct prot_bits *bits;
const char *name;
@@ -286,6 +347,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
delta >>= 10;
unit++;
}
+
pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
pg_info[st->level].name);
if (st->current_prot && pg_info[st->level].bits)
@@ -394,6 +456,11 @@ static void *ptdump_host_va(phys_addr_t phys)
return __va(phys);
}
+static struct kvm_pgtable_mm_ops host_mmops = {
+ .phys_to_virt = ptdump_host_va,
+ .virt_to_phys = ptdump_host_pa,
+};
+
static size_t stage2_get_pgd_len(void)
{
u64 mmfr0, mmfr1, vtcr;
@@ -528,6 +595,95 @@ static void stage2_ptdump_end_walk(void *file_priv)
f_priv->file_priv = NULL;
}
+static int stage2_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct pg_state *st = ctx->arg;
+ struct ptdump_state *pt_st = &st->ptdump;
+
+ pt_st->note_page(pt_st, ctx->addr, ctx->level, ctx->old);
+
+ return 0;
+}
+
+static void stage2_ptdump_build_levels(struct pg_level *level,
+ size_t num_levels,
+ unsigned int start_level)
+{
+ static const char * const lvl_names[] = {"PGD", "PUD", "PMD", "PTE"};
+ int i, j, name_index;
+
+ if (num_levels > KVM_PGTABLE_MAX_LEVELS && start_level > 2) {
+ pr_warn("invalid configuration %lu levels start_lvl %u\n",
+ num_levels, start_level);
+ return;
+ }
+
+ for (i = start_level; i < num_levels; i++) {
+ name_index = i - start_level;
+ name_index = name_index * start_level + name_index;
+
+ level[i].name = lvl_names[name_index];
+ level[i].num = ARRAY_SIZE(stage2_pte_bits);
+ level[i].bits = stage2_pte_bits;
+
+ for (j = 0; j < level[i].num; j++)
+ level[i].mask |= level[i].bits[j].mask;
+ }
+}
+
+static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
+{
+ struct ptdump_info_file_priv *f_priv =
+ container_of(info, struct ptdump_info_file_priv, info);
+ struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
+ struct pg_state st;
+ struct kvm_pgtable *pgtable;
+ u64 start_ipa = 0, end_ipa;
+ struct addr_marker ipa_address_markers[3];
+ struct pg_level stage2_pg_level[KVM_PGTABLE_MAX_LEVELS] = {0};
+ struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
+ .cb = stage2_ptdump_visitor,
+ .arg = &st,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ if (snapshot == NULL || !snapshot->pgtable.pgd)
+ return;
+
+ pgtable = &snapshot->pgtable;
+ pgtable->mm_ops = &host_mmops;
+ end_ipa = BIT(pgtable->ia_bits) - 1;
+
+ memset(&ipa_address_markers[0], 0, sizeof(ipa_address_markers));
+
+ ipa_address_markers[0].start_address = start_ipa;
+ ipa_address_markers[0].name = "IPA start";
+
+ ipa_address_markers[1].start_address = end_ipa;
+ ipa_address_markers[1].name = "IPA end";
+
+ stage2_ptdump_build_levels(stage2_pg_level, KVM_PGTABLE_MAX_LEVELS,
+ pgtable->start_level);
+
+ st = (struct pg_state) {
+ .seq = s,
+ .marker = &ipa_address_markers[0],
+ .level = -1,
+ .pg_level = &stage2_pg_level[0],
+ .f_priv = f_priv,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {start_ipa, end_ipa},
+ {0, 0},
+ },
+ },
+ };
+
+ kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
+}
+
void ptdump_register_host_stage2(void)
{
if (!is_protected_kvm_enabled())
@@ -537,6 +693,7 @@ void ptdump_register_host_stage2(void)
.mc_len = host_s2_pgtable_pages(),
.ptdump_prepare_walk = stage2_ptdump_prepare_walk,
.ptdump_end_walk = stage2_ptdump_end_walk,
+ .ptdump_walk = stage2_ptdump_walk,
};
ptdump_debugfs_kvm_register(&stage2_kernel_ptdump_info,
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot
2023-11-15 17:16 ` [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot Sebastian Ene
@ 2023-11-15 21:57 ` kernel test robot
2023-11-18 22:39 ` kernel test robot
1 sibling, 0 replies; 22+ messages in thread
From: kernel test robot @ 2023-11-15 21:57 UTC (permalink / raw)
To: Sebastian Ene, will, Oliver Upton, James Morse, Suzuki K Poulose,
Zenghui Yu, catalin.marinas, mark.rutland, akpm, maz
Cc: oe-kbuild-all, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa, Sebastian Ene
Hi Sebastian,
kernel test robot noticed the following build warnings:
[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on kvmarm/next linus/master v6.7-rc1 next-20231115]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Ene/KVM-arm64-Add-snap-shooting-the-host-stage-2-pagetables/20231116-012017
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20231115171639.2852644-9-sebastianene%40google.com
patch subject: [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot
config: arm64-randconfig-001-20231116 (https://download.01.org/0day-ci/archive/20231116/202311160522.Nr71iglr-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231116/202311160522.Nr71iglr-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311160522.Nr71iglr-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> arch/arm64/mm/ptdump.c:178:31: warning: 'stage2_pte_bits' defined but not used [-Wunused-const-variable=]
178 | static const struct prot_bits stage2_pte_bits[] = {
| ^~~~~~~~~~~~~~~
vim +/stage2_pte_bits +178 arch/arm64/mm/ptdump.c
177
> 178 static const struct prot_bits stage2_pte_bits[] = {
179 {
180 .mask = PTE_VALID,
181 .val = PTE_VALID,
182 .set = " ",
183 .clear = "F",
184 }, {
185 .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
186 .val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
187 .set = "XN",
188 .clear = " ",
189 }, {
190 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
191 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
192 .set = "R",
193 .clear = " ",
194 }, {
195 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
196 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
197 .set = "W",
198 .clear = " ",
199 }, {
200 .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
201 .val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
202 .set = "AF",
203 .clear = " ",
204 }, {
205 .mask = PTE_NG,
206 .val = PTE_NG,
207 .set = "FnXS",
208 .clear = " ",
209 }, {
210 .mask = PTE_CONT,
211 .val = PTE_CONT,
212 .set = "CON",
213 .clear = " ",
214 }, {
215 .mask = PTE_TABLE_BIT,
216 .val = PTE_TABLE_BIT,
217 .set = " ",
218 .clear = "BLK",
219 }, {
220 .mask = KVM_PGTABLE_PROT_SW0,
221 .val = KVM_PGTABLE_PROT_SW0,
222 .set = "SW0", /* PKVM_PAGE_SHARED_OWNED */
223 }, {
224 .mask = KVM_PGTABLE_PROT_SW1,
225 .val = KVM_PGTABLE_PROT_SW1,
226 .set = "SW1", /* PKVM_PAGE_SHARED_BORROWED */
227 }, {
228 .mask = KVM_PGTABLE_PROT_SW2,
229 .val = KVM_PGTABLE_PROT_SW2,
230 .set = "SW2",
231 }, {
232 .mask = KVM_PGTABLE_PROT_SW3,
233 .val = KVM_PGTABLE_PROT_SW3,
234 .set = "SW3",
235 },
236 };
237
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot
2023-11-15 17:16 ` [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot Sebastian Ene
2023-11-15 21:57 ` kernel test robot
@ 2023-11-18 22:39 ` kernel test robot
1 sibling, 0 replies; 22+ messages in thread
From: kernel test robot @ 2023-11-18 22:39 UTC (permalink / raw)
To: Sebastian Ene, will, Oliver Upton, James Morse, Suzuki K Poulose,
Zenghui Yu, catalin.marinas, mark.rutland, akpm, maz
Cc: oe-kbuild-all, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa, Sebastian Ene
Hi Sebastian,
kernel test robot noticed the following build warnings:
[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on kvmarm/next linus/master v6.7-rc1 next-20231117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Ene/KVM-arm64-Add-snap-shooting-the-host-stage-2-pagetables/20231116-012017
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20231115171639.2852644-9-sebastianene%40google.com
patch subject: [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot
config: arm64-randconfig-r071-20231119 (https://download.01.org/0day-ci/archive/20231119/202311190602.TfLBzLiV-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231119/202311190602.TfLBzLiV-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311190602.TfLBzLiV-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> arch/arm64/mm/ptdump.c:178:31: warning: unused variable 'stage2_pte_bits' [-Wunused-const-variable]
178 | static const struct prot_bits stage2_pte_bits[] = {
| ^
1 warning generated.
vim +/stage2_pte_bits +178 arch/arm64/mm/ptdump.c
177
> 178 static const struct prot_bits stage2_pte_bits[] = {
179 {
180 .mask = PTE_VALID,
181 .val = PTE_VALID,
182 .set = " ",
183 .clear = "F",
184 }, {
185 .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
186 .val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
187 .set = "XN",
188 .clear = " ",
189 }, {
190 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
191 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
192 .set = "R",
193 .clear = " ",
194 }, {
195 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
196 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
197 .set = "W",
198 .clear = " ",
199 }, {
200 .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
201 .val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
202 .set = "AF",
203 .clear = " ",
204 }, {
205 .mask = PTE_NG,
206 .val = PTE_NG,
207 .set = "FnXS",
208 .clear = " ",
209 }, {
210 .mask = PTE_CONT,
211 .val = PTE_CONT,
212 .set = "CON",
213 .clear = " ",
214 }, {
215 .mask = PTE_TABLE_BIT,
216 .val = PTE_TABLE_BIT,
217 .set = " ",
218 .clear = "BLK",
219 }, {
220 .mask = KVM_PGTABLE_PROT_SW0,
221 .val = KVM_PGTABLE_PROT_SW0,
222 .set = "SW0", /* PKVM_PAGE_SHARED_OWNED */
223 }, {
224 .mask = KVM_PGTABLE_PROT_SW1,
225 .val = KVM_PGTABLE_PROT_SW1,
226 .set = "SW1", /* PKVM_PAGE_SHARED_BORROWED */
227 }, {
228 .mask = KVM_PGTABLE_PROT_SW2,
229 .val = KVM_PGTABLE_PROT_SW2,
230 .set = "SW2",
231 }, {
232 .mask = KVM_PGTABLE_PROT_SW3,
233 .val = KVM_PGTABLE_PROT_SW3,
234 .set = "SW3",
235 },
236 };
237
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v3 08/10] arm64: ptdump: Interpret memory attributes based on runtime configuration
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (6 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 07/10] arm64: ptdump: Parse the host stage-2 page-tables from the snapshot Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 09/10] arm64: ptdump: Interpret pKVM ownership annotations Sebastian Ene
` (2 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
When FWB is used the memory attributes stored in the descriptors have a
different bitfield layout. Introduce two callbacks that verify the current
runtime configuration before parsing the attribute fields.
Add support for parsing the memory attribute fields from the page table
descriptors.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/mm/ptdump.c | 65 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 64 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 9f88542d5312..ec7f6430f6d7 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -89,11 +89,19 @@ struct pg_state {
struct ptdump_info_file_priv *f_priv;
};
+/*
+ * This callback checks the runtime configuration before interpreting the
+ * attributes defined in the prot_bits.
+ */
+typedef bool (*is_feature_cb)(const void *ctx);
+
struct prot_bits {
u64 mask;
u64 val;
const char *set;
const char *clear;
+ is_feature_cb feature_on; /* bit ignored if the callback returns false */
+ is_feature_cb feature_off; /* bit ignored if the callback returns true */
};
static const struct prot_bits pte_bits[] = {
@@ -175,6 +183,34 @@ static const struct prot_bits pte_bits[] = {
}
};
+static bool is_fwb_enabled(const void *ctx)
+{
+ const struct pg_state *st = ctx;
+ struct ptdump_info_file_priv *f_priv = st->f_priv;
+ struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
+ struct kvm_pgtable *pgtable = &snapshot->pgtable;
+
+ bool fwb_enabled = false;
+
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ fwb_enabled = !(pgtable->flags & KVM_PGTABLE_S2_NOFWB);
+
+ return fwb_enabled;
+}
+
+static bool is_table_bit_ignored(const void *ctx)
+{
+ const struct pg_state *st = ctx;
+
+ if (!(st->current_prot & PTE_VALID))
+ return true;
+
+ if (st->level == CONFIG_PGTABLE_LEVELS)
+ return true;
+
+ return false;
+}
+
static const struct prot_bits stage2_pte_bits[] = {
{
.mask = PTE_VALID,
@@ -216,6 +252,27 @@ static const struct prot_bits stage2_pte_bits[] = {
.val = PTE_TABLE_BIT,
.set = " ",
.clear = "BLK",
+ .feature_off = is_table_bit_ignored,
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | PTE_VALID,
+ .val = PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_VALID,
+ .set = "DEVICE/nGnRE",
+ .feature_off = is_fwb_enabled,
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | PTE_VALID,
+ .val = PTE_S2_MEMATTR(MT_S2_FWB_DEVICE_nGnRE) | PTE_VALID,
+ .set = "DEVICE/nGnRE FWB",
+ .feature_on = is_fwb_enabled,
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | PTE_VALID,
+ .val = PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_VALID,
+ .set = "MEM/NORMAL",
+ .feature_off = is_fwb_enabled,
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | PTE_VALID,
+ .val = PTE_S2_MEMATTR(MT_S2_FWB_NORMAL) | PTE_VALID,
+ .set = "MEM/NORMAL FWB",
+ .feature_on = is_fwb_enabled,
}, {
.mask = KVM_PGTABLE_PROT_SW0,
.val = KVM_PGTABLE_PROT_SW0,
@@ -267,13 +324,19 @@ static struct pg_level pg_level[] = {
};
static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
- size_t num)
+ size_t num)
{
unsigned i;
for (i = 0; i < num; i++, bits++) {
const char *s;
+ if (bits->feature_on && !bits->feature_on(st))
+ continue;
+
+ if (bits->feature_off && bits->feature_off(st))
+ continue;
+
if ((st->current_prot & bits->mask) == bits->val)
s = bits->set;
else
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v3 09/10] arm64: ptdump: Interpret pKVM ownership annotations
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (7 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 08/10] arm64: ptdump: Interpret memory attributes based on runtime configuration Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-15 17:16 ` [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping Sebastian Ene
2023-11-22 23:18 ` [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Oliver Upton
10 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Add support for interpreting pKVM invalid stage-2 descriptors that hold
ownership information. We use these descriptors to keep track of the
memory donations from the host side.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/kvm_pgtable.h | 7 +++++++
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 7 -------
arch/arm64/mm/ptdump.c | 10 ++++++++++
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 913f34d75b29..938baffa7d4d 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -87,6 +87,13 @@ typedef u64 kvm_pte_t;
*/
#define KVM_INVALID_PTE_LOCKED BIT(10)
+/* This corresponds to page-table locking order */
+enum pkvm_component_id {
+ PKVM_ID_HOST,
+ PKVM_ID_HYP,
+ PKVM_ID_FFA,
+};
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9cfb35d68850..cc2c439ffe75 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -53,13 +53,6 @@ struct host_mmu {
};
extern struct host_mmu host_mmu;
-/* This corresponds to page-table locking order */
-enum pkvm_component_id {
- PKVM_ID_HOST,
- PKVM_ID_HYP,
- PKVM_ID_FFA,
-};
-
extern unsigned long hyp_nr_cpus;
int __pkvm_prot_finalize(void);
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index ec7f6430f6d7..ffb87237078b 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -273,6 +273,16 @@ static const struct prot_bits stage2_pte_bits[] = {
.val = PTE_S2_MEMATTR(MT_S2_FWB_NORMAL) | PTE_VALID,
.set = "MEM/NORMAL FWB",
.feature_on = is_fwb_enabled,
+ }, {
+ .mask = KVM_INVALID_PTE_OWNER_MASK | PTE_VALID,
+ .val = FIELD_PREP_CONST(KVM_INVALID_PTE_OWNER_MASK,
+ PKVM_ID_HYP),
+ .set = "HYP",
+ }, {
+ .mask = KVM_INVALID_PTE_OWNER_MASK | PTE_VALID,
+ .val = FIELD_PREP_CONST(KVM_INVALID_PTE_OWNER_MASK,
+ PKVM_ID_FFA),
+ .set = "FF-A",
}, {
.mask = KVM_PGTABLE_PROT_SW0,
.val = KVM_PGTABLE_PROT_SW0,
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (8 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 09/10] arm64: ptdump: Interpret pKVM ownership annotations Sebastian Ene
@ 2023-11-15 17:16 ` Sebastian Ene
2023-11-22 23:35 ` Oliver Upton
2023-11-22 23:18 ` [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Oliver Upton
10 siblings, 1 reply; 22+ messages in thread
From: Sebastian Ene @ 2023-11-15 17:16 UTC (permalink / raw)
To: will, Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
catalin.marinas, mark.rutland, akpm, maz
Cc: kvmarm, linux-arm-kernel, linux-kernel, kernel-team, vdonnefort,
qperret, smostafa, Sebastian Ene
Register a debugfs file on guest creation to be able to view their
second translation tables with ptdump. This assumes that the host is in
control of the guest stage-2 and has direct access to the pagetables.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 7 ++++
arch/arm64/kvm/debug.c | 6 +++
arch/arm64/kvm/mmu.c | 2 +
arch/arm64/mm/ptdump.c | 68 +++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index de5a5a0c0ecf..21b281715d27 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,6 +5,8 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
+#include <asm/kvm_pgtable.h>
+
#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
@@ -23,6 +25,7 @@ struct ptdump_info {
int (*ptdump_prepare_walk)(void *file_priv);
void (*ptdump_end_walk)(void *file_priv);
size_t mc_len;
+ void *priv;
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
@@ -48,8 +51,12 @@ void ptdump_check_wx(void);
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void ptdump_register_host_stage2(void);
+int ptdump_register_guest_stage2(struct kvm *kvm);
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt);
#else
static inline void ptdump_register_host_stage2(void) { }
+static inline int ptdump_register_guest_stage2(struct kvm *kvm) { return 0; }
+static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { }
#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
#ifdef CONFIG_DEBUG_WX
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 8725291cb00a..555d022f8ad9 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -13,6 +13,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
+#include <asm/ptdump.h>
#include "trace.h"
@@ -342,3 +343,8 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
+
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+ return ptdump_register_guest_stage2(kvm);
+}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d87c8fcc4c24..da45050596e6 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -11,6 +11,7 @@
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
+#include <asm/ptdump.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
@@ -1021,6 +1022,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
+ ptdump_unregister_guest_stage2(pgt);
kvm_pgtable_stage2_destroy(pgt);
kfree(pgt);
}
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index ffb87237078b..741764cff105 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -27,6 +27,7 @@
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
enum address_markers_idx {
@@ -519,6 +520,16 @@ void ptdump_check_wx(void)
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
static struct ptdump_info stage2_kernel_ptdump_info;
+struct ptdump_registered_guest {
+ struct list_head reg_list;
+ struct ptdump_info info;
+ struct kvm_pgtable_snapshot snapshot;
+ rwlock_t *lock;
+};
+
+static LIST_HEAD(ptdump_guest_list);
+static DEFINE_MUTEX(ptdump_list_lock);
+
static phys_addr_t ptdump_host_pa(void *addr)
{
return __pa(addr);
@@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
}
+static void guest_stage2_ptdump_walk(struct seq_file *s,
+ struct ptdump_info *info)
+{
+ struct ptdump_info_file_priv *f_priv =
+ container_of(info, struct ptdump_info_file_priv, info);
+ struct ptdump_registered_guest *guest = info->priv;
+
+ f_priv->file_priv = &guest->snapshot;
+
+ read_lock(guest->lock);
+ stage2_ptdump_walk(s, info);
+ read_unlock(guest->lock);
+}
+
+int ptdump_register_guest_stage2(struct kvm *kvm)
+{
+ struct ptdump_registered_guest *guest;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgt = mmu->pgt;
+
+ guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
+ if (!guest)
+ return -ENOMEM;
+
+ memcpy(&guest->snapshot.pgtable, pgt, sizeof(struct kvm_pgtable));
+ guest->info = (struct ptdump_info) {
+ .ptdump_walk = guest_stage2_ptdump_walk,
+ };
+
+ guest->info.priv = guest;
+ guest->lock = &kvm->mmu_lock;
+ mutex_lock(&ptdump_list_lock);
+
+ ptdump_debugfs_kvm_register(&guest->info, "stage2_page_tables",
+ kvm->debugfs_dentry);
+
+ list_add(&guest->reg_list, &ptdump_guest_list);
+ mutex_unlock(&ptdump_list_lock);
+
+ return 0;
+}
+
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt)
+{
+ struct ptdump_registered_guest *guest;
+
+ mutex_lock(&ptdump_list_lock);
+ list_for_each_entry(guest, &ptdump_guest_list, reg_list) {
+ if (guest->snapshot.pgtable.pgd == pgt->pgd) {
+ list_del(&guest->reg_list);
+ kfree(guest);
+ break;
+ }
+ }
+ mutex_unlock(&ptdump_list_lock);
+}
+
void ptdump_register_host_stage2(void)
{
if (!is_protected_kvm_enabled())
--
2.43.0.rc0.421.g78406f8d94-goog
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping
2023-11-15 17:16 ` [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping Sebastian Ene
@ 2023-11-22 23:35 ` Oliver Upton
2023-11-23 10:58 ` Sebastian Ene
0 siblings, 1 reply; 22+ messages in thread
From: Oliver Upton @ 2023-11-22 23:35 UTC (permalink / raw)
To: Sebastian Ene
Cc: will, James Morse, Suzuki K Poulose, Zenghui Yu, catalin.marinas,
mark.rutland, akpm, maz, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa
On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> +struct ptdump_registered_guest {
> + struct list_head reg_list;
> + struct ptdump_info info;
> + struct kvm_pgtable_snapshot snapshot;
> + rwlock_t *lock;
> +};
Why can't we just store a pointer directly to struct kvm in ::private?
Also, shouldn't you take a reference on struct kvm when the file is
opened to protect against VM teardown?
> +static LIST_HEAD(ptdump_guest_list);
> +static DEFINE_MUTEX(ptdump_list_lock);
What is the list for?
> static phys_addr_t ptdump_host_pa(void *addr)
> {
> return __pa(addr);
> @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> }
>
> +static void guest_stage2_ptdump_walk(struct seq_file *s,
> + struct ptdump_info *info)
> +{
> + struct ptdump_info_file_priv *f_priv =
> + container_of(info, struct ptdump_info_file_priv, info);
> + struct ptdump_registered_guest *guest = info->priv;
> +
> + f_priv->file_priv = &guest->snapshot;
> +
> + read_lock(guest->lock);
> + stage2_ptdump_walk(s, info);
> + read_unlock(guest->lock);
Taking the mmu lock for read allows other table walkers to add new
mappings and adjust the granularity of existing ones. Should this
instead take the mmu lock for write?
> +}
> +
> +int ptdump_register_guest_stage2(struct kvm *kvm)
> +{
> + struct ptdump_registered_guest *guest;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct kvm_pgtable *pgt = mmu->pgt;
> +
> + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
You want GFP_KERNEL_ACCOUNT here.
--
Thanks,
Oliver
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping
2023-11-22 23:35 ` Oliver Upton
@ 2023-11-23 10:58 ` Sebastian Ene
0 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-23 10:58 UTC (permalink / raw)
To: Oliver Upton
Cc: will, James Morse, Suzuki K Poulose, Zenghui Yu, catalin.marinas,
mark.rutland, akpm, maz, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa
On Wed, Nov 22, 2023 at 11:35:57PM +0000, Oliver Upton wrote:
> On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> > +struct ptdump_registered_guest {
> > + struct list_head reg_list;
> > + struct ptdump_info info;
> > + struct kvm_pgtable_snapshot snapshot;
> > + rwlock_t *lock;
> > +};
>
> Why can't we just store a pointer directly to struct kvm in ::private?
I don't think it will work unless we expect a struct kvm_pgtable
in stage2_ptdump_walk file_priv field. I think it is a good ideea and will
simplify things a little bit dropping kvm_pgtable_snapshot from here.
The current code has some fields that are redundant (the priv pointers)
because I also made this to work with protected guests where we can't
access their pagetables directly.
> Also, shouldn't you take a reference on struct kvm when the file is
> opened to protect against VM teardown?
>
I am not sure about the need; could you please elaborate a bit? On VM
teardown we expect ptdump_unregister_guest_stage2 to be invoked.
> > +static LIST_HEAD(ptdump_guest_list);
> > +static DEFINE_MUTEX(ptdump_list_lock);
>
> What is the list for?
>
I am keeping a list of registered guests with ptdump and the lock should
protect the list against concurrent VM teardowns.
> > static phys_addr_t ptdump_host_pa(void *addr)
> > {
> > return __pa(addr);
> > @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> > }
> >
> > +static void guest_stage2_ptdump_walk(struct seq_file *s,
> > + struct ptdump_info *info)
> > +{
> > + struct ptdump_info_file_priv *f_priv =
> > + container_of(info, struct ptdump_info_file_priv, info);
> > + struct ptdump_registered_guest *guest = info->priv;
> > +
> > + f_priv->file_priv = &guest->snapshot;
> > +
> > + read_lock(guest->lock);
> > + stage2_ptdump_walk(s, info);
> > + read_unlock(guest->lock);
>
> Taking the mmu lock for read allows other table walkers to add new
> mappings and adjust the granularity of existing ones. Should this
> instead take the mmu lock for write?
>
Thanks for pointing that out; this is exactly what I was trying to avoid,
so yes I should use the write mmu lock in this case.
> > +}
> > +
> > +int ptdump_register_guest_stage2(struct kvm *kvm)
> > +{
> > + struct ptdump_registered_guest *guest;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct kvm_pgtable *pgt = mmu->pgt;
> > +
> > + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
>
> You want GFP_KERNEL_ACCOUNT here.
>
Right, thanks this is because it is an untrusted allocation triggered from
userspace.
> --
> Thanks,
> Oliver
Thank you,
Seb
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables
2023-11-15 17:16 [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Sebastian Ene
` (9 preceding siblings ...)
2023-11-15 17:16 ` [PATCH v3 10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping Sebastian Ene
@ 2023-11-22 23:18 ` Oliver Upton
2023-11-23 9:49 ` Sebastian Ene
10 siblings, 1 reply; 22+ messages in thread
From: Oliver Upton @ 2023-11-22 23:18 UTC (permalink / raw)
To: Sebastian Ene
Cc: will, James Morse, Suzuki K Poulose, Zenghui Yu, catalin.marinas,
mark.rutland, akpm, maz, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa
Hi Seb,
On Wed, Nov 15, 2023 at 05:16:30PM +0000, Sebastian Ene wrote:
> Hi,
>
> This can be used as a debugging tool for dumping the second stage
> page-tables.
>
> When CONFIG_PTDUMP_STAGE2_DEBUGFS is enabled, ptdump registers
> '/sys/debug/kvm/<guest_id>/stage2_page_tables' entry with debugfs
> upon guest creation. This allows userspace tools (eg. cat) to dump the
> stage-2 pagetables by reading the registered file.
>
> Reading the debugfs file shows stage-2 memory ranges in following format:
> <IPA range> <size> <descriptor type> <access permissions> <mem_attributes>
>
> Under pKVM configuration(kvm-arm.mode=protected) ptdump registers an entry
> for the host stage-2 pagetables in the following path:
> /sys/debug/kvm/host_stage2_page_tables/
>
> The tool interprets the pKVM ownership annotation stored in the invalid
> entries and dumps to the console the ownership information. To be able
> to access the host stage-2 page-tables from the kernel, a new hypervisor
> call was introduced which allows us to snapshot the page-tables in a host
> provided buffer. The hypervisor call is hidden behind CONFIG_NVHE_EL2_DEBUG
> as this should be used under debugging environment.
While I think the value of the feature you're proposing is great, I'm
not a fan of the current shape of this series.
Reusing note_page() for the stage-2 dump is somewhat convenient, but the
series pulls a **massive** amount of KVM details outside of KVM:
- Open-coding the whole snapshotting interface with EL2 outside of KVM.
This is a complete non-starter for me; the kernel<->EL2 interface
needs to be owned by the EL1 portions of KVM.
- Building page-table walkers using the KVM pgtable library outside of
KVM.
- Copying (rather than directly calling) the logic responsible for
things like FWB and PGD concatenation.
- Hoisting the definition of _software bits_ outside of KVM. I'm less
concerned about hardware bits since they have an unambiguous meaning.
I think exporting the necessary stuff from ptdump into KVM will lead to
a much cleaner implementation.
--
Thanks,
Oliver
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables
2023-11-22 23:18 ` [PATCH v3 00/10] arm64: ptdump: View the second stage page-tables Oliver Upton
@ 2023-11-23 9:49 ` Sebastian Ene
0 siblings, 0 replies; 22+ messages in thread
From: Sebastian Ene @ 2023-11-23 9:49 UTC (permalink / raw)
To: Oliver Upton
Cc: will, James Morse, Suzuki K Poulose, Zenghui Yu, catalin.marinas,
mark.rutland, akpm, maz, kvmarm, linux-arm-kernel, linux-kernel,
kernel-team, vdonnefort, qperret, smostafa
On Wed, Nov 22, 2023 at 11:18:45PM +0000, Oliver Upton wrote:
Hi Oliver,
> Hi Seb,
>
> On Wed, Nov 15, 2023 at 05:16:30PM +0000, Sebastian Ene wrote:
> > Hi,
> >
> > This can be used as a debugging tool for dumping the second stage
> > page-tables.
> >
> > When CONFIG_PTDUMP_STAGE2_DEBUGFS is enabled, ptdump registers
> > '/sys/debug/kvm/<guest_id>/stage2_page_tables' entry with debugfs
> > upon guest creation. This allows userspace tools (eg. cat) to dump the
> > stage-2 pagetables by reading the registered file.
> >
> > Reading the debugfs file shows stage-2 memory ranges in the following format:
> > <IPA range> <size> <descriptor type> <access permissions> <mem_attributes>
> >
> > Under pKVM configuration (kvm-arm.mode=protected) ptdump registers an entry
> > for the host stage-2 pagetables in the following path:
> > /sys/debug/kvm/host_stage2_page_tables/
> >
> > The tool interprets the pKVM ownership annotation stored in the invalid
> > entries and dumps to the console the ownership information. To be able
> > to access the host stage-2 page-tables from the kernel, a new hypervisor
> > call was introduced which allows us to snapshot the page-tables in a host
> > provided buffer. The hypervisor call is hidden behind CONFIG_NVHE_EL2_DEBUG
> > as this should be used under debugging environment.
>
> While I think the value of the feature you're proposing is great, I'm
> not a fan of the current shape of this series.
>
> Reusing note_page() for the stage-2 dump is somewhat convenient, but the
> series pulls a **massive** amount of KVM details outside of KVM:
>
> - Open-coding the whole snapshotting interface with EL2 outside of KVM.
> This is a complete non-starter for me; the kernel<->EL2 interface
> needs to be owned by the EL1 portions of KVM.
>
> - Building page-table walkers using the KVM pgtable library outside of
> KVM.
>
> - Copying (rather than directly calling) the logic responsible for
> things like FWB and PGD concatenation.
>
> - Hoisting the definition of _software bits_ outside of KVM. I'm less
> concerned about hardware bits since they have an unambiguous meaning.
>
> I think exporting the necessary stuff from ptdump into KVM will lead to
> a much cleaner implementation.
>
Right, I had to import a lot of definitions from KVM, especially for the
prot_bits array and for the IPA size retrieval. I think it would be less
intrusive to do it the other way around and pull some ptdump hooks into KVM.
> --
> Thanks,
> Oliver
Thanks,
Seb
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 22+ messages in thread