* [PATCH 01/17] KVM: arm64: Add __pkvm_private_range_pa
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 02/17] KVM: arm64: Add pkvm_remove_mappings Vincent Donnefort
` (15 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Mappings in the pKVM private range are not identity mapped, making the
standard __hyp_pa() unsuitable for translating these addresses.
Introduce __pkvm_private_range_pa() to resolve physical addresses for
this range by walking the hypervisor page-table. This will be useful for
the upcoming pKVM heap allocator.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 6e83ce35c2f2..d3137c16b632 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -30,5 +30,6 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
unsigned long *haddr);
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
+phys_addr_t __pkvm_private_range_pa(void *va);
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 3b0bee496bff..773426a68d2d 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -502,3 +502,17 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
return ret;
}
+
+/*
+ * Translate a hypervisor private-range virtual address into a physical
+ * address by looking up its leaf entry in the hyp stage-1 page-table. The walk
+ * is done with pkvm_pgd_lock held.
+ *
+ * The offset added to the leaf's output address is derived from the level at
+ * which the leaf was found, so the result is also correct when @va is covered
+ * by a block mapping rather than by a page mapping.
+ *
+ * A failed walk or an invalid leaf triggers a WARN_ON().
+ */
+phys_addr_t __pkvm_private_range_pa(void *va)
+{
+	/* Default to a page-sized leaf in case the walk does not report one */
+	s8 level = KVM_PGTABLE_LAST_LEVEL;
+	kvm_pte_t pte = 0;
+	u64 offset;
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+	WARN_ON(kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)va, &pte, &level));
+	hyp_spin_unlock(&pkvm_pgd_lock);
+
+	WARN_ON(!kvm_pte_valid(pte));
+
+	/* A leaf above the last level maps more than a single page */
+	offset = (u64)va & (kvm_granule_size(level) - 1);
+
+	return kvm_pte_to_phys(pte) + offset;
+}
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 02/17] KVM: arm64: Add pkvm_remove_mappings
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
2026-05-20 15:26 ` [PATCH 01/17] KVM: arm64: Add __pkvm_private_range_pa Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 03/17] KVM: arm64: Add __hyp_allocator_map for the pKVM hyp Vincent Donnefort
` (14 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Add the counterpart to pkvm_create_mappings(), allowing previously
mapped ranges to be removed. This will be useful for the upcoming pKVM
heap allocator to manage its private mappings.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index d3137c16b632..725bb0fb941d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -25,6 +25,7 @@ int hyp_back_vmemmap(phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
+void pkvm_remove_mappings(void *from, void *to);
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 773426a68d2d..ec59da1322a7 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -146,6 +146,20 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return ret;
}
+/*
+ * Remove the hyp stage-1 mappings for the pages spanning @from to @to. @from
+ * is rounded down and @to is rounded up to a page boundary before the range
+ * is unmapped with pkvm_pgd_lock held. Warns if fewer bytes than requested
+ * were unmapped.
+ */
+void pkvm_remove_mappings(void *from, void *to)
+{
+	void *first = PTR_ALIGN_DOWN(from, PAGE_SIZE);
+	void *last = PTR_ALIGN(to, PAGE_SIZE);
+	u64 len = (u64)last - (u64)first;
+	u64 unmapped;
+
+	WARN_ON(first > last);
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+	unmapped = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)first, len);
+	WARN_ON(unmapped != len);
+	hyp_spin_unlock(&pkvm_pgd_lock);
+}
+
int hyp_back_vmemmap(phys_addr_t back)
{
unsigned long i, start, size, end = 0;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 03/17] KVM: arm64: Add __hyp_allocator_map for the pKVM hyp
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
2026-05-20 15:26 ` [PATCH 01/17] KVM: arm64: Add __pkvm_private_range_pa Vincent Donnefort
2026-05-20 15:26 ` [PATCH 02/17] KVM: arm64: Add pkvm_remove_mappings Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 04/17] KVM: arm64: Add a heap allocator " Vincent Donnefort
` (13 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
In preparation for the pKVM heap allocator, introduce __hyp_allocator_map()
to map a single physical page to a given virtual address in the hypervisor
stage-1 page-table.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 725bb0fb941d..98a7774b541c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -32,5 +32,6 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
phys_addr_t __pkvm_private_range_pa(void *va);
+int __hyp_allocator_map(void *va, phys_addr_t phys);
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index ec59da1322a7..c1c01f81ac5f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -160,6 +160,11 @@ void pkvm_remove_mappings(void *from, void *to)
hyp_spin_unlock(&pkvm_pgd_lock);
}
+/*
+ * Map one page of physical memory at @phys to the virtual address @va with
+ * PAGE_HYP permissions. Returns whatever __pkvm_create_mappings() returns.
+ */
+int __hyp_allocator_map(void *va, phys_addr_t phys)
+{
+	unsigned long start = (unsigned long)va;
+
+	return __pkvm_create_mappings(start, PAGE_SIZE, phys, PAGE_HYP);
+}
+
int hyp_back_vmemmap(phys_addr_t back)
{
unsigned long i, start, size, end = 0;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 04/17] KVM: arm64: Add a heap allocator for the pKVM hyp
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (2 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 03/17] KVM: arm64: Add __hyp_allocator_map for the pKVM hyp Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 05/17] KVM: arm64: Allow kvm_hyp_memcache usage outside of stage-2 Vincent Donnefort
` (12 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Currently, memory used by the hypervisor comes from donations that are
embedded within HVCs. e.g. __pkvm_init_vm()'s hyp_vm.
This is cumbersome: the host needs to know the size of those
structs, and the memory must be page-aligned and physically contiguous,
which may be difficult to satisfy when host memory is highly fragmented.
Create a heap allocator to manage VA-contiguous memory. This allocator
grows upward, recycles unused chunks of memory and provides a simple API
to allocate and free:
hyp_alloc(size), hyp_free(addr)
This heap allocator also manages the underlying physical memory,
allowing the host to top up the allocator's pool and reclaim memory.
hyp_alloc_topup(), hyp_alloc_reclaim().
Pages remain mapped in the allocator's VA-space as long as they are not
reclaimed.
When the allocator runs out of memory, hyp_alloc() fails and
hyp_alloc_errno() returns -ENOMEM to signal that a top-up is required.
Additionally, harden the allocator by hashing chunk headers to detect
metadata corruption.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/include/nvhe/alloc.h b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
new file mode 100644
index 000000000000..8f87a63f8946
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_NVHE_ALLOC__
+#define __KVM_NVHE_ALLOC__
+#include <linux/types.h>
+
+#include <asm/kvm_host.h>
+
+void *hyp_alloc(size_t size);
+int hyp_alloc_errno(void);
+u32 hyp_alloc_topup_needed(void);
+void hyp_free(void *addr);
+
+int hyp_alloc_init(size_t size);
+int hyp_alloc_topup(struct kvm_hyp_memcache *host_mc);
+unsigned long hyp_alloc_reclaimable(void);
+void hyp_alloc_reclaim(struct kvm_hyp_memcache *host_mc, unsigned long target);
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 62cdfbff7562..66362bfa7061 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -23,7 +23,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
CFLAGS_switch.nvhe.o += -Wno-override-init
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
- hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
+ hyp-main.o hyp-smp.o psci-relay.o alloc.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
diff --git a/arch/arm64/kvm/hyp/nvhe/alloc.c b/arch/arm64/kvm/hyp/nvhe/alloc.c
new file mode 100644
index 000000000000..183336f297c3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/alloc.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ *
+ * This heap allocator manages a reserved VA space range, dynamically mapping
+ * and unmapping physical pages on-demand to minimise the pKVM hypervisor
+ * footprint. As memory is reclaimed and relinquished to the host, unmapped
+ * holes are introduced within the VA space. To prevent orphan mapped regions,
+ * neighboring unused chunks cannot be merged if they are separated by an
+ * unmapped region.
+ *
+ */
+
+#include <nvhe/alloc.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/mm.h>
+#include <nvhe/spinlock.h>
+
+#include <linux/build_bug.h>
+#include <linux/hash.h>
+
+#define MIN_ALLOC_SIZE 8UL /* Must be a power of two */
+
+/**
+ * struct chunk_hdr - Chunk header
+ * @next: Byte offset from this chunk header to the next one, 0 if there is
+ * no next chunk.
+ * @prev: Byte offset from this chunk header to the previous one, 0 if there
+ * is no previous chunk.
+ * @__unmapped: Internal field containing the offset to the unmapped page
+ * boundary (0 if there is none), multiplexed with the allocation state flag
+ * held in USED_BIT_MASK.
+ * @hash: Hash computed over @next, @prev and @__unmapped.
+ * @data: Start of the chunk payload, located right after the header fields.
+ */
+struct chunk_hdr {
+ u32 next;
+ u32 prev;
+#define USED_BIT_MASK 1U
+ u32 __unmapped;
+ u32 hash;
+ char data[];
+} __aligned(MIN_ALLOC_SIZE);
+
+/**
+ * struct hyp_allocator - Heap allocator
+ * @start: Start in the allocator's reserved virtual address range.
+ * @end: End in the allocator's reserved virtual address range.
+ * @last_used: Pointer to the end of the last used chunk. This is
+ * necessary for the last chunk in the list as the
+ * allocated size of a chunk is derived from the next one.
+ * @first_unmapped: Pointer to the first unmapped page in the
+ * allocator's range. This is only necessary and
+ * updated when no chunk is in the list.
+ * @head: Head of the chunk list.
+ * @tail: Tail of the chunk list.
+ * @mc: Memcache containing pre-allocated pages for mapping.
+ * @lock: Spinlock protecting the allocator state.
+ * @errno: Per-CPU error code for allocation failures.
+ * @topup_needed: Per-CPU page counter needed to top-up the memcache.
+ */
+struct hyp_allocator {
+ void *start;
+ void *end;
+ void *last_used;
+ void *first_unmapped;
+ struct chunk_hdr *head;
+ struct chunk_hdr *tail;
+ struct kvm_hyp_memcache mc;
+ hyp_spinlock_t lock;
+ int __percpu *errno;
+ u32 __percpu *topup_needed;
+};
+
+static u32 chunk_hash_compute(const struct chunk_hdr *chunk)
+{
+ u32 hash = 0;
+
+ BUILD_BUG_ON(sizeof(*chunk) != 16);
+
+ hash ^= hash_64(*(const u64 *)chunk, 32);
+ hash ^= hash_32(chunk->__unmapped, 32);
+ return hash;
+}
+
+static void chunk_set_hash(struct chunk_hdr *chunk)
+{
+ if (chunk)
+ chunk->hash = chunk_hash_compute(chunk);
+}
+
+static void chunk_check_hash(const struct chunk_hdr *chunk)
+{
+ if (chunk)
+ WARN_ON(chunk->hash != chunk_hash_compute(chunk));
+}
+
+static bool chunk_is_used(const struct chunk_hdr *chunk)
+{
+ return !!(chunk->__unmapped & USED_BIT_MASK);
+}
+
+static void chunk_set_used(struct chunk_hdr *chunk)
+{
+ chunk->__unmapped |= USED_BIT_MASK;
+}
+
+static void chunk_set_unused(struct chunk_hdr *chunk)
+{
+ chunk->__unmapped &= ~USED_BIT_MASK;
+}
+
+static void *chunk_unmapped(const struct chunk_hdr *chunk)
+{
+ u32 offset = chunk->__unmapped & ~USED_BIT_MASK;
+
+ if (!offset)
+ return NULL;
+
+ return (void *)chunk + offset;
+}
+
+static void __chunk_set_unmapped(struct chunk_hdr *chunk, u32 unmapped)
+{
+ chunk->__unmapped = unmapped | (chunk_is_used(chunk) ? USED_BIT_MASK : 0);
+}
+
+static void chunk_set_unmapped(struct chunk_hdr *chunk, void *unmapped)
+{
+ WARN_ON(!PAGE_ALIGNED(unmapped));
+
+ if (unmapped) {
+ WARN_ON((void *)chunk > unmapped);
+ __chunk_set_unmapped(chunk, unmapped - (void *)chunk);
+ } else {
+ __chunk_set_unmapped(chunk, 0);
+ }
+}
+
+static void *chunk_data(const struct chunk_hdr *chunk)
+{
+ return (void *)&chunk->data;
+}
+
+static struct chunk_hdr *__chunk_next(const struct chunk_hdr *chunk)
+{
+ if (!chunk->next)
+ return NULL;
+
+ return (struct chunk_hdr *)((void *)chunk + chunk->next);
+}
+
+static struct chunk_hdr *__chunk_prev(const struct chunk_hdr *chunk)
+{
+ if (!chunk->prev)
+ return NULL;
+
+ return (struct chunk_hdr *)((void *)chunk - chunk->prev);
+}
+
+static void chunk_set_next(struct chunk_hdr *chunk, struct chunk_hdr *next)
+{
+ if (!chunk)
+ return;
+
+ if (next) {
+ WARN_ON(chunk > next);
+ chunk->next = (void *)next - (void *)chunk;
+ } else {
+ chunk->next = 0;
+ }
+}
+
+static void chunk_set_prev(struct chunk_hdr *chunk, struct chunk_hdr *prev)
+{
+ if (!chunk)
+ return;
+
+ if (prev) {
+ WARN_ON(chunk < prev);
+ chunk->prev = (void *)chunk - (void *)prev;
+ } else {
+ chunk->prev = 0;
+ }
+}
+
+static struct chunk_hdr *chunk_get_next(const struct chunk_hdr *chunk)
+{
+ struct chunk_hdr *next = __chunk_next(chunk);
+
+ chunk_check_hash(next);
+ return next;
+}
+
+static struct chunk_hdr *chunk_get_prev(const struct chunk_hdr *chunk)
+{
+ struct chunk_hdr *prev = __chunk_prev(chunk);
+
+ chunk_check_hash(prev);
+ return prev;
+}
+
+static struct chunk_hdr *chunk_get(struct chunk_hdr *chunk)
+{
+ chunk_check_hash(chunk);
+ return chunk;
+}
+
+#define chunk_hdr_size() \
+ offsetof(struct chunk_hdr, data)
+
+#define chunk_min_size() \
+ (chunk_hdr_size() + MIN_ALLOC_SIZE)
+
+static size_t chunk_data_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+ struct chunk_hdr *next = chunk_get_next(chunk);
+ void *end;
+
+ if (next)
+ end = (void *)next;
+ else
+ end = allocator->end;
+
+ return end - chunk_data(chunk);
+}
+
+static size_t chunk_mapped_data_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+ void *unmapped = chunk_unmapped(chunk);
+
+ if (!unmapped)
+ return chunk_data_size(chunk, allocator);
+
+ return unmapped - chunk_data(chunk);
+}
+
+static size_t chunk_used_size(const struct chunk_hdr *chunk, struct hyp_allocator *allocator)
+{
+ struct chunk_hdr *next = chunk_get_next(chunk);
+
+ if (!chunk_is_used(chunk))
+ return 0;
+
+ if (next)
+ return chunk_mapped_data_size(chunk, allocator);
+
+ return allocator->last_used - chunk_data(chunk);
+}
+
+static void chunk_list_insert(struct chunk_hdr *chunk, struct chunk_hdr *prev)
+{
+ struct chunk_hdr *next = NULL;
+
+ WARN_ON(!chunk);
+
+ if (prev) {
+ next = chunk_get_next(prev);
+ chunk_set_next(prev, chunk);
+ chunk_set_hash(prev);
+ }
+
+ if (next) {
+ chunk_set_prev(next, chunk);
+ chunk_set_hash(next);
+ }
+
+ chunk_set_next(chunk, next);
+ chunk_set_prev(chunk, prev);
+}
+
+static void chunk_list_del(struct chunk_hdr *chunk)
+{
+ struct chunk_hdr *prev, *next;
+
+ WARN_ON(!chunk);
+
+ prev = chunk_get_prev(chunk);
+ next = chunk_get_next(chunk);
+
+ if (prev) {
+ chunk_set_next(prev, next);
+ chunk_set_hash(prev);
+ }
+
+ if (next) {
+ chunk_set_prev(next, prev);
+ chunk_set_hash(next);
+ }
+}
+
+/*
+ * Adjust @start into an address where a chunk header may be created. The
+ * returned address guarantees that the header does not straddle a page
+ * boundary and that, unless it sits exactly on a page start, it is at least
+ * chunk_min_size() bytes into its page. This is intended to prevent orphan
+ * mapped regions during chunk memory reclaim.
+ */
+static void *chunk_start(void *start)
+{
+	void *page_end = PTR_ALIGN(start, PAGE_SIZE);
+	void *page_begin;
+
+	/*
+	 * No room for a header before the end of the page: use the page
+	 * boundary (which is @start itself when already page-aligned).
+	 */
+	if (page_end - start < chunk_hdr_size())
+		return page_end;
+
+	/* Too close to the start of the page: move further into the page */
+	page_begin = PTR_ALIGN_DOWN(start, PAGE_SIZE);
+	if (start - page_begin < chunk_min_size())
+		return page_begin + chunk_min_size();
+
+	return start;
+}
+
+static int hyp_allocator_map(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+ struct chunk_hdr *next,
+ void *addr, void *end)
+{
+ void *unmapped = chunk ? chunk_unmapped(chunk) : allocator->first_unmapped;
+
+ /*
+ * hyp_allocator_can_create_chunk() already validates addr/end
+ * belong to the chunk.
+ */
+ WARN_ON(end <= addr);
+
+ /* The chunk does not span an unmapped region */
+ if (!unmapped)
+ return 0;
+
+ while (unmapped < end) {
+ void *page = pop_hyp_memcache(&allocator->mc, hyp_phys_to_virt);
+ int ret;
+
+ if (!page) {
+ end = PTR_ALIGN(end, PAGE_SIZE);
+ *this_cpu_ptr(allocator->topup_needed) =
+ (unsigned long)(end - unmapped) >> PAGE_SHIFT;
+ return -ENOMEM;
+ }
+
+ ret = __hyp_allocator_map(unmapped, hyp_virt_to_phys(page));
+ if (ret) {
+ push_hyp_memcache(&allocator->mc, page, hyp_virt_to_phys);
+ return ret;
+ }
+
+ unmapped += PAGE_SIZE;
+
+ /*
+ * Reset the unmap field if we've reached the next chunk or the
+ * allocator boundary.
+ */
+ if (unmapped == (next ?: allocator->end))
+ unmapped = 0;
+
+ if (chunk) {
+ chunk_set_unmapped(chunk, unmapped);
+ chunk_set_hash(chunk);
+ } else {
+ allocator->first_unmapped = unmapped;
+ }
+
+ if (!unmapped)
+ break;
+ }
+
+ return 0;
+}
+
+static void hyp_allocator_unmap(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+ void *addr, void *end)
+{
+ void *unmap = addr;
+
+ /*
+ * hyp_allocator_chunk_reclaimable() already computes valid addr/end, no
+ * need to check them again
+ */
+ WARN_ON(end <= addr);
+
+ while (unmap < end) {
+ phys_addr_t pa = __pkvm_private_range_pa((void *)unmap);
+ void *page = hyp_phys_to_virt(pa);
+
+ push_hyp_memcache(&allocator->mc, page, hyp_virt_to_phys);
+ unmap += PAGE_SIZE;
+ }
+
+ pkvm_remove_mappings((void *)addr, (void *)(end));
+
+ if (chunk) {
+ chunk_set_unmapped(chunk, addr);
+ chunk_set_hash(chunk);
+ } else {
+ allocator->first_unmapped = addr;
+ }
+}
+
+static bool hyp_allocator_can_create_chunk(struct hyp_allocator *allocator,
+ const struct chunk_hdr *prev,
+ const struct chunk_hdr *next,
+ void *addr, void *end)
+{
+ void *page, *unmapped;
+
+ if (addr < allocator->start || end > allocator->end)
+ return false;
+
+ /* First chunk created must be installed at allocator->start */
+ if (!prev)
+ return addr == allocator->start;
+
+ /* Must not overwrite the next chunk */
+ if (next && end > (void *)next)
+ return false;
+
+ /* Must not overwrite the previous chunk */
+ if (addr < (chunk_data(prev) + chunk_used_size(prev, allocator)))
+ return false;
+
+ /* Header must not cross page boundaries */
+ page = PTR_ALIGN(addr, PAGE_SIZE);
+ if (page != addr && (page - addr) < chunk_hdr_size())
+ return false;
+
+ /* Must leave a minimum distance from a page-start to maximise reclaim */
+ page = PTR_ALIGN_DOWN(addr, PAGE_SIZE);
+ if (page != addr && (addr - page) < chunk_min_size())
+ return false;
+
+ unmapped = chunk_unmapped(prev);
+ if (!unmapped)
+ return true;
+
+ /* Must never create an orphan mapped region */
+ if (addr > unmapped)
+ return false;
+
+ return true;
+}
+
+/*
+ * Tries to create a new chunk in the allocator whose header starts at @addr and
+ * whose data finishes at @end.
+ */
+static struct chunk_hdr *hyp_allocator_create_chunk(struct hyp_allocator *allocator,
+ struct chunk_hdr *prev, void *addr,
+ void *end, bool used)
+{
+ struct chunk_hdr *next, *chunk = addr;
+ void *unmapped;
+ int ret;
+
+ if (end > allocator->end)
+ return ERR_PTR(-E2BIG);
+
+ next = prev ? chunk_get_next(prev) : NULL;
+ if (!hyp_allocator_can_create_chunk(allocator, prev, next, addr, end))
+ return ERR_PTR(-EINVAL);
+
+ ret = hyp_allocator_map(allocator, prev, next, addr, end);
+ if (ret)
+ return ERR_PTR(ret);
+
+ memset(chunk, 0, sizeof(*chunk));
+ if (used)
+ chunk_set_used(chunk);
+ else
+ chunk_set_unused(chunk);
+
+ /* First chunk, first allocation */
+ if (!prev) {
+ chunk_set_unmapped(chunk, allocator->first_unmapped);
+ chunk_list_insert(chunk, NULL);
+ chunk_set_hash(chunk);
+
+ allocator->last_used = end;
+ allocator->head = allocator->tail = chunk;
+ return chunk;
+ }
+
+ /* Last chunk in the list */
+ if (!next) {
+ allocator->last_used = end;
+ allocator->tail = chunk;
+ }
+
+ /* Inherit prev's unmapped region */
+ unmapped = chunk_unmapped(prev);
+ chunk_set_unmapped(chunk, unmapped);
+ chunk_list_insert(chunk, prev);
+ chunk_set_hash(chunk);
+
+ chunk_set_unmapped(prev, 0);
+ chunk_set_hash(prev);
+
+ return chunk;
+}
+
+static bool hyp_allocator_can_destroy_chunk(struct hyp_allocator *allocator,
+ const struct chunk_hdr *prev,
+ const struct chunk_hdr *next,
+ const struct chunk_hdr *chunk)
+{
+ if (chunk_is_used(chunk))
+ return false;
+
+ /* Last chunk in the allocator */
+ if (!prev)
+ return true;
+
+ /* Can't merge down unless we are the last one in the list */
+ if (next && chunk_is_used(prev))
+ return false;
+
+ /* Must never create an orphan mapped region */
+ if (chunk_unmapped(prev))
+ return false;
+
+ return true;
+}
+
+static int hyp_allocator_destroy_chunk(struct hyp_allocator *allocator,
+ struct chunk_hdr *prev,
+ struct chunk_hdr *chunk)
+{
+ struct chunk_hdr *next;
+
+ next = prev ? chunk_get_next(chunk) : NULL;
+ if (!hyp_allocator_can_destroy_chunk(allocator, prev, next, chunk))
+ return -EINVAL;
+
+ /* Last chunk in the allocator */
+ if (!prev) {
+ allocator->first_unmapped = chunk_unmapped(chunk);
+ allocator->head = allocator->tail = NULL;
+ return 0;
+ }
+
+ /* Last chunk in the list */
+ if (!next) {
+ allocator->last_used = chunk;
+ allocator->tail = prev;
+ }
+
+ chunk_set_unmapped(prev, chunk_unmapped(chunk));
+ chunk_set_hash(prev);
+ chunk_list_del(chunk);
+
+ return 0;
+}
+
+/*
+ * Return the best unused chunk for recycling, or NULL if no unused chunk can
+ * hold @size bytes.
+ *
+ * Candidates are ranked first by the number of pages that would have to be
+ * mapped to satisfy the allocation (fewer is better) and, on a tie, by their
+ * data size (smaller is better). The ranking is lexicographic, so the result
+ * does not depend on the order in which the chunks are visited.
+ */
+static struct chunk_hdr *hyp_allocator_find_efficient_chunk(struct hyp_allocator *allocator,
+							    size_t size)
+{
+	struct chunk_hdr *chunk, *best_chunk = NULL;
+	size_t best_data_size = SIZE_MAX;
+	size_t best_missing = SIZE_MAX;
+
+	for (chunk = allocator->head; chunk; chunk = chunk_get_next(chunk)) {
+		size_t missing, mapped, data_size;
+
+		if (chunk_is_used(chunk))
+			continue;
+
+		/* Too small, even if it were entirely mapped */
+		data_size = chunk_data_size(chunk, allocator);
+		if (data_size < size)
+			continue;
+
+		/* Number of pages to map before @size bytes are usable */
+		mapped = chunk_mapped_data_size(chunk, allocator);
+		missing = (size > mapped) ? DIV_ROUND_UP(size - mapped, PAGE_SIZE) : 0;
+		if (missing > best_missing)
+			continue;
+
+		/*
+		 * Only compare sizes on a tie: a chunk needing fewer pages
+		 * must win even if it is larger than the current best.
+		 */
+		if (missing == best_missing && data_size >= best_data_size)
+			continue;
+
+		best_missing = missing;
+		best_data_size = data_size;
+		best_chunk = chunk;
+	}
+
+	return best_chunk;
+}
+
+/*
+ * Turn the unused @chunk into a used chunk holding @size bytes.
+ *
+ * When @chunk has a successor, an attempt is made to split the remainder into
+ * a new unused chunk. The mapping of the reused data and of the split header
+ * is done in a single hyp_allocator_map() call.
+ *
+ * Return: @chunk on success, or an ERR_PTR() carrying the mapping error. On
+ * error neither @chunk's used state nor allocator->last_used is modified.
+ */
+static struct chunk_hdr *hyp_allocator_reuse_chunk(struct hyp_allocator *allocator,
+						   struct chunk_hdr *chunk, size_t size)
+{
+	struct chunk_hdr *next = chunk_get_next(chunk);
+	void *start, *end, *split = NULL, *split_end = NULL;
+	int ret;
+
+	start = chunk_data(chunk);
+	end = start + size;
+
+	/* The last chunk in the list needs no split */
+	if (next) {
+		split = chunk_start(end);
+		split_end = split + chunk_min_size();
+
+		if (!hyp_allocator_can_create_chunk(allocator, chunk, next, split, split_end))
+			split = split_end = NULL;
+	}
+
+	/* Batch the mapping of the reused chunk and the split */
+	ret = hyp_allocator_map(allocator, chunk, next, start, split ? split_end : end);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Mapping can no longer fail: commit the new end of the last chunk */
+	if (!next)
+		allocator->last_used = end;
+
+	if (split)
+		WARN_ON(IS_ERR_OR_NULL(
+			hyp_allocator_create_chunk(allocator, chunk, split, split_end, false)));
+
+	chunk_set_used(chunk);
+	chunk_set_hash(chunk);
+
+	return chunk;
+}
+
+static void *hyp_allocator_alloc(struct hyp_allocator *allocator, size_t size)
+{
+ struct chunk_hdr *chunk = ERR_PTR(-E2BIG);
+ void *start, *end;
+
+ size = max(size, MIN_ALLOC_SIZE);
+
+ /* Ensure we do not overflow ALIGN(MIN_ALLOC_SIZE) */
+ if (size > U32_MAX)
+ goto errno;
+
+ size = ALIGN(size, MIN_ALLOC_SIZE);
+ if (size > (allocator->end - allocator->start - chunk_hdr_size()))
+ goto errno;
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ /* The allocator can modify the hyp stage-1 */
+ if (WARN_ON(hyp_spin_is_locked(&pkvm_pgd_lock))) {
+ chunk = ERR_PTR(-EINVAL);
+ goto errno;
+ }
+#endif
+ hyp_spin_lock(&allocator->lock);
+
+ /* First allocation */
+ if (!allocator->head) {
+ start = allocator->start;
+ end = start + chunk_hdr_size() + size;
+ chunk = hyp_allocator_create_chunk(allocator, NULL, start, end, true);
+ goto unlock;
+ }
+
+ chunk = hyp_allocator_find_efficient_chunk(allocator, size);
+
+ /* Nothing found, create a new chunk at the end in the list */
+ if (!chunk) {
+ start = chunk_start(chunk_data(allocator->tail) +
+ chunk_used_size(allocator->tail, allocator));
+ end = start + chunk_hdr_size() + size;
+ chunk = hyp_allocator_create_chunk(allocator, allocator->tail, start, end, true);
+ goto unlock;
+ }
+
+ chunk = hyp_allocator_reuse_chunk(allocator, chunk, size);
+
+unlock:
+ hyp_spin_unlock(&allocator->lock);
+
+errno:
+ if (IS_ERR_OR_NULL(chunk)) {
+ int errno = IS_ERR(chunk) ? PTR_ERR(chunk) : -EINVAL;
+
+ *this_cpu_ptr(allocator->errno) = errno;
+ return NULL;
+ }
+
+ memset(chunk_data(chunk), 0, size);
+ return chunk_data(chunk);
+}
+
+/*
+ * Release the allocation whose payload starts at @data. A NULL @data is
+ * ignored. The chunk is marked unused and then, where
+ * hyp_allocator_destroy_chunk() allows it, its successor is folded into it
+ * and it is folded into its predecessor.
+ */
+static void hyp_allocator_free(struct hyp_allocator *allocator, void *data)
+{
+	struct chunk_hdr *chunk, *neighbour;
+
+	if (!data)
+		return;
+
+	WARN_ON(!IS_ALIGNED((unsigned long)data, MIN_ALLOC_SIZE));
+	WARN_ON(data >= allocator->end || data < allocator->start + chunk_hdr_size());
+
+	hyp_spin_lock(&allocator->lock);
+
+	chunk = chunk_get(container_of(data, struct chunk_hdr, data));
+	WARN_ON(!chunk_is_used(chunk));
+	chunk_set_unused(chunk);
+	chunk_set_hash(chunk);
+
+	/* Try to absorb the following chunk first ... */
+	neighbour = chunk_get_next(chunk);
+	if (neighbour)
+		hyp_allocator_destroy_chunk(allocator, chunk, neighbour);
+
+	/* ... then try to be absorbed by the preceding one */
+	neighbour = chunk_get_prev(chunk);
+	if (neighbour)
+		hyp_allocator_destroy_chunk(allocator, neighbour, chunk);
+
+	hyp_spin_unlock(&allocator->lock);
+}
+
+static unsigned long hyp_allocator_chunk_reclaimable(struct hyp_allocator *allocator,
+ const struct chunk_hdr *chunk,
+ u64 *__addr, u64 *__end)
+{
+ struct chunk_hdr *next;
+ void *addr, *end;
+
+ /* Last chunk in the allocator */
+ if (chunk == allocator->head && chunk == allocator->tail && !chunk_is_used(chunk)) {
+ addr = (void *)chunk;
+ end = chunk_unmapped(chunk);
+ if (!end)
+ end = allocator->end;
+ goto end;
+ }
+
+ next = chunk_get_next(chunk);
+
+ /* Last chunk in the list we can reclaim, even if used */
+ if (!next) {
+ addr = chunk_data(chunk) + chunk_used_size(chunk, allocator);
+ addr = PTR_ALIGN(addr, PAGE_SIZE);
+ end = chunk_unmapped(chunk);
+ if (!end)
+ end = allocator->end;
+ goto end;
+ }
+
+ if (chunk_is_used(chunk))
+ return 0;
+
+ addr = PTR_ALIGN(chunk_data(chunk), PAGE_SIZE);
+ end = chunk_unmapped(chunk);
+ if (!end)
+ end = PTR_ALIGN_DOWN(next, PAGE_SIZE);
+
+end:
+ if (addr >= end)
+ return 0;
+
+ if (__end)
+ *__end = (u64)end;
+ if (__addr)
+ *__addr = (u64)addr;
+
+ return (end - addr) >> PAGE_SHIFT;
+}
+
+static void hyp_allocator_reclaim_chunk(struct hyp_allocator *allocator, struct chunk_hdr *chunk,
+ void *addr, void *end)
+{
+ struct chunk_hdr *next;
+
+ WARN_ON(end <= addr);
+
+ /* We are about to destroy the last chunk in the allocator */
+ if (addr == allocator->start) {
+ allocator->tail = allocator->head = chunk = NULL;
+ goto unmap;
+ }
+
+ next = chunk_get_next(chunk);
+
+ /*
+ * Split the reclaimed chunk at the next page boundary,
+ * this ensures no orphan mapped region is created. Splitting at the page boundary is always
+ * possible because chunks always leave a minimum distance to the page start.
+ *
+ * +--------------+
+ * |______________|
+ * |______________|<- Next chunk
+ * |_ _ _ __ _ _ _|
+ * | |<- Page-aligned split
+ * +--------------+
+ * +--------------+
+ * | |
+ * | |<- Page reclaimed
+ * | |
+ * | |
+ * +--------------+
+ * +--------------+
+ * | |
+ * |______________|
+ * |______________|<- Chunk to split
+ * | |
+ * +--------------+
+ */
+ if (next && !chunk_unmapped(chunk) && next != end)
+ WARN_ON(IS_ERR_OR_NULL(hyp_allocator_create_chunk(allocator, chunk, end, next,
+ false)));
+unmap:
+ hyp_allocator_unmap(allocator, chunk, addr, end);
+}
+
+/*
+ * Walk the chunk list and return the chunk with the largest reclaimable
+ * region, preferring the one furthest down the list when several are equal.
+ * On success @addr and @end receive the bounds of that region. Returns NULL,
+ * leaving @addr and @end untouched, when nothing can be reclaimed.
+ */
+static struct chunk_hdr *hyp_allocator_find_reclaimable_chunk(struct hyp_allocator *allocator,
+							      u64 *addr, u64 *end)
+{
+	struct chunk_hdr *cur, *winner = NULL;
+	unsigned long most_pages = 0;
+
+	for (cur = allocator->head; cur; cur = chunk_get_next(cur)) {
+		u64 region_start, region_end;
+		unsigned long nr_pages;
+
+		nr_pages = hyp_allocator_chunk_reclaimable(allocator, cur,
+							   &region_start, &region_end);
+
+		/* Favour the top biggest chunks */
+		if (!nr_pages || nr_pages < most_pages)
+			continue;
+
+		most_pages = nr_pages;
+		winner = cur;
+		*addr = region_start;
+		*end = region_end;
+	}
+
+	return winner;
+}
+
+/*
+ * Move up to @target pages from the allocator's memcache to @host_mc. Each
+ * page is zeroed and flushed, pushed onto @host_mc and then passed to
+ * __pkvm_hyp_donate_host().
+ *
+ * Return: the number of pages moved.
+ */
+static unsigned long hyp_allocator_drain_memcache(struct hyp_allocator *allocator,
+						  struct kvm_hyp_memcache *host_mc,
+						  unsigned long target)
+{
+	struct kvm_hyp_memcache *mc = &allocator->mc;
+	unsigned long drained;
+
+	for (drained = 0; drained < target && mc->nr_pages; drained++) {
+		void *page = pop_hyp_memcache(mc, hyp_phys_to_virt);
+
+		/* Scrub the page content before it leaves the hypervisor */
+		memset(page, 0, PAGE_SIZE);
+		kvm_flush_dcache_to_poc(page, PAGE_SIZE);
+		push_hyp_memcache(host_mc, page, hyp_virt_to_phys);
+		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(page), 1));
+	}
+
+	return drained;
+}
+
+static void hyp_allocator_reclaim(struct hyp_allocator *allocator, struct kvm_hyp_memcache *host_mc,
+ unsigned long target)
+{
+ if (!target)
+ return;
+
+ hyp_spin_lock(&allocator->lock);
+
+ target -= hyp_allocator_drain_memcache(allocator, host_mc, target);
+ if (!target)
+ goto unlock;
+
+ do {
+ unsigned long reclaimable;
+ struct chunk_hdr *chunk;
+ u64 addr, end;
+
+ chunk = hyp_allocator_find_reclaimable_chunk(allocator, &addr, &end);
+ if (!chunk)
+ break;
+
+ reclaimable = min((end - addr) >> PAGE_SHIFT, target);
+ addr = end - (reclaimable << PAGE_SHIFT);
+ hyp_allocator_reclaim_chunk(allocator, chunk, (void *)addr, (void *)end);
+
+ target -= reclaimable;
+ } while (target);
+
+ hyp_allocator_drain_memcache(allocator, host_mc, ULONG_MAX);
+
+unlock:
+ hyp_spin_unlock(&allocator->lock);
+}
+
+/*
+ * Return the sum, over every chunk in the list, of the number of pages
+ * reported by hyp_allocator_chunk_reclaimable(). The list is walked with the
+ * allocator lock held.
+ */
+static unsigned long hyp_allocator_reclaimable(struct hyp_allocator *allocator)
+{
+	unsigned long total = 0;
+	struct chunk_hdr *cur;
+
+	hyp_spin_lock(&allocator->lock);
+
+	for (cur = allocator->head; cur; cur = chunk_get_next(cur))
+		total += hyp_allocator_chunk_reclaimable(allocator, cur, NULL, NULL);
+
+	hyp_spin_unlock(&allocator->lock);
+
+	return total;
+}
+
+static int hyp_allocator_topup(struct hyp_allocator *allocator,
+ struct kvm_hyp_memcache *host_mc)
+{
+ struct kvm_hyp_memcache *alloc_mc = &allocator->mc;
+ int ret;
+
+ hyp_spin_lock(&allocator->lock);
+ ret = refill_memcache(alloc_mc, host_mc->nr_pages + alloc_mc->nr_pages, host_mc);
+ hyp_spin_unlock(&allocator->lock);
+
+ return ret;
+}
+
+/*
+ * Return the current CPU's top-up page count and reset it to zero.
+ */
+static u32 hyp_allocator_topup_needed(struct hyp_allocator *allocator)
+{
+	u32 *slot = this_cpu_ptr(allocator->topup_needed);
+	u32 pending = *slot;
+
+	/* Reading consumes the value */
+	*slot = 0;
+
+	return pending;
+}
+
+/*
+ * Return the current CPU's recorded allocation error and reset it to zero.
+ */
+static int hyp_allocator_errno(struct hyp_allocator *allocator)
+{
+	int *slot = this_cpu_ptr(allocator->errno);
+	int err = *slot;
+
+	/* Reading consumes the value */
+	*slot = 0;
+
+	return err;
+}
+
+
+/*
+ * Reserve a private VA range of @size bytes (rounded up to a page multiple)
+ * for @allocator and initialise its bounds and lock. Nothing is mapped here:
+ * first_unmapped is set to the start of the range.
+ *
+ * Return: 0 on success, -EINVAL if the page-aligned size is zero or exceeds
+ * U32_MAX, or the error returned by pkvm_alloc_private_va_range().
+ */
+static int hyp_allocator_init(struct hyp_allocator *allocator, size_t size)
+{
+	unsigned long va;
+	int err;
+
+	size = PAGE_ALIGN(size);
+
+	/* Chunk headers store offsets as u32 */
+	if (!size || size > U32_MAX)
+		return -EINVAL;
+
+	err = pkvm_alloc_private_va_range(size, &va);
+	if (err)
+		return err;
+
+	allocator->start = (void *)va;
+	allocator->end = allocator->start + size;
+	allocator->first_unmapped = allocator->start;
+	hyp_spin_lock_init(&allocator->lock);
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(int, __hyp_allocator_errno);
+static DEFINE_PER_CPU(u32, __hyp_allocator_topup_needed);
+
+static struct hyp_allocator hyp_allocator = {
+ .errno = &__hyp_allocator_errno,
+ .topup_needed = &__hyp_allocator_topup_needed,
+};
+
+/**
+ * hyp_alloc() - Allocate memory from the heap allocator
+ *
+ * @size: Allocation size in bytes.
+ *
+ * Return: A pointer to the allocated memory on success, else NULL.
+ */
+void *hyp_alloc(size_t size)
+{
+ return hyp_allocator_alloc(&hyp_allocator, size);
+}
+
+/**
+ * hyp_free() - Free memory allocated with hyp_alloc()
+ *
+ * @data: Address returned by the original hyp_alloc().
+ *
+ * The use of any other address than one returned by hyp_alloc() will cause a
+ * hypervisor panic.
+ */
+void hyp_free(void *data)
+{
+ hyp_allocator_free(&hyp_allocator, data);
+}
+
+/**
+ * hyp_alloc_errno() - Read the errno on allocation error
+ *
+ * Get the error code recorded on the current CPU by a failed hyp_alloc().
+ * Reading the value resets it to 0.
+ *
+ * Return: -ENOMEM if the allocator needs a refill from the host, -E2BIG if
+ * the request does not fit in the allocator's VA space, another negative
+ * error code (e.g. -EINVAL) for other failures, or 0 if no error is recorded.
+ */
+int hyp_alloc_errno(void)
+{
+ return hyp_allocator_errno(&hyp_allocator);
+}
+
+int hyp_alloc_init(size_t size)
+{
+ return hyp_allocator_init(&hyp_allocator, size);
+}
+
+void hyp_alloc_reclaim(struct kvm_hyp_memcache *mc, unsigned long target)
+{
+ hyp_allocator_reclaim(&hyp_allocator, mc, target);
+}
+
+unsigned long hyp_alloc_reclaimable(void)
+{
+ return hyp_allocator_reclaimable(&hyp_allocator);
+}
+
+int hyp_alloc_topup(struct kvm_hyp_memcache *host_mc)
+{
+ return hyp_allocator_topup(&hyp_allocator, host_mc);
+}
+
+u32 hyp_alloc_topup_needed(void)
+{
+ return hyp_allocator_topup_needed(&hyp_allocator);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index d461981616d9..95ce7496e67f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -10,6 +10,7 @@
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
+#include <nvhe/alloc.h>
#include <nvhe/early_alloc.h>
#include <nvhe/ffa.h>
#include <nvhe/gfp.h>
@@ -363,6 +364,10 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_bas
if (ret)
return ret;
+ ret = hyp_alloc_init(SZ_128M);
+ if (ret)
+ return ret;
+
update_nvhe_init_params();
/* Jump in the idmap page to switch to the new page-tables */
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 05/17] KVM: arm64: Allow kvm_hyp_memcache usage outside of stage-2
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (3 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 04/17] KVM: arm64: Add a heap allocator " Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 06/17] KVM: arm64: Add topup interface for the pKVM heap allocator Vincent Donnefort
` (11 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Although currently limited to guest stage-2 page-table allocations,
struct kvm_hyp_memcache is a useful primitive for passing a list of
discontiguous pages between host and hypervisor.
Introduce init_hyp_memcache() to initialise a generic hyp memcache, and
init_hyp_stage2_memcache() for stage-2 specific memcaches. The generic
initialiser will be used to top up the upcoming pKVM heap allocator.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 65eead8362e0..15c5378b70a0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -91,9 +91,22 @@ struct kvm_hyp_memcache {
struct pkvm_mapping *mapping; /* only used from EL1 */
#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(1)
+#define HYP_MEMCACHE_ACCOUNT_KMEMCG BIT(2)
unsigned long flags;
};
+/*
+ * Initialise a generic hyp memcache: every field is zeroed (so no accounting
+ * flag is set) and ->mapping is set to ZERO_SIZE_PTR.
+ */
+static inline void init_hyp_memcache(struct kvm_hyp_memcache *mc)
+{
+ memset(mc, 0, sizeof(*mc));
+ mc->mapping = ZERO_SIZE_PTR; /* Prevent allocation, solely useful for stage2 memcache */
+}
+
+/*
+ * Initialise a stage-2 hyp memcache: every field is zeroed, then both the
+ * stage-2 and the kmemcg accounting flags are set.
+ */
+static inline void init_hyp_stage2_memcache(struct kvm_hyp_memcache *mc)
+{
+ memset(mc, 0, sizeof(*mc));
+ mc->flags = HYP_MEMCACHE_ACCOUNT_STAGE2 | HYP_MEMCACHE_ACCOUNT_KMEMCG;
+}
+
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
phys_addr_t *p,
phys_addr_t (*to_pa)(void *virt))
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..04dd442c127e 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1132,9 +1132,11 @@ static void hyp_mc_free_fn(void *addr, void *mc)
static void *hyp_mc_alloc_fn(void *mc)
{
struct kvm_hyp_memcache *memcache = mc;
+ gfp_t gfp = (memcache->flags & HYP_MEMCACHE_ACCOUNT_KMEMCG) ?
+ GFP_KERNEL_ACCOUNT : GFP_KERNEL;
void *addr;
- addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+ addr = (void *)__get_free_page(gfp);
if (addr && memcache->flags & HYP_MEMCACHE_ACCOUNT_STAGE2)
kvm_account_pgtable_pages(addr, 1);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 053e4f733e4b..8324a6a1bc48 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -111,7 +111,7 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
void *hyp_vcpu;
int ret;
- vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+ init_hyp_stage2_memcache(&vcpu->arch.pkvm_memcache);
hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
if (!hyp_vcpu)
@@ -172,7 +172,7 @@ static int __pkvm_create_hyp_vm(struct kvm *kvm)
goto free_vm;
kvm->arch.pkvm.is_created = true;
- kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+ init_hyp_stage2_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 06/17] KVM: arm64: Add topup interface for the pKVM heap allocator
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (4 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 05/17] KVM: arm64: Allow kvm_hyp_memcache usage outside of stage-2 Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 07/17] KVM: arm64: Add pkvm_hyp_req infrastructure Vincent Donnefort
` (10 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Introduce a host HVC interface and a host side helper to allow refilling
the pKVM heap allocator.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 043495f7fc78..681b7bf8ac08 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -114,6 +114,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___pkvm_hyp_topup,
MARKER(__KVM_HOST_SMCCC_FUNC_MAX)
};
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 2954b311128c..879f1667ec67 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -17,6 +17,10 @@
#define HYP_MEMBLOCK_REGIONS 128
+/* Selects the hypervisor allocator targeted by a top-up hypercall. */
+enum pkvm_topup_id {
+ PKVM_TOPUP_HYP_ALLOC,
+};
+
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
int pkvm_create_hyp_vm(struct kvm *kvm);
bool pkvm_hyp_vm_is_created(struct kvm *kvm);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 06db299c37a8..38ce834ca840 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -15,6 +15,7 @@
#include <asm/kvm_hypevents.h>
#include <asm/kvm_mmu.h>
+#include <nvhe/alloc.h>
#include <nvhe/ffa.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
@@ -613,6 +614,30 @@ static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt
cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
}
+/*
+ * Host hypercall: top up a hypervisor allocator with a host memcache.
+ *
+ * In: register 1 = enum pkvm_topup_id, register 2 = memcache head,
+ * register 3 = memcache nr_pages.
+ * Out: register 1 = return code (-EINVAL for an unknown id), registers 2 and
+ * 3 = head and nr_pages of the memcache after the call.
+ */
+static void handle___pkvm_hyp_topup(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(enum pkvm_topup_id, id, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, head, host_ctxt, 2);
+ DECLARE_REG(unsigned long, nr_pages, host_ctxt, 3);
+ struct kvm_hyp_memcache host_mc = {
+ .head = head,
+ .nr_pages = nr_pages,
+ };
+ int ret;
+
+ switch (id) {
+ case PKVM_TOPUP_HYP_ALLOC:
+ ret = hyp_alloc_topup(&host_mc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ cpu_reg(host_ctxt, 1) = ret;
+ cpu_reg(host_ctxt, 2) = host_mc.head;
+ cpu_reg(host_ctxt, 3) = host_mc.nr_pages;
+}
+
static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
@@ -743,6 +768,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_vcpu_load),
HANDLE_FUNC(__pkvm_vcpu_put),
HANDLE_FUNC(__pkvm_tlb_flush_vmid),
+ HANDLE_FUNC(__pkvm_hyp_topup),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 8324a6a1bc48..7abdc250b633 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -85,6 +85,29 @@ void __init kvm_hyp_reserve(void)
hyp_mem_base);
}
+/*
+ * Top up the hypervisor allocator identified by @id with @nr_pages pages.
+ *
+ * The pages are collected in a temporary memcache and handed over with the
+ * __pkvm_hyp_topup hypercall. Whatever memcache the hypervisor hands back in
+ * a2/a3 is released to the host again.
+ *
+ * Return: 0 on success, the error from topup_hyp_memcache() or from the
+ * hypervisor otherwise, -EINVAL if the hypercall itself failed.
+ */
+static int pkvm_hyp_topup(enum pkvm_topup_id id, unsigned long nr_pages)
+{
+	struct arm_smccc_res res;
+	struct kvm_hyp_memcache mc;
+	int ret;
+
+	init_hyp_memcache(&mc);
+
+	ret = topup_hyp_memcache(&mc, nr_pages);
+	if (ret) {
+		/*
+		 * The memcache is local: pages collected before the failure
+		 * would otherwise be lost.
+		 */
+		free_hyp_memcache(&mc);
+		return ret;
+	}
+
+	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_topup), id, mc.head,
+			  mc.nr_pages, &res);
+	if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS)) {
+		/*
+		 * The handler did not run, so a1-a3 are not results: give
+		 * back the pages still described by the local memcache.
+		 */
+		free_hyp_memcache(&mc);
+		return -EINVAL;
+	}
+
+	/* Release what the hypervisor did not consume */
+	mc.head = res.a2;
+	mc.nr_pages = res.a3;
+	free_hyp_memcache(&mc);
+
+	return res.a1;
+}
+
static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
{
if (pkvm_hyp_vm_is_created(kvm)) {
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 07/17] KVM: arm64: Add pkvm_hyp_req infrastructure
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (5 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 06/17] KVM: arm64: Add topup interface for the pKVM heap allocator Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 08/17] KVM: arm64: Handle PKVM_HYP_REQ_HYP_ALLOC request Vincent Donnefort
` (9 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Introduce a struct pkvm_hyp_req to enable the pKVM hypervisor to request
resources from the host.
Provide serialisation helpers to transport these requests via SMCCC
registers (starting from a2):
pkvm_hyp_req_to_smccc() to encode into the SMCCC args.
smccc_to_pkvm_hyp_req() to decode them.
When the hypervisor raises a request, the host must handle it and retry
the HVC. To automate this sequence, introduce the pkvm_call_hyp_req()
macro. This intercepts pending requests, invokes the handler and retries
the HVC.
Additionally, introduce a trace event to track the handling of these
requests.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 879f1667ec67..fb4d140c99cc 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -204,6 +204,95 @@ struct pkvm_mapping {
u64 __subtree_last; /* Internal member for interval tree */
};
+/*
+ * Types of request the hypervisor can raise towards the host. 0 means "no
+ * request"; __PKVM_HYP_REQ_TYPE_MAX bounds the valid values.
+ */
+enum pkvm_hyp_req_type {
+ PKVM_HYP_NO_REQ = 0,
+ __PKVM_HYP_REQ_TYPE_MAX,
+};
+
+/*
+ * Bytes available for request arguments: the part of struct arm_smccc_res
+ * from a2 onwards, minus the one byte used for the request type.
+ */
+#define PKVM_HYP_REQ_SMCCC_ARG_SIZE_MAX \
+ (sizeof(struct arm_smccc_res) - offsetof(struct arm_smccc_res, a2) - 1)
+
+/*
+ * A request raised by the hypervisor for the host to handle.
+ *
+ * @type selects which member of the union is meaningful; @args is a raw byte
+ * view of the same storage used when (de)serialising through SMCCC registers.
+ */
+struct pkvm_hyp_req {
+ u8 type;
+ union {
+ struct {
+ u32 nr_pages;
+ } mem;
+ struct {
+ /* Helper for SMCCC encoding/decoding */
+ u8 args[PKVM_HYP_REQ_SMCCC_ARG_SIZE_MAX];
+ } args;
+ };
+};
+
+/*
+ * Size in bytes of the arguments carried by a request of @type. An unhandled
+ * type triggers a WARN and is reported as carrying no argument.
+ */
+static inline size_t pkvm_hyp_req_arg_size(u8 type)
+{
+ switch (type) {
+ case PKVM_HYP_NO_REQ:
+ return 0;
+ default:
+ WARN_ON(1);
+ }
+
+ return 0;
+}
+
+/*
+ * Encode the pending pkvm_hyp_req into the SMCCC args.
+ *
+ * The request type is stored in the first byte of regs[2], immediately
+ * followed by the type-specific arguments. regs[2] is left at 0
+ * (PKVM_HYP_NO_REQ) whenever there is no valid request to report, including
+ * when the arguments would not fit, so that the host never decodes a stale
+ * register value as a request.
+ */
+static inline void
+pkvm_hyp_req_to_smccc(struct kvm_cpu_context *host_ctxt, struct pkvm_hyp_req *req)
+{
+	u8 *dst, type = req->type;
+	size_t size;
+
+	/* Default to "no request" on every early return below */
+	host_ctxt->regs.regs[2] = 0;
+
+	if (type == PKVM_HYP_NO_REQ || type >= __PKVM_HYP_REQ_TYPE_MAX)
+		return;
+
+	size = pkvm_hyp_req_arg_size(type);
+	if (WARN_ON(size > PKVM_HYP_REQ_SMCCC_ARG_SIZE_MAX))
+		return;
+
+	dst = (u8 *)&host_ctxt->regs.regs[2];
+	*dst = type;
+
+	memcpy(dst + 1, &req->args, size);
+}
+
+/*
+ * Decode a pkvm_hyp_req from the SMCCC args, mirroring
+ * pkvm_hyp_req_to_smccc(): the type is read from the first byte of a2 and
+ * the type-specific arguments from the bytes that follow.
+ *
+ * Return: true if a request has been decoded into @req, false if there is no
+ * request, the type is unknown, or its arguments cannot fit in the registers.
+ */
+static inline bool smccc_to_pkvm_hyp_req(struct pkvm_hyp_req *req, struct arm_smccc_res *res)
+{
+	u8 *src = (u8 *)&res->a2;
+	u8 type = *src;
+	size_t size;
+
+	if (type == PKVM_HYP_NO_REQ || type >= __PKVM_HYP_REQ_TYPE_MAX)
+		return false;
+
+	/* Same bound as the encoder: never read past @res nor write past @req */
+	size = pkvm_hyp_req_arg_size(type);
+	if (WARN_ON(size > PKVM_HYP_REQ_SMCCC_ARG_SIZE_MAX))
+		return false;
+
+	req->type = type;
+	memcpy(&req->args, src + 1, size);
+
+	return true;
+}
+
+int __pkvm_handle_smccc_req(struct arm_smccc_res *res);
+
+/*
+ * Issue the hypercall @f and service any request the hypervisor raises.
+ *
+ * The hypercall is retried for as long as it reports an error in a1 and
+ * __pkvm_handle_smccc_req() returns 0 for the pending request.
+ *
+ * Evaluates to 0 on success, -1 if the HVC itself failed, else the error
+ * returned by __pkvm_handle_smccc_req().
+ *
+ * Locals are prefixed so that they cannot capture an identifier (e.g. "res")
+ * used in the caller's arguments.
+ */
+#define pkvm_call_hyp_req(f, ...)					\
+({									\
+	struct arm_smccc_res __req_res;					\
+	int __ret;							\
+	do {								\
+		__ret = -1;						\
+		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f),		\
+				  ##__VA_ARGS__, &__req_res);		\
+		if (WARN_ON(__req_res.a0 != SMCCC_RET_SUCCESS))		\
+			break;						\
+		__ret = __req_res.a1;					\
+		if (!__ret)						\
+			break;						\
+		__ret = __pkvm_handle_smccc_req(&__req_res);		\
+	} while (!__ret);						\
+	__ret;								\
+})
+
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 7abdc250b633..ce96a6f90bd0 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -16,6 +16,9 @@
#include "hyp_constants.h"
+#define CREATE_TRACE_POINTS
+#include "trace_pkvm.h"
+
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
@@ -108,6 +111,28 @@ static int pkvm_hyp_topup(enum pkvm_topup_id id, unsigned long nr_pages)
return res.a1;
}
+/*
+ * Handle a request decoded from the hypervisor and trace the outcome.
+ *
+ * Return: -EINVAL for any request type without a case in the switch below,
+ * which currently has none.
+ */
+static int pkvm_handle_hyp_req(struct pkvm_hyp_req *req)
+{
+ int ret = -EINVAL;
+
+ switch (req->type) {
+ }
+
+ trace_kvm_handle_pkvm_hyp_req(req, ret);
+
+ return ret;
+}
+
+/*
+ * Service the request, if any, encoded in the result of a failed hypercall.
+ *
+ * Return: the handler's return code when a request was decoded from @res,
+ * otherwise the hypercall's own return code (a1).
+ */
+int __pkvm_handle_smccc_req(struct arm_smccc_res *res)
+{
+	struct pkvm_hyp_req req;
+
+	/* No request pending: hand the original error back to the caller */
+	if (!smccc_to_pkvm_hyp_req(&req, res))
+		return res->a1;
+
+	return pkvm_handle_hyp_req(&req);
+}
+
static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
{
if (pkvm_hyp_vm_is_created(kvm)) {
diff --git a/arch/arm64/kvm/trace_pkvm.h b/arch/arm64/kvm/trace_pkvm.h
new file mode 100644
index 000000000000..4bf57c12e7de
--- /dev/null
+++ b/arch/arm64/kvm/trace_pkvm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_PKVM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PKVM_ARM64_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/kvm_pkvm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/* Records the type of a handled pkvm_hyp_req and the handler's return code. */
+TRACE_EVENT(kvm_handle_pkvm_hyp_req,
+ TP_PROTO(struct pkvm_hyp_req *req, int ret),
+ TP_ARGS(req, ret),
+
+ TP_STRUCT__entry(
+ __field(u8, type)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->type = req->type;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("type: %u ret: %d",
+ __entry->type, __entry->ret)
+);
+
+#endif /* _TRACE_PKVM_ARM64_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace_pkvm
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 08/17] KVM: arm64: Handle PKVM_HYP_REQ_HYP_ALLOC request
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (6 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 07/17] KVM: arm64: Add pkvm_hyp_req infrastructure Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 09/17] KVM: arm64: Add reclaim interface for the pKVM heap alloc Vincent Donnefort
` (8 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Introduce a new pkvm_hyp_req type, PKVM_HYP_REQ_HYP_ALLOC, asking the host to
top up the pKVM heap allocator.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index fb4d140c99cc..bf43235e62d3 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -206,6 +206,7 @@ struct pkvm_mapping {
enum pkvm_hyp_req_type {
PKVM_HYP_NO_REQ = 0,
+ PKVM_HYP_REQ_HYP_ALLOC,
__PKVM_HYP_REQ_TYPE_MAX,
};
@@ -227,9 +228,13 @@ struct pkvm_hyp_req {
static inline size_t pkvm_hyp_req_arg_size(u8 type)
{
+ struct pkvm_hyp_req *req;
+
switch (type) {
case PKVM_HYP_NO_REQ:
return 0;
+ case PKVM_HYP_REQ_HYP_ALLOC:
+ return sizeof(req->mem);
default:
WARN_ON(1);
}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index ce96a6f90bd0..f5288a350069 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -116,6 +116,9 @@ static int pkvm_handle_hyp_req(struct pkvm_hyp_req *req)
int ret = -EINVAL;
switch (req->type) {
+ case PKVM_HYP_REQ_HYP_ALLOC:
+ ret = pkvm_hyp_topup(PKVM_TOPUP_HYP_ALLOC, req->mem.nr_pages);
+ break;
}
trace_kvm_handle_pkvm_hyp_req(req, ret);
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 09/17] KVM: arm64: Add reclaim interface for the pKVM heap alloc
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (7 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 08/17] KVM: arm64: Handle PKVM_HYP_REQ_HYP_ALLOC request Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 10/17] KVM: arm64: Add selftests for the pKVM heap allocator Vincent Donnefort
` (7 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Introduce a host interface to reclaim donated memory from the pKVM heap
allocator back to the host.
It specifically provides two helpers that will make it easier to
create a shrinker for pKVM:
pkvm_hyp_reclaimable()
pkvm_hyp_reclaim()
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 681b7bf8ac08..b427ef790b15 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -115,6 +115,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___pkvm_hyp_topup,
+ __KVM_HOST_SMCCC_FUNC___pkvm_hyp_reclaim,
+ __KVM_HOST_SMCCC_FUNC___pkvm_hyp_reclaimable,
MARKER(__KVM_HOST_SMCCC_FUNC_MAX)
};
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index bf43235e62d3..ca3b5fc5f28f 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -21,6 +21,9 @@ enum pkvm_topup_id {
PKVM_TOPUP_HYP_ALLOC,
};
+unsigned long pkvm_hyp_reclaim(enum pkvm_topup_id id, unsigned long target);
+unsigned long pkvm_hyp_reclaimable(enum pkvm_topup_id id);
+
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
int pkvm_create_hyp_vm(struct kvm *kvm);
bool pkvm_hyp_vm_is_created(struct kvm *kvm);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 38ce834ca840..20be0343abd4 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -638,6 +638,42 @@ static void handle___pkvm_hyp_topup(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 3) = host_mc.nr_pages;
}
+/*
+ * Host hypercall: reclaim memory from a hypervisor allocator.
+ *
+ * In: register 1 = enum pkvm_topup_id, register 2 = reclaim target.
+ * Out: register 1 = 0, or -EINVAL for an unknown id; registers 2 and 3 =
+ * head and nr_pages of the memcache passed to the reclaim (left empty for an
+ * unknown id).
+ */
+static void handle___pkvm_hyp_reclaim(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(enum pkvm_topup_id, id, host_ctxt, 1);
+ DECLARE_REG(unsigned long, target, host_ctxt, 2);
+ struct kvm_hyp_memcache host_mc = {};
+ int ret = 0;
+
+ switch (id) {
+ case PKVM_TOPUP_HYP_ALLOC:
+ hyp_alloc_reclaim(&host_mc, target);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ cpu_reg(host_ctxt, 1) = ret;
+ cpu_reg(host_ctxt, 2) = host_mc.head;
+ cpu_reg(host_ctxt, 3) = host_mc.nr_pages;
+}
+
+/*
+ * Host hypercall: report in register 1 what hyp_alloc_reclaimable() returns
+ * for PKVM_TOPUP_HYP_ALLOC, or 0 for any other id passed in register 1.
+ */
+static void handle___pkvm_hyp_reclaimable(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(enum pkvm_topup_id, id, host_ctxt, 1);
+
+	if (id == PKVM_TOPUP_HYP_ALLOC)
+		cpu_reg(host_ctxt, 1) = hyp_alloc_reclaimable();
+	else
+		cpu_reg(host_ctxt, 1) = 0;
+}
+
static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
@@ -769,6 +805,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_vcpu_put),
HANDLE_FUNC(__pkvm_tlb_flush_vmid),
HANDLE_FUNC(__pkvm_hyp_topup),
+ HANDLE_FUNC(__pkvm_hyp_reclaim),
+ HANDLE_FUNC(__pkvm_hyp_reclaimable),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index f5288a350069..f29134a1cc73 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -111,6 +111,30 @@ static int pkvm_hyp_topup(enum pkvm_topup_id id, unsigned long nr_pages)
return res.a1;
}
+/*
+ * Reclaim memory from the hypervisor allocator identified by @id.
+ *
+ * @target is passed as-is to the __pkvm_hyp_reclaim hypercall. The memcache
+ * returned by the hypervisor (head in a2, nr_pages in a3) is freed back to
+ * the host.
+ *
+ * Return: the number of pages in the returned memcache, 0 if the hypercall
+ * failed or the hypervisor reported an error.
+ */
+unsigned long pkvm_hyp_reclaim(enum pkvm_topup_id id, unsigned long target)
+{
+	struct kvm_hyp_memcache mc;
+	struct arm_smccc_res res;
+	unsigned long reclaimed;
+
+	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_reclaim), id, target, &res);
+	/* a1-a3 are only meaningful if the hypercall itself went through */
+	if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
+		return 0;
+	if (WARN_ON_ONCE(res.a1))
+		return 0;
+
+	init_hyp_memcache(&mc);
+	mc.head = res.a2;
+	mc.nr_pages = reclaimed = res.a3;
+	free_hyp_memcache(&mc);
+
+	return reclaimed;
+}
+
+/*
+ * Ask the hypervisor what could be reclaimed from the allocator identified by
+ * @id. Returns the result of the __pkvm_hyp_reclaimable hypercall (presumably
+ * a number of pages -- TODO confirm).
+ */
+unsigned long pkvm_hyp_reclaimable(enum pkvm_topup_id id)
+{
+ return kvm_call_hyp_nvhe(__pkvm_hyp_reclaimable, id);
+}
+
static int pkvm_handle_hyp_req(struct pkvm_hyp_req *req)
{
int ret = -EINVAL;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 10/17] KVM: arm64: Add selftests for the pKVM heap allocator
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (8 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 09/17] KVM: arm64: Add reclaim interface for the pKVM heap alloc Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 11/17] KVM: arm64: Add a shrinker for pKVM Vincent Donnefort
` (6 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Introduce a comprehensive runtime selftest for the pKVM hypervisor heap
allocator, executed during init when CONFIG_NVHE_EL2_DEBUG is enabled.
The selftest runs entirely at EL2 and exercises the allocator's core
mechanisms:
* over-sized allocations
* basic allocation and alignment
* chunk recycling, splitting, merging
* memory reclaiming
* memory topup
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b427ef790b15..07a46860c8b2 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -117,6 +117,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_hyp_topup,
__KVM_HOST_SMCCC_FUNC___pkvm_hyp_reclaim,
__KVM_HOST_SMCCC_FUNC___pkvm_hyp_reclaimable,
+ __KVM_HOST_SMCCC_FUNC___pkvm_hyp_alloc_selftest,
MARKER(__KVM_HOST_SMCCC_FUNC_MAX)
};
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ca3b5fc5f28f..c1c9e8c1f5b6 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -19,6 +19,7 @@
enum pkvm_topup_id {
PKVM_TOPUP_HYP_ALLOC,
+ PKVM_TOPUP_HYP_ALLOC_SELFTEST,
};
unsigned long pkvm_hyp_reclaim(enum pkvm_topup_id id, unsigned long target);
@@ -210,6 +211,7 @@ struct pkvm_mapping {
enum pkvm_hyp_req_type {
PKVM_HYP_NO_REQ = 0,
PKVM_HYP_REQ_HYP_ALLOC,
+ PKVM_HYP_REQ_HYP_ALLOC_SELFTEST,
__PKVM_HYP_REQ_TYPE_MAX,
};
@@ -237,6 +239,7 @@ static inline size_t pkvm_hyp_req_arg_size(u8 type)
case PKVM_HYP_NO_REQ:
return 0;
case PKVM_HYP_REQ_HYP_ALLOC:
+ case PKVM_HYP_REQ_HYP_ALLOC_SELFTEST:
return sizeof(req->mem);
default:
WARN_ON(1);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/alloc.h b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
index 8f87a63f8946..329250dad6f6 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/alloc.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/alloc.h
@@ -14,4 +14,11 @@ int hyp_alloc_init(size_t size);
int hyp_alloc_topup(struct kvm_hyp_memcache *host_mc);
unsigned long hyp_alloc_reclaimable(void);
void hyp_alloc_reclaim(struct kvm_hyp_memcache *host_mc, unsigned long target);
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+int hyp_allocator_selftest(void);
+u32 hyp_alloc_selftest_topup_needed(void);
+int hyp_alloc_selftest_topup(struct kvm_hyp_memcache *host_mc);
+void hyp_alloc_selftest_reclaim(struct kvm_hyp_memcache *host_mc, unsigned long target);
+#endif
#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/alloc.c b/arch/arm64/kvm/hyp/nvhe/alloc.c
index 183336f297c3..ea79da743d71 100644
--- a/arch/arm64/kvm/hyp/nvhe/alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/alloc.c
@@ -1011,9 +1011,24 @@ int hyp_alloc_errno(void)
return hyp_allocator_errno(&hyp_allocator);
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static int selftest_init(void);
+#endif
+
int hyp_alloc_init(size_t size)
{
- return hyp_allocator_init(&hyp_allocator, size);
+ int ret;
+
+ ret = hyp_allocator_init(&hyp_allocator, size);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ ret = selftest_init();
+ if (ret)
+ return ret;
+#endif
+ return 0;
}
void hyp_alloc_reclaim(struct kvm_hyp_memcache *mc, unsigned long target)
@@ -1035,3 +1050,184 @@ u32 hyp_alloc_topup_needed(void)
{
return hyp_allocator_topup_needed(&hyp_allocator);
}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/*
+ * Size of the selftest allocator, in pages and in bytes. The host side
+ * (pkvm_selftests()) hard-codes the same page count.
+ */
+#define SELFTEST_MAX_PAGES 6
+#define SELFTEST_MAX_SIZE (PAGE_SIZE * SELFTEST_MAX_PAGES)
+
+/* Per-CPU storage referenced by the selftest allocator below. */
+static DEFINE_PER_CPU(int, __selftest_errno);
+static DEFINE_PER_CPU(u32, __selftest_topup_needed);
+
+/* Dedicated allocator instance, separate from the global hyp_allocator. */
+static struct hyp_allocator selftest_allocator = {
+ .errno = &__selftest_errno,
+ .topup_needed = &__selftest_topup_needed,
+ .lock = __HYP_SPIN_LOCK_UNLOCKED,
+};
+
+/* Same as hyp_alloc_topup(), but for the selftest allocator. */
+int hyp_alloc_selftest_topup(struct kvm_hyp_memcache *host_mc)
+{
+ return hyp_allocator_topup(&selftest_allocator, host_mc);
+}
+
+/* Same as hyp_alloc_reclaim(), but for the selftest allocator. */
+void hyp_alloc_selftest_reclaim(struct kvm_hyp_memcache *host_mc, unsigned long target)
+{
+ hyp_allocator_reclaim(&selftest_allocator, host_mc, target);
+}
+
+/* Same as hyp_alloc_topup_needed(), but for the selftest allocator. */
+u32 hyp_alloc_selftest_topup_needed(void)
+{
+ return hyp_allocator_topup_needed(&selftest_allocator);
+}
+
+/* Initialise the selftest allocator with SELFTEST_MAX_SIZE. */
+static int selftest_init(void)
+{
+ return hyp_allocator_init(&selftest_allocator, SELFTEST_MAX_SIZE);
+}
+
+/* Allocate @size bytes from the selftest allocator. */
+static void *selftest_alloc(size_t size)
+{
+ return hyp_allocator_alloc(&selftest_allocator, size);
+}
+
+/* Free @addr back to the selftest allocator. */
+static void selftest_free(void *addr)
+{
+ hyp_allocator_free(&selftest_allocator, addr);
+}
+
+/* Read the errno of the selftest allocator. */
+static int selftest_errno(void)
+{
+ return hyp_allocator_errno(&selftest_allocator);
+}
+
+/*
+ * Runtime selftest of the heap allocator, run against the dedicated
+ * selftest_allocator instance and serialised by a local spinlock.
+ *
+ * Return: 0 when every check passes; -ENOMEM, with topup_needed set for this
+ * CPU, when the selftest allocator holds fewer than SELFTEST_MAX_PAGES pages;
+ * the error from hyp_allocator_topup() if the refill fails; -EINVAL when any
+ * other check fails.
+ */
+int hyp_allocator_selftest(void)
+{
+	struct hyp_allocator *allocator = &selftest_allocator;
+	static DEFINE_HYP_SPINLOCK(selftest_lock);
+	struct kvm_hyp_memcache host_mc = { };
+	void *addr1, *addr2, *addr3, *addr4;
+	int ret = -EINVAL;
+
+	hyp_spin_lock(&selftest_lock);
+
+	/* Not enough pages yet: tell the caller how many are missing */
+	if (allocator->mc.nr_pages < SELFTEST_MAX_PAGES) {
+		*this_cpu_ptr(allocator->topup_needed) = SELFTEST_MAX_PAGES -
+							 allocator->mc.nr_pages;
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* Test over-sized allocations */
+	selftest_alloc(SELFTEST_MAX_SIZE);
+	if (selftest_errno() != -E2BIG)
+		goto end;
+
+	selftest_alloc(SIZE_MAX);
+	if (selftest_errno() != -E2BIG)
+		goto end;
+
+	/* Test first chunk */
+	addr1 = selftest_alloc(0);
+	if (!addr1 || addr1 != (void *)allocator->start + chunk_hdr_size())
+		goto end;
+
+	/* Test second contiguous chunk with unaligned size */
+	addr2 = selftest_alloc(MIN_ALLOC_SIZE + 1);
+	if (!addr2)
+		goto end;
+	addr3 = selftest_alloc(0);
+	if (!addr3 ||
+	    addr3 != addr2 + (2 * MIN_ALLOC_SIZE) + chunk_hdr_size())
+		goto end;
+
+	selftest_free(addr3);
+
+	/* Test chunk recycling */
+	selftest_free(addr1);
+	if (addr1 != selftest_alloc(0))
+		goto end;
+
+	/* Test chunk forward merging */
+	addr3 = selftest_alloc(0);
+	selftest_free(addr2);
+	selftest_free(addr1);
+	if (addr1 != selftest_alloc(MIN_ALLOC_SIZE * 2))
+		goto end;
+
+	selftest_free(addr1);
+
+	/* Test chunk splitting */
+	if (addr1 != selftest_alloc(0))
+		goto end;
+	if (addr2 != selftest_alloc(0))
+		goto end;
+
+	/* Test chunk backward merging */
+	selftest_free(addr1);
+	selftest_free(addr2);
+	if (addr1 != selftest_alloc(MIN_ALLOC_SIZE * 2))
+		goto end;
+
+	selftest_free(addr1);
+
+	/* Test chunk 3-way merging */
+	addr1 = selftest_alloc(0);
+	addr2 = selftest_alloc(0);
+	addr4 = selftest_alloc(0);
+	selftest_free(addr1);
+	selftest_free(addr3);
+	selftest_free(addr2);
+	if (addr1 != selftest_alloc(MIN_ALLOC_SIZE * 3))
+		goto end;
+
+	selftest_free(addr4);
+	selftest_free(addr1);
+
+	/* Test reclaiming */
+	if (addr1 != selftest_alloc(0))
+		goto end;
+	if (addr2 != selftest_alloc(PAGE_SIZE * 2))
+		goto end;
+	addr3 = selftest_alloc(0);
+	addr4 = selftest_alloc(PAGE_SIZE);
+
+	/* Test reclaiming the last chunk of the list */
+	selftest_free(addr4);
+	hyp_allocator_reclaim(allocator, &host_mc, SELFTEST_MAX_PAGES);
+	if (host_mc.nr_pages != SELFTEST_MAX_PAGES - 3)
+		goto end;
+
+	/* Test punching a hole in the middle of a free chunk ... */
+	selftest_free(addr2);
+	hyp_allocator_reclaim(allocator, &host_mc, SELFTEST_MAX_PAGES);
+	if (host_mc.nr_pages != SELFTEST_MAX_PAGES - 2)
+		goto end;
+
+	if (selftest_alloc(PAGE_SIZE))
+		goto end;
+	if (selftest_errno() != -ENOMEM)
+		goto end;
+
+	/* ... and to refill this hole */
+	ret = hyp_allocator_topup(allocator, &host_mc);
+	if (ret)
+		goto end;
+	/*
+	 * ret is 0 at this point: re-arm the failure code, otherwise a failing
+	 * check below would be reported as a success.
+	 */
+	ret = -EINVAL;
+
+	/* Chunk at addr2 was made smaller by the reclaim */
+	if (addr2 != selftest_alloc(PAGE_SIZE))
+		goto end;
+
+	/* Test reclaiming the entire allocator from the host */
+	selftest_free(addr3);
+	selftest_free(addr2);
+	selftest_free(addr1);
+	if (addr1 != selftest_alloc(SELFTEST_MAX_PAGES * PAGE_SIZE - chunk_hdr_size()))
+		goto end;
+	selftest_free(addr1);
+
+	ret = 0;
+
+end:
+	hyp_spin_unlock(&selftest_lock);
+	return ret;
+}
+#else
+/*
+ * Stub for builds without the selftests. Its only call site is itself
+ * compiled out in that configuration, hence __maybe_unused to avoid an
+ * unused-function warning.
+ */
+static int __maybe_unused selftest_init(void) { return 0; }
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 20be0343abd4..4e7db8b48614 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -614,6 +614,27 @@ static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt
cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/*
+ * Host hypercall: run the heap allocator selftest and return its result in
+ * register 1. When the selftest reports -ENOMEM, a
+ * PKVM_HYP_REQ_HYP_ALLOC_SELFTEST request carrying the number of pages needed
+ * is encoded for the host.
+ */
+static void handle___pkvm_hyp_alloc_selftest(struct kvm_cpu_context *host_ctxt)
+{
+	struct pkvm_hyp_req req = { .type = PKVM_HYP_NO_REQ };
+	int ret;
+
+	ret = hyp_allocator_selftest();
+	if (ret == -ENOMEM) {
+		req.type = PKVM_HYP_REQ_HYP_ALLOC_SELFTEST;
+		req.mem.nr_pages = hyp_alloc_selftest_topup_needed();
+	}
+
+	cpu_reg(host_ctxt, 1) = ret;
+	pkvm_hyp_req_to_smccc(host_ctxt, &req);
+}
+#else
+/* Selftests are compiled out: always report -EPERM in register 1. */
+static void handle___pkvm_hyp_alloc_selftest(struct kvm_cpu_context *host_ctxt)
+{
+	cpu_reg(host_ctxt, 1) = -EPERM;
+}
+#endif
+
static void handle___pkvm_hyp_topup(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(enum pkvm_topup_id, id, host_ctxt, 1);
@@ -629,6 +650,11 @@ static void handle___pkvm_hyp_topup(struct kvm_cpu_context *host_ctxt)
case PKVM_TOPUP_HYP_ALLOC:
ret = hyp_alloc_topup(&host_mc);
break;
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ case PKVM_TOPUP_HYP_ALLOC_SELFTEST:
+ ret = hyp_alloc_selftest_topup(&host_mc);
+ break;
+#endif
default:
ret = -EINVAL;
}
@@ -649,6 +675,11 @@ static void handle___pkvm_hyp_reclaim(struct kvm_cpu_context *host_ctxt)
case PKVM_TOPUP_HYP_ALLOC:
hyp_alloc_reclaim(&host_mc, target);
break;
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ case PKVM_TOPUP_HYP_ALLOC_SELFTEST:
+ hyp_alloc_selftest_reclaim(&host_mc, target);
+ break;
+#endif
default:
ret = -EINVAL;
}
@@ -807,6 +838,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_hyp_topup),
HANDLE_FUNC(__pkvm_hyp_reclaim),
HANDLE_FUNC(__pkvm_hyp_reclaimable),
+ HANDLE_FUNC(__pkvm_hyp_alloc_selftest),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index f29134a1cc73..15281ae1be39 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -143,6 +143,9 @@ static int pkvm_handle_hyp_req(struct pkvm_hyp_req *req)
case PKVM_HYP_REQ_HYP_ALLOC:
ret = pkvm_hyp_topup(PKVM_TOPUP_HYP_ALLOC, req->mem.nr_pages);
break;
+ case PKVM_HYP_REQ_HYP_ALLOC_SELFTEST:
+ ret = pkvm_hyp_topup(PKVM_TOPUP_HYP_ALLOC_SELFTEST, req->mem.nr_pages);
+ break;
}
trace_kvm_handle_pkvm_hyp_req(req, ret);
@@ -348,6 +351,19 @@ static int __init pkvm_drop_host_privileges(void)
return ret;
}
+/*
+ * Run the hypervisor heap allocator selftest (CONFIG_NVHE_EL2_DEBUG only).
+ * On success, everything given to the selftest allocator is reclaimed and the
+ * page count is checked against what the selftest is expected to hold.
+ */
+static void __init pkvm_selftests(void)
+{
+#ifdef CONFIG_NVHE_EL2_DEBUG
+	/* SELFTEST_MAX_PAGES on the hypervisor side */
+	const unsigned long selftest_max_pages = 6;
+	unsigned long reclaimed;
+	int ret;
+
+	ret = pkvm_call_hyp_req(__pkvm_hyp_alloc_selftest);
+	if (ret) {
+		kvm_err("pKVM hyp allocator selftest failed (%d)\n", ret);
+		return;
+	}
+
+	reclaimed = pkvm_hyp_reclaim(PKVM_TOPUP_HYP_ALLOC_SELFTEST, ULONG_MAX);
+	WARN_ON(reclaimed != selftest_max_pages);
+#endif
+}
+
static int __init finalize_pkvm(void)
{
int ret;
@@ -368,6 +384,9 @@ static int __init finalize_pkvm(void)
if (ret)
pr_err("Failed to finalize Hyp protection: %d\n", ret);
+ if (!ret)
+ pkvm_selftests();
+
return ret;
}
device_initcall_sync(finalize_pkvm);
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 11/17] KVM: arm64: Add a shrinker for pKVM
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (9 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 10/17] KVM: arm64: Add selftests for the pKVM heap allocator Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 12/17] KVM: arm64: Filter out non-kernel addresses in kern_hyp_va Vincent Donnefort
` (5 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Integrate the pKVM memory reclaim interface with the host's memory
management subsystem.
This allows the host to automatically recover unused memory from the
hypervisor's heap allocator when the host is under memory pressure.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8bb2c7422cc8..34e6fab29210 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2677,6 +2677,18 @@ static void pkvm_hyp_init_ptrauth(void)
}
}
+static unsigned long
+pkvm_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return pkvm_hyp_reclaimable(PKVM_TOPUP_HYP_ALLOC) ?: SHRINK_EMPTY;
+}
+
+static unsigned long
+pkvm_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return pkvm_hyp_reclaim(PKVM_TOPUP_HYP_ALLOC, sc->nr_to_scan);
+}
+
/* Inits Hyp-mode on all online CPUs */
static int __init init_hyp_mode(void)
{
@@ -2823,6 +2835,8 @@ static int __init init_hyp_mode(void)
kvm_hyp_init_symbols();
if (is_protected_kvm_enabled()) {
+ struct shrinker *shrinker;
+
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) &&
cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH))
pkvm_hyp_init_ptrauth();
@@ -2843,6 +2857,16 @@ static int __init init_hyp_mode(void)
kvm_err("Failed to init hyp memory protection\n");
goto out_err;
}
+
+ shrinker = shrinker_alloc(0, "pkvm");
+ if (shrinker) {
+ shrinker->count_objects = pkvm_shrinker_count;
+ shrinker->scan_objects = pkvm_shrinker_scan;
+ shrinker_register(shrinker);
+ } else {
+ kvm_err("Failed to register shrinker for pKVM\n");
+ }
+
}
return 0;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 12/17] KVM: arm64: Filter out non-kernel addresses in kern_hyp_va
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (10 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 11/17] KVM: arm64: Add a shrinker for pKVM Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 13/17] KVM: arm64: Move hyp_vm refcount into the structure Vincent Donnefort
` (4 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
kern_hyp_va() is idempotent for the hypervisor linear space. This is
handy for nVHE hypervisor callers handling kvm_vcpu or kvm_arch
pointers. Those pointers can originate from the hypervisor space (when
protected mode is enabled, we don't trust the kernel and the hypervisor
uses its own copy) or from the kernel space (we do trust the kernel in
"non-protected" nVHE).
This idempotence does not hold for addresses within the hypervisor
private range, like the ones you get from the pKVM heap allocator
(hyp_alloc()). To resolve this, filter out non-kernel addresses based on
PAGE_OFFSET.
Leave the assembly version untouched as it has no current users.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 01e9c72d6aa7..8d608292d48c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -126,6 +126,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
* replace the instructions with `nop`s.
*/
#ifndef __KVM_VHE_HYPERVISOR__
+ if (!is_ttbr1_addr(v))
+ return v;
+
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */
"ror %0, %0, #1\n" /* rotate to the first tag bit */
"add %0, %0, #0\n" /* insert the low 12 bits of the tag */
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 13/17] KVM: arm64: Move hyp_vm refcount into the structure
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (11 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 12/17] KVM: arm64: Filter out non-kernel addresses in kern_hyp_va Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 14/17] KVM: arm64: Use noclear for PGD in __pkvm_init_vm error path Vincent Donnefort
` (3 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
In preparation for allocating hyp_vm using the pKVM heap allocator
(hyp_alloc()), move its reference count out of the page metadata
(vmemmap) and place it into the structure itself. This transition is
necessary because hyp_alloc() allows multiple small objects to share the
same physical page.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index c904647d2f76..624367d0ef5b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -41,6 +41,7 @@ struct pkvm_hyp_vm {
struct kvm_pgtable pgt;
struct kvm_pgtable_mm_ops mm_ops;
struct hyp_pool pool;
+ unsigned short refcount;
hyp_spinlock_t lock;
/* Array of the hyp vCPU structures for this VM. */
@@ -65,6 +66,18 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
return kvm_vm_is_protected(&hyp_vm->kvm);
}
+static inline void pkvm_hyp_vm_ref_inc(struct pkvm_hyp_vm *hyp_vm)
+{
+ BUG_ON(hyp_vm->refcount == USHRT_MAX);
+ hyp_vm->refcount++;
+}
+
+static inline void pkvm_hyp_vm_ref_dec(struct pkvm_hyp_vm *hyp_vm)
+{
+ BUG_ON(!hyp_vm->refcount);
+ hyp_vm->refcount--;
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_reserve_vm(void);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index e7496eb85628..ebdbe9c92689 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -278,7 +278,7 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
}
hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu);
- hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
+ pkvm_hyp_vm_ref_inc(hyp_vm);
unlock:
hyp_spin_unlock(&vm_table_lock);
@@ -294,7 +294,7 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_spin_lock(&vm_table_lock);
hyp_vcpu->loaded_hyp_vcpu = NULL;
__this_cpu_write(loaded_hyp_vcpu, NULL);
- hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
+ pkvm_hyp_vm_ref_dec(hyp_vm);
hyp_spin_unlock(&vm_table_lock);
}
@@ -311,7 +311,7 @@ struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
hyp_spin_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
if (hyp_vm)
- hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
+ pkvm_hyp_vm_ref_inc(hyp_vm);
hyp_spin_unlock(&vm_table_lock);
return hyp_vm;
@@ -320,7 +320,7 @@ struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
{
hyp_spin_lock(&vm_table_lock);
- hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
+ pkvm_hyp_vm_ref_dec(hyp_vm);
hyp_spin_unlock(&vm_table_lock);
}
@@ -950,7 +950,7 @@ static struct pkvm_hyp_vm *get_pkvm_unref_hyp_vm_locked(pkvm_handle_t handle)
hyp_assert_lock_held(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
- if (!hyp_vm || hyp_page_count(hyp_vm))
+ if (!hyp_vm || hyp_vm->refcount)
return NULL;
return hyp_vm;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 14/17] KVM: arm64: Use noclear for PGD in __pkvm_init_vm error path
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (12 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 13/17] KVM: arm64: Move hyp_vm refcount into the structure Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 15/17] KVM: arm64: Alloc pkvm_hyp_vm using pKVM heap allocator Vincent Donnefort
` (2 subsequent siblings)
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
In the error path of __pkvm_init_vm(), use unmap_donated_memory_noclear()
instead of the clearing variant to release the donated stage-2 PGD back
to the host.
This is a step towards eliminating the clearing variant of
unmap_donated_memory(), as zeroing the PGD memory before returning it to
the host is unnecessary in this failure path.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index ebdbe9c92689..3e7f7606a3da 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -845,7 +845,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
err_remove_mappings:
unmap_donated_memory(hyp_vm, vm_size);
- unmap_donated_memory(pgd, pgd_size);
+ unmap_donated_memory_noclear(pgd, pgd_size);
err_unpin_kvm:
hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
return ret;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 15/17] KVM: arm64: Alloc pkvm_hyp_vm using pKVM heap allocator
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (13 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 14/17] KVM: arm64: Use noclear for PGD in __pkvm_init_vm error path Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 16/17] KVM: arm64: Alloc pkvm_hyp_vcpu " Vincent Donnefort
2026-05-20 15:26 ` [PATCH 17/17] KVM: arm64: Alloc simple_buffer_page using pKVM hyp allocator Vincent Donnefort
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Transition the allocation of the hypervisor VM state structure
(pkvm_hyp_vm) from the host to the hypervisor using
the new pKVM heap allocator (hyp_alloc()).
Previously, the host was responsible for calculating the size of,
allocating, and donating memory for pkvm_hyp_vm during VM creation. With
the heap allocator in place, the hypervisor now allocates this structure
dynamically at EL2.
Use the pkvm_call_hyp_req() wrapper in the host to invoke
__pkvm_init_vm, which automatically handles any top-up requests if the
hypervisor runs out of heap memory during allocation.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
index b257a3b4bfc5..501ab35a3840 100644
--- a/arch/arm64/kvm/hyp/hyp-constants.c
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -7,7 +7,6 @@
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
- DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm));
DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 624367d0ef5b..8e930c8729af 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -82,8 +82,7 @@ void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_reserve_vm(void);
void __pkvm_unreserve_vm(pkvm_handle_t handle);
-int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
- unsigned long pgd_hva);
+int __pkvm_init_vm(struct kvm *host_kvm, void *pgd);
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unsigned long vcpu_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 4e7db8b48614..ebd6b5c09928 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -556,14 +556,30 @@ static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt)
__pkvm_unreserve_vm(handle);
}
+static void errno_to_smccc(int ret, struct kvm_cpu_context *host_ctxt)
+{
+ struct pkvm_hyp_req req = { .type = PKVM_HYP_NO_REQ };
+
+ switch (ret) {
+ case -ENOMEM:
+ req.type = PKVM_HYP_REQ_HYP_ALLOC;
+ req.mem.nr_pages = hyp_alloc_topup_needed();
+ break;
+ }
+
+ cpu_reg(host_ctxt, 1) = ret;
+ pkvm_hyp_req_to_smccc(host_ctxt, &req);
+}
+
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
- DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2);
- DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3);
+ DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 2);
+ void *pgd;
host_kvm = kern_hyp_va(host_kvm);
- cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
+ pgd = (void *)kern_hyp_va(pgd_hva);
+ errno_to_smccc(__pkvm_init_vm(host_kvm, pgd), host_ctxt);
}
static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 3e7f7606a3da..7405626e103a 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -11,6 +11,7 @@
#include <asm/kvm_emulate.h>
+#include <nvhe/alloc.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
@@ -783,24 +784,22 @@ void teardown_selftest_vm(void)
* Unmap the donated memory from the host at stage 2.
*
* host_kvm: A pointer to the host's struct kvm.
- * vm_hva: The host va of the area being donated for the VM state.
- * Must be page aligned.
- * pgd_hva: The host va of the area being donated for the stage-2 PGD for
- * the VM. Must be page aligned. Its size is implied by the VM's
- * VTCR.
+ * pgd: The va of the area being donated for the stage-2 PGD for the VM. Must
+ * be page aligned. Its size is implied by the VM's VTCR.
*
* Return 0 success, negative error code on failure.
*/
-int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
- unsigned long pgd_hva)
+int __pkvm_init_vm(struct kvm *host_kvm, void *pgd)
{
struct pkvm_hyp_vm *hyp_vm = NULL;
size_t vm_size, pgd_size;
unsigned int nr_vcpus;
pkvm_handle_t handle;
- void *pgd = NULL;
int ret;
+ if (!PAGE_ALIGNED(pgd))
+ return -EINVAL;
+
ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
if (ret)
return ret;
@@ -820,15 +819,15 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
- ret = -ENOMEM;
-
- hyp_vm = map_donated_memory(vm_hva, vm_size);
- if (!hyp_vm)
- goto err_remove_mappings;
+ hyp_vm = hyp_alloc(vm_size);
+ if (!hyp_vm) {
+ ret = hyp_alloc_errno();
+ goto err_unpin_kvm;
+ }
- pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
- if (!pgd)
- goto err_remove_mappings;
+ ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), PAGE_ALIGN(pgd_size) >> PAGE_SHIFT);
+ if (ret)
+ goto err_free_hyp_vm;
init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle);
@@ -844,8 +843,9 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
return 0;
err_remove_mappings:
- unmap_donated_memory(hyp_vm, vm_size);
unmap_donated_memory_noclear(pgd, pgd_size);
+err_free_hyp_vm:
+ hyp_free(hyp_vm);
err_unpin_kvm:
hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
return ret;
@@ -981,7 +981,6 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
struct pkvm_hyp_vm *hyp_vm;
struct kvm *host_kvm;
unsigned int idx;
- size_t vm_size;
int err;
hyp_spin_lock(&vm_table_lock);
@@ -1024,8 +1023,7 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
}
- vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
- teardown_donated_memory(mc, hyp_vm, vm_size);
+ hyp_free(hyp_vm);
hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
return 0;
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 15281ae1be39..8fc2e954d382 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -216,8 +216,8 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
*/
static int __pkvm_create_hyp_vm(struct kvm *kvm)
{
- size_t pgd_sz, hyp_vm_sz;
- void *pgd, *hyp_vm;
+ size_t pgd_sz;
+ void *pgd;
int ret;
if (kvm->created_vcpus < 1)
@@ -234,28 +234,15 @@ static int __pkvm_create_hyp_vm(struct kvm *kvm)
if (!pgd)
return -ENOMEM;
- /* Allocate memory to donate to hyp for vm and vcpu pointers. */
- hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
- size_mul(sizeof(void *),
- kvm->created_vcpus)));
- hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
- if (!hyp_vm) {
- ret = -ENOMEM;
- goto free_pgd;
- }
-
- /* Donate the VM memory to hyp and let hyp initialize it. */
- ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
+ ret = pkvm_call_hyp_req(__pkvm_init_vm, kvm, pgd);
if (ret)
- goto free_vm;
+ goto free_pgd;
kvm->arch.pkvm.is_created = true;
init_hyp_stage2_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
-free_vm:
- free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
free_pages_exact(pgd, pgd_sz);
return ret;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 16/17] KVM: arm64: Alloc pkvm_hyp_vcpu using pKVM heap allocator
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (14 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 15/17] KVM: arm64: Alloc pkvm_hyp_vm using pKVM heap allocator Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
2026-05-20 15:26 ` [PATCH 17/17] KVM: arm64: Alloc simple_buffer_page using pKVM hyp allocator Vincent Donnefort
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
Transition the allocation of the hypervisor vCPU state structure
(pkvm_hyp_vcpu) from the host to the hypervisor using the new pKVM heap
allocator (hyp_alloc()).
Previously, the host was responsible for calculating the size of,
allocating, and donating memory for pkvm_hyp_vcpu during VM creation.
With the heap allocator in place, the hypervisor now allocates this
structure dynamically at EL2.
Use the pkvm_call_hyp_req() wrapper in the host to invoke
__pkvm_init_vcpu, which automatically handles any top-up requests
if the hypervisor runs out of heap memory during allocation.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 15c5378b70a0..d7286f2944f3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -268,7 +268,6 @@ typedef u16 pkvm_handle_t;
struct kvm_protected_vm {
pkvm_handle_t handle;
- struct kvm_hyp_memcache teardown_mc;
struct kvm_hyp_memcache stage2_teardown_mc;
bool is_protected;
bool is_created;
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
index 501ab35a3840..b2caae21f271 100644
--- a/arch/arm64/kvm/hyp/hyp-constants.c
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -7,6 +7,5 @@
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
- DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 8e930c8729af..cfb6e409bf49 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -83,8 +83,7 @@ void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_reserve_vm(void);
void __pkvm_unreserve_vm(pkvm_handle_t handle);
int __pkvm_init_vm(struct kvm *host_kvm, void *pgd);
-int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
- unsigned long vcpu_hva);
+int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu);
int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
int __pkvm_start_teardown_vm(pkvm_handle_t handle);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index ebd6b5c09928..8d7e44e657eb 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -586,10 +586,9 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
- DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3);
host_vcpu = kern_hyp_va(host_vcpu);
- cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
+ errno_to_smccc(__pkvm_init_vcpu(handle, host_vcpu), host_ctxt);
}
static void handle___pkvm_vcpu_in_poison_fault(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 7405626e103a..5932e8afce3e 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -645,30 +645,6 @@ static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
}
-static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
-{
- void *va = (void *)kern_hyp_va(host_va);
-
- if (!PAGE_ALIGNED(va))
- return NULL;
-
- if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
- PAGE_ALIGN(size) >> PAGE_SHIFT))
- return NULL;
-
- return va;
-}
-
-static void *map_donated_memory(unsigned long host_va, size_t size)
-{
- void *va = map_donated_memory_noclear(host_va, size);
-
- if (va)
- memset(va, 0, size);
-
- return va;
-}
-
static void __unmap_donated_memory(void *va, size_t size)
{
kvm_flush_dcache_to_poc(va, size);
@@ -676,15 +652,6 @@ static void __unmap_donated_memory(void *va, size_t size)
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
-static void unmap_donated_memory(void *va, size_t size)
-{
- if (!va)
- return;
-
- memset(va, 0, size);
- __unmap_donated_memory(va, size);
-}
-
static void unmap_donated_memory_noclear(void *va, size_t size)
{
if (!va)
@@ -880,16 +847,15 @@ static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm,
return 0;
}
-int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
- unsigned long vcpu_hva)
+int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu)
{
struct pkvm_hyp_vcpu *hyp_vcpu;
struct pkvm_hyp_vm *hyp_vm;
int ret;
- hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
+ hyp_vcpu = hyp_alloc(sizeof(*hyp_vcpu));
if (!hyp_vcpu)
- return -ENOMEM;
+ return hyp_alloc_errno();
hyp_spin_lock(&vm_table_lock);
@@ -910,22 +876,10 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
}
unlock:
hyp_spin_unlock(&vm_table_lock);
-
if (ret)
- unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
- return ret;
-}
-
-static void
-teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
-{
- size = PAGE_ALIGN(size);
- memset(addr, 0, size);
-
- for (void *start = addr; start < addr + size; start += PAGE_SIZE)
- push_hyp_memcache(mc, start, hyp_virt_to_phys);
+ hyp_free(hyp_vcpu);
- unmap_donated_memory_noclear(addr, size);
+ return ret;
}
int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
@@ -977,7 +931,7 @@ int __pkvm_start_teardown_vm(pkvm_handle_t handle)
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
{
- struct kvm_hyp_memcache *mc, *stage2_mc;
+ struct kvm_hyp_memcache *stage2_mc;
struct pkvm_hyp_vm *hyp_vm;
struct kvm *host_kvm;
unsigned int idx;
@@ -998,7 +952,6 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
hyp_spin_unlock(&vm_table_lock);
/* Reclaim guest pages (including page-table pages) */
- mc = &host_kvm->arch.pkvm.teardown_mc;
stage2_mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
reclaim_pgtable_pages(hyp_vm, stage2_mc);
unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);
@@ -1020,7 +973,7 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
unmap_donated_memory_noclear(addr, PAGE_SIZE);
}
- teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_free(hyp_vcpu);
}
hyp_free(hyp_vm);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 8fc2e954d382..5e389099d1b6 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -178,28 +178,19 @@ static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
kvm->arch.pkvm.handle = 0;
kvm->arch.pkvm.is_created = false;
- free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
}
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
- size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
- void *hyp_vcpu;
int ret;
init_hyp_stage2_memcache(&vcpu->arch.pkvm_memcache);
- hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
- if (!hyp_vcpu)
- return -ENOMEM;
-
- ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
+ ret = pkvm_call_hyp_req(__pkvm_init_vcpu, handle, vcpu);
if (!ret)
vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
- else
- free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
return ret;
}
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 17/17] KVM: arm64: Alloc simple_buffer_page using pKVM hyp allocator
2026-05-20 15:26 [PATCH 00/17] KVM: arm64: Introduce pKVM hypervisor heap allocator Vincent Donnefort
` (15 preceding siblings ...)
2026-05-20 15:26 ` [PATCH 16/17] KVM: arm64: Alloc pkvm_hyp_vcpu " Vincent Donnefort
@ 2026-05-20 15:26 ` Vincent Donnefort
16 siblings, 0 replies; 18+ messages in thread
From: Vincent Donnefort @ 2026-05-20 15:26 UTC (permalink / raw)
To: maz, oliver.upton, joey.gouly, suzuki.poulose, yuzenghui,
catalin.marinas, will
Cc: linux-arm-kernel, kvmarm, kernel-team, qperret, tabba,
Vincent Donnefort
In protected mode, transition the allocation of the simple_buffer_page
structures from the host to the hypervisor using the new pKVM heap
allocator.
Previously, the host allocated and donated contiguous backing memory
for these structures. In pKVM the hypervisor can now allocate them
dynamically.
Use the pkvm_call_hyp_req() wrapper in the host to invoke
__tracing_load, which automatically handles any top-up requests if the
hypervisor runs out of heap memory during allocation.
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 8d7e44e657eb..376bf0fd2a2d 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -725,7 +725,7 @@ static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
DECLARE_REG(size_t, desc_size, host_ctxt, 2);
- cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size);
+ errno_to_smccc(__tracing_load(desc_hva, desc_size), host_ctxt);
}
static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/trace.c b/arch/arm64/kvm/hyp/nvhe/trace.c
index a6ca27b18e15..680fe1cdf4a2 100644
--- a/arch/arm64/kvm/hyp/nvhe/trace.c
+++ b/arch/arm64/kvm/hyp/nvhe/trace.c
@@ -4,6 +4,7 @@
* Author: Vincent Donnefort <vdonnefort@google.com>
*/
+#include <nvhe/alloc.h>
#include <nvhe/clock.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
@@ -62,18 +63,34 @@ static void __release_host_mem(void *start, u64 size)
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(start), size >> PAGE_SHIFT));
}
-static int hyp_trace_buffer_load_bpage_backing(struct hyp_trace_buffer *trace_buffer,
- struct hyp_trace_desc *desc)
+static int hyp_trace_buffer_alloc_bpages(struct hyp_trace_buffer *trace_buffer,
+ struct hyp_trace_desc *desc)
{
- void *start = (void *)kern_hyp_va(desc->bpages_backing_start);
- size_t size = desc->bpages_backing_size;
+ void *start;
+ size_t size;
int ret;
- ret = __admit_host_mem(start, size);
- if (ret)
- return ret;
+ if (is_protected_kvm_enabled()) {
+ struct ring_buffer_desc *rb_desc;
+ int cpu;
+
+ size = 0;
+ for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc)
+ size += rb_desc->nr_page_va * sizeof(struct simple_buffer_page);
+
+ start = hyp_alloc(size);
+ if (!start)
+ return hyp_alloc_errno();
+ } else {
+ start = (void *)kern_hyp_va(desc->bpages_backing_start);
+ size = desc->bpages_backing_size;
- memset(start, 0, size);
+ ret = __admit_host_mem(start, size);
+ if (ret)
+ return ret;
+
+ memset(start, 0, size);
+ }
trace_buffer->bpages_backing_start = start;
trace_buffer->bpages_backing_size = size;
@@ -81,7 +98,7 @@ static int hyp_trace_buffer_load_bpage_backing(struct hyp_trace_buffer *trace_bu
return 0;
}
-static void hyp_trace_buffer_unload_bpage_backing(struct hyp_trace_buffer *trace_buffer)
+static void hyp_trace_buffer_free_bpages(struct hyp_trace_buffer *trace_buffer)
{
void *start = trace_buffer->bpages_backing_start;
size_t size = trace_buffer->bpages_backing_size;
@@ -89,9 +106,12 @@ static void hyp_trace_buffer_unload_bpage_backing(struct hyp_trace_buffer *trace
if (!size)
return;
- memset(start, 0, size);
-
- __release_host_mem(start, size);
+ if (is_protected_kvm_enabled()) {
+ hyp_free(start);
+ } else {
+ memset(start, 0, size);
+ __release_host_mem(start, size);
+ }
trace_buffer->bpages_backing_start = 0;
trace_buffer->bpages_backing_size = 0;
@@ -128,7 +148,7 @@ static void hyp_trace_buffer_unload(struct hyp_trace_buffer *trace_buffer)
simple_ring_buffer_unload_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu),
__unpin_shared_page);
- hyp_trace_buffer_unload_bpage_backing(trace_buffer);
+ hyp_trace_buffer_free_bpages(trace_buffer);
}
static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
@@ -143,7 +163,7 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
if (hyp_trace_buffer_loaded(trace_buffer))
return -EINVAL;
- ret = hyp_trace_buffer_load_bpage_backing(trace_buffer, desc);
+ ret = hyp_trace_buffer_alloc_bpages(trace_buffer, desc);
if (ret)
return ret;
@@ -164,19 +184,20 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
return ret;
}
-static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size)
+static bool hyp_trace_desc_is_valid(struct hyp_trace_desc *desc, size_t desc_size)
{
struct ring_buffer_desc *rb_desc;
unsigned int cpu;
- size_t nr_bpages;
void *desc_end;
+ if (!is_protected_kvm_enabled())
+ return true;
+
/*
- * Both desc_size and bpages_backing_size are untrusted host-provided
- * values. We rely on __pkvm_host_donate_hyp() to enforce their validity.
+ * desc_size is an untrusted host-provided value. We rely on
+ * __pkvm_host_donate_hyp() to enforce its validity.
*/
desc_end = (void *)desc + desc_size;
- nr_bpages = desc->bpages_backing_size / sizeof(struct simple_buffer_page);
for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) {
/* Can we read nr_page_va? */
@@ -187,17 +208,11 @@ static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_siz
if ((void *)rb_desc + struct_size(rb_desc, page_va, rb_desc->nr_page_va) > desc_end)
return false;
- /* Overflow bpages backing memory? */
- if (nr_bpages < rb_desc->nr_page_va)
- return false;
-
if (cpu >= hyp_nr_cpus)
return false;
if (cpu != rb_desc->cpu)
return false;
-
- nr_bpages -= rb_desc->nr_page_va;
}
return true;
@@ -212,8 +227,10 @@ int __tracing_load(unsigned long desc_hva, size_t desc_size)
if (ret)
return ret;
- if (!hyp_trace_desc_validate(desc, desc_size))
+ if (!hyp_trace_desc_is_valid(desc, desc_size)) {
+ ret = -EINVAL;
goto err_release_desc;
+ }
hyp_spin_lock(&trace_buffer.lock);
diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
index 8b7f2bf2fba8..afc8c3ea68f5 100644
--- a/arch/arm64/kvm/hyp_trace.c
+++ b/arch/arm64/kvm/hyp_trace.c
@@ -13,6 +13,7 @@
#include <asm/kvm_host.h>
#include <asm/kvm_hyptrace.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_pkvm.h>
#include "hyp_trace.h"
@@ -157,10 +158,18 @@ static void __unshare_page(unsigned long va)
static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
{
- int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
size_t backing_size;
+ int nr_bpages;
void *start;
+ /* pKVM uses hyp_alloc() to allocate struct simple_buffer_page */
+ if (is_protected_kvm_enabled()) {
+ trace_buffer->desc->bpages_backing_start = 0;
+ trace_buffer->desc->bpages_backing_size = 0;
+ return 0;
+ }
+
+ nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
num_possible_cpus());
@@ -176,6 +185,9 @@ static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_
static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
{
+ if (!trace_buffer->desc->bpages_backing_start)
+ return;
+
free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
trace_buffer->desc->bpages_backing_size);
}
@@ -262,7 +274,7 @@ static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
if (ret)
goto err_free_buffer;
- ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
+ ret = pkvm_call_hyp_req(__tracing_load, (unsigned long)desc, desc_size);
if (ret)
goto err_unload_pages;
--
2.54.0.631.ge1b05301d1-goog
^ permalink raw reply related [flat|nested] 18+ messages in thread