From: Will Deacon <will@kernel.org>
To: kvmarm@lists.linux.dev
Cc: linux-arm-kernel@lists.infradead.org,
Will Deacon <will@kernel.org>, Marc Zyngier <maz@kernel.org>,
Oliver Upton <oupton@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
Zenghui Yu <yuzenghui@huawei.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Quentin Perret <qperret@google.com>,
Fuad Tabba <tabba@google.com>,
Vincent Donnefort <vdonnefort@google.com>,
Mostafa Saleh <smostafa@google.com>,
Alexandru Elisei <alexandru.elisei@arm.com>
Subject: [PATCH v4 15/38] KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page()
Date: Fri, 27 Mar 2026 14:00:14 +0000 [thread overview]
Message-ID: <20260327140039.21228-16-will@kernel.org> (raw)
In-Reply-To: <20260327140039.21228-1-will@kernel.org>
To enable reclaim of pages from a protected VM during teardown,
introduce a new hypercall to reclaim a single page from a protected
guest that is in the dying state.
Since the EL2 code is non-preemptible, the new hypercall deliberately
acts on a single page at a time so as to allow EL1 to reschedule
frequently during the teardown operation.
Reviewed-by: Vincent Donnefort <vdonnefort@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 +
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 +++
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 79 +++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/pkvm.c | 14 ++++
6 files changed, 105 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index dfc6625c8269..b6df8f64d573 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -90,6 +90,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 7061b0be340a..29f81a1d9e1f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -40,6 +40,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot);
int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 04c7ca703014..506831804f64 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -74,6 +74,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unsigned long vcpu_hva);
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
int __pkvm_start_teardown_vm(pkvm_handle_t handle);
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 970656318cf2..7294c94f9296 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -573,6 +573,14 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
}
+static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
+{ /*
+   * Host hypercall handler: forwards a VM handle and a guest frame number
+   * taken from the host context to __pkvm_reclaim_dying_guest_page() and
+   * stores the result back into the host context.
+   */
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); /* first argument, index 1 of host_ctxt */
+ DECLARE_REG(u64, gfn, host_ctxt, 2); /* second argument, index 2 of host_ctxt */
+
+ cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn); /* result is written to index 1 */
+}
+
static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
@@ -626,6 +634,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
+ HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
HANDLE_FUNC(__pkvm_start_teardown_vm),
HANDLE_FUNC(__pkvm_finalize_teardown_vm),
HANDLE_FUNC(__pkvm_vcpu_load),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 03e6fa124253..ca266a4d9d50 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -738,6 +738,32 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
return check_page_state_range(&vm->pgt, addr, size, &d);
}
+static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
+{ /*
+   * Look up the leaf entry for @ipa in @vm's page-table and, on success,
+   * return the entry through @ptep and the physical address it encodes
+   * through @physp.
+   *
+   * Returns 0 on success, the error from kvm_pgtable_get_leaf(), -ENOENT
+   * if the entry is not valid, -E2BIG if the leaf is not at
+   * KVM_PGTABLE_LAST_LEVEL, or the error from check_range_allowed_memory().
+   * @ptep and @physp are only written on success.
+   */
+ kvm_pte_t pte;
+ u64 phys;
+ s8 level;
+ int ret;
+
+ ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
+ if (ret)
+ return ret;
+ if (!kvm_pte_valid(pte))
+ return -ENOENT;
+ if (level != KVM_PGTABLE_LAST_LEVEL) /* only last-level (presumably page-sized) leaves are accepted */
+ return -E2BIG;
+
+ phys = kvm_pte_to_phys(pte);
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ if (WARN_ON(ret)) /* a valid leaf failing this check is treated as unexpected */
+ return ret;
+
+ *ptep = pte;
+ *physp = phys;
+
+ return 0;
+}
+
int __pkvm_host_share_hyp(u64 pfn)
{
u64 phys = hyp_pfn_to_phys(pfn);
@@ -971,6 +997,59 @@ static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *s
return 0;
}
+static void hyp_poison_page(phys_addr_t phys)
+{ /*
+   * Overwrite the page at @phys with zeroes through a temporary fixmap
+   * mapping, flush it to the PoC, then drop the mapping.
+   */
+ void *addr = hyp_fixmap_map(phys);
+
+ memset(addr, 0, PAGE_SIZE);
+ /*
+ * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
+ * here as the latter may elide the CMO under the assumption that FWB
+ * will be enabled on CPUs that support it. This is incorrect for the
+ * host stage-2 and would otherwise lead to a malicious host potentially
+ * being able to read the contents of newly reclaimed guest pages.
+ */
+ kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
+ hyp_fixmap_unmap();
+}
+
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+{ /*
+   * Reclaim the single page mapped at guest frame @gfn in @vm: unmap it
+   * from the guest page-table and set its owner to PKVM_ID_HOST in the
+   * host stage-2.
+   *
+   * Returns 0 once the page has been handed back, an error from
+   * get_valid_guest_pte() if there is no suitable mapping at @gfn, or
+   * -EPERM if the guest page state is neither PKVM_PAGE_OWNED nor
+   * PKVM_PAGE_SHARED_OWNED.
+   */
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ kvm_pte_t pte;
+ u64 phys;
+ int ret;
+
+ host_lock_component(); /* host lock first, then guest; released in reverse order below */
+ guest_lock_component(vm);
+
+ ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
+ if (ret)
+ goto unlock;
+
+ switch (guest_get_page_state(pte, ipa)) {
+ case PKVM_PAGE_OWNED:
+ WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
+ hyp_poison_page(phys); /* zero the contents before the host gets the page */
+ break;
+ case PKVM_PAGE_SHARED_OWNED: /* not poisoned; presumably because the host can already access a shared page */
+ WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
+ break;
+ default:
+ ret = -EPERM;
+ goto unlock;
+ }
+
+ WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE)); /* failures here only warn; ret stays 0 */
+ WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
+
+unlock:
+ guest_unlock_component(vm);
+ host_unlock_component();
+
+ return ret;
+}
+
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index c4e05ab8b605..a2d45f4b0cf6 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -862,6 +862,20 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
unmap_donated_memory_noclear(addr, size);
}
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
+{ /*
+   * Reclaim the page at guest frame @gfn from the VM identified by
+   * @handle, but only if that VM exists and is marked as dying.
+   *
+   * Returns -EINVAL if the handle does not resolve to a VM or the VM is
+   * not dying; otherwise the result of __pkvm_host_reclaim_page_guest().
+   */
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ hyp_spin_lock(&vm_table_lock); /* held across both the lookup and the reclaim */
+ hyp_vm = get_vm_by_handle(handle);
+ if (hyp_vm && hyp_vm->kvm.arch.pkvm.is_dying)
+ ret = __pkvm_host_reclaim_page_guest(gfn, hyp_vm);
+ hyp_spin_unlock(&vm_table_lock);
+
+ return ret;
+}
+
int __pkvm_start_teardown_vm(pkvm_handle_t handle)
{
struct pkvm_hyp_vm *hyp_vm;
--
2.53.0.1018.g2bb0e51243-goog
next prev parent reply other threads:[~2026-03-27 14:02 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-27 13:59 [PATCH v4 00/38] KVM: arm64: Add support for protected guest memory with pKVM Will Deacon
2026-03-27 14:00 ` [PATCH v4 01/38] KVM: arm64: Remove unused PKVM_ID_FFA definition Will Deacon
2026-03-27 14:00 ` [PATCH v4 02/38] KVM: arm64: Don't leak stage-2 page-table if VM fails to init under pKVM Will Deacon
2026-03-27 14:00 ` [PATCH v4 03/38] KVM: arm64: Move handle check into pkvm_pgtable_stage2_destroy_range() Will Deacon
2026-03-27 14:00 ` [PATCH v4 04/38] KVM: arm64: Rename __pkvm_pgtable_stage2_unmap() Will Deacon
2026-03-27 14:00 ` [PATCH v4 05/38] KVM: arm64: Don't advertise unsupported features for protected guests Will Deacon
2026-03-27 14:00 ` [PATCH v4 06/38] KVM: arm64: Expose self-hosted debug regs as RAZ/WI " Will Deacon
2026-03-27 14:00 ` [PATCH v4 07/38] KVM: arm64: Remove is_protected_kvm_enabled() checks from hypercalls Will Deacon
2026-03-27 14:00 ` [PATCH v4 08/38] KVM: arm64: Ignore MMU notifier callbacks for protected VMs Will Deacon
2026-03-27 14:00 ` [PATCH v4 09/38] KVM: arm64: Prevent unsupported memslot operations on " Will Deacon
2026-03-27 14:00 ` [PATCH v4 10/38] KVM: arm64: Ignore -EAGAIN when mapping in pages for the pKVM host Will Deacon
2026-03-27 14:00 ` [PATCH v4 11/38] KVM: arm64: Split teardown hypercall into two phases Will Deacon
2026-03-27 14:00 ` [PATCH v4 12/38] KVM: arm64: Introduce __pkvm_host_donate_guest() Will Deacon
2026-03-27 14:00 ` [PATCH v4 13/38] KVM: arm64: Hook up donation hypercall to pkvm_pgtable_stage2_map() Will Deacon
2026-03-27 14:00 ` [PATCH v4 14/38] KVM: arm64: Handle aborts from protected VMs Will Deacon
2026-03-27 14:00 ` Will Deacon [this message]
2026-03-27 14:00 ` [PATCH v4 16/38] KVM: arm64: Hook up reclaim hypercall to pkvm_pgtable_stage2_destroy() Will Deacon
2026-03-27 14:00 ` [PATCH v4 17/38] KVM: arm64: Factor out pKVM host exception injection logic Will Deacon
2026-03-27 14:00 ` [PATCH v4 18/38] KVM: arm64: Support translation faults in inject_host_exception() Will Deacon
2026-03-27 14:00 ` [PATCH v4 19/38] KVM: arm64: Inject SIGSEGV on illegal accesses Will Deacon
2026-03-27 14:00 ` [PATCH v4 20/38] KVM: arm64: Avoid pointless annotation when mapping host-owned pages Will Deacon
2026-03-27 14:00 ` [PATCH v4 21/38] KVM: arm64: Generalise kvm_pgtable_stage2_set_owner() Will Deacon
2026-03-27 14:00 ` [PATCH v4 22/38] KVM: arm64: Introduce host_stage2_set_owner_metadata_locked() Will Deacon
2026-03-27 14:00 ` [PATCH v4 23/38] KVM: arm64: Change 'pkvm_handle_t' to u16 Will Deacon
2026-03-27 14:00 ` [PATCH v4 24/38] KVM: arm64: Annotate guest donations with handle and gfn in host stage-2 Will Deacon
2026-03-27 14:00 ` [PATCH v4 25/38] KVM: arm64: Introduce hypercall to force reclaim of a protected page Will Deacon
2026-03-27 14:00 ` [PATCH v4 26/38] KVM: arm64: Reclaim faulting page from pKVM in spurious fault handler Will Deacon
2026-03-27 14:00 ` [PATCH v4 27/38] KVM: arm64: Return -EFAULT from VCPU_RUN on access to a poisoned pte Will Deacon
2026-03-27 14:00 ` [PATCH v4 28/38] KVM: arm64: Add hvc handler at EL2 for hypercalls from protected VMs Will Deacon
2026-03-27 14:00 ` [PATCH v4 29/38] KVM: arm64: Implement the MEM_SHARE hypercall for " Will Deacon
2026-03-27 14:00 ` [PATCH v4 30/38] KVM: arm64: Implement the MEM_UNSHARE " Will Deacon
2026-03-27 14:00 ` [PATCH v4 31/38] KVM: arm64: Allow userspace to create protected VMs when pKVM is enabled Will Deacon
2026-03-27 14:00 ` [PATCH v4 32/38] KVM: arm64: Add some initial documentation for pKVM Will Deacon
2026-03-27 14:00 ` [PATCH v4 33/38] KVM: arm64: Extend pKVM page ownership selftests to cover guest donation Will Deacon
2026-03-27 14:00 ` [PATCH v4 34/38] KVM: arm64: Register 'selftest_vm' in the VM table Will Deacon
2026-03-27 14:00 ` [PATCH v4 35/38] KVM: arm64: Extend pKVM page ownership selftests to cover forced reclaim Will Deacon
2026-03-27 14:00 ` [PATCH v4 36/38] KVM: arm64: Extend pKVM page ownership selftests to cover guest hvcs Will Deacon
2026-03-27 14:00 ` [PATCH v4 37/38] KVM: arm64: Rename PKVM_PAGE_STATE_MASK Will Deacon
2026-03-27 14:00 ` [PATCH v4 38/38] drivers/virt: pkvm: Add Kconfig dependency on DMA_RESTRICTED_POOL Will Deacon
2026-03-27 18:13 ` [PATCH v4 00/38] KVM: arm64: Add support for protected guest memory with pKVM Will Deacon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260327140039.21228-16-will@kernel.org \
--to=will@kernel.org \
--cc=alexandru.elisei@arm.com \
--cc=catalin.marinas@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=maz@kernel.org \
--cc=oupton@kernel.org \
--cc=qperret@google.com \
--cc=smostafa@google.com \
--cc=suzuki.poulose@arm.com \
--cc=tabba@google.com \
--cc=vdonnefort@google.com \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox