From: Will Deacon <will@kernel.org>
To: kvmarm@lists.linux.dev
Cc: linux-arm-kernel@lists.infradead.org,
Will Deacon <will@kernel.org>, Marc Zyngier <maz@kernel.org>,
Oliver Upton <oupton@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
Zenghui Yu <yuzenghui@huawei.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Quentin Perret <qperret@google.com>,
Fuad Tabba <tabba@google.com>,
Vincent Donnefort <vdonnefort@google.com>,
Mostafa Saleh <smostafa@google.com>,
Alexandru Elisei <alexandru.elisei@arm.com>
Subject: [PATCH v3 14/36] KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page()
Date: Thu, 5 Mar 2026 14:43:27 +0000
Message-ID: <20260305144351.17071-15-will@kernel.org>
In-Reply-To: <20260305144351.17071-1-will@kernel.org>
To enable reclaim of pages from a protected VM during teardown,
introduce a new hypercall to reclaim a single page from a protected
guest that is in the dying state.

Since the EL2 code is non-preemptible, the new hypercall deliberately
acts on a single page at a time so as to allow EL1 to reschedule
frequently during the teardown operation.
Reviewed-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 +
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 +++
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 79 +++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/pkvm.c | 14 ++++
6 files changed, 105 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index dfc6625c8269..b6df8f64d573 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -90,6 +90,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c0cc53d1dc9..cde38a556049 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -41,6 +41,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot);
int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 04c7ca703014..506831804f64 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -74,6 +74,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unsigned long vcpu_hva);
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
int __pkvm_start_teardown_vm(pkvm_handle_t handle);
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 970656318cf2..7294c94f9296 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -573,6 +573,14 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
}
+static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+
+ cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
+}
+
static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
@@ -626,6 +634,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
+ HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
HANDLE_FUNC(__pkvm_start_teardown_vm),
HANDLE_FUNC(__pkvm_finalize_teardown_vm),
HANDLE_FUNC(__pkvm_vcpu_load),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 0a9a96236841..31b6a52e5e4c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -738,6 +738,32 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
return check_page_state_range(&vm->pgt, addr, size, &d);
}
+static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
+{
+ kvm_pte_t pte;
+ u64 phys;
+ s8 level;
+ int ret;
+
+ ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
+ if (ret)
+ return ret;
+ if (!kvm_pte_valid(pte))
+ return -ENOENT;
+ if (level != KVM_PGTABLE_LAST_LEVEL)
+ return -E2BIG;
+
+ phys = kvm_pte_to_phys(pte);
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ if (WARN_ON(ret))
+ return ret;
+
+ *ptep = pte;
+ *physp = phys;
+
+ return 0;
+}
+
int __pkvm_host_share_hyp(u64 pfn)
{
u64 phys = hyp_pfn_to_phys(pfn);
@@ -971,6 +997,59 @@ static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *s
return 0;
}
+static void hyp_poison_page(phys_addr_t phys)
+{
+ void *addr = hyp_fixmap_map(phys);
+
+ memset(addr, 0, PAGE_SIZE);
+ /*
+ * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
+ * here as the latter may elide the CMO under the assumption that FWB
+ * will be enabled on CPUs that support it. This is incorrect for the
+ * host stage-2 and would otherwise lead to a malicious host potentially
+ * being able to read the contents of newly reclaimed guest pages.
+ */
+ kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
+ hyp_fixmap_unmap();
+}
+
+int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ kvm_pte_t pte;
+ u64 phys;
+ int ret;
+
+ host_lock_component();
+ guest_lock_component(vm);
+
+ ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
+ if (ret)
+ goto unlock;
+
+ switch (guest_get_page_state(pte, ipa)) {
+ case PKVM_PAGE_OWNED:
+ WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
+ hyp_poison_page(phys);
+ break;
+ case PKVM_PAGE_SHARED_OWNED:
+ WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
+ break;
+ default:
+ ret = -EPERM;
+ goto unlock;
+ }
+
+ WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
+ WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
+
+unlock:
+ guest_unlock_component(vm);
+ host_unlock_component();
+
+ return ret;
+}
+
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index c4e05ab8b605..a2d45f4b0cf6 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -862,6 +862,20 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
unmap_donated_memory_noclear(addr, size);
}
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
+{
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ hyp_spin_lock(&vm_table_lock);
+ hyp_vm = get_vm_by_handle(handle);
+ if (hyp_vm && hyp_vm->kvm.arch.pkvm.is_dying)
+ ret = __pkvm_host_reclaim_page_guest(gfn, hyp_vm);
+ hyp_spin_unlock(&vm_table_lock);
+
+ return ret;
+}
+
int __pkvm_start_teardown_vm(pkvm_handle_t handle)
{
struct pkvm_hyp_vm *hyp_vm;
--
2.53.0.473.g4a7958ca14-goog
next prev parent reply other threads:[~2026-03-05 14:44 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-05 14:43 [PATCH v3 00/36] KVM: arm64: Add support for protected guest memory with pKVM Will Deacon
2026-03-05 14:43 ` [PATCH v3 01/36] KVM: arm64: Don't leak stage-2 page-table if VM fails to init under pKVM Will Deacon
2026-03-11 12:48 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 02/36] KVM: arm64: Move handle check into pkvm_pgtable_stage2_destroy_range() Will Deacon
2026-03-11 10:15 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 03/36] KVM: arm64: Rename __pkvm_pgtable_stage2_unmap() Will Deacon
2026-03-11 12:49 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 04/36] KVM: arm64: Don't advertise unsupported features for protected guests Will Deacon
2026-03-11 10:15 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 05/36] KVM: arm64: Expose self-hosted debug regs as RAZ/WI " Will Deacon
2026-03-05 14:43 ` [PATCH v3 06/36] KVM: arm64: Remove is_protected_kvm_enabled() checks from hypercalls Will Deacon
2026-03-11 10:16 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 07/36] KVM: arm64: Ignore MMU notifier callbacks for protected VMs Will Deacon
2026-03-11 12:50 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 08/36] KVM: arm64: Prevent unsupported memslot operations on " Will Deacon
2026-03-11 10:16 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 09/36] KVM: arm64: Ignore -EAGAIN when mapping in pages for the pKVM host Will Deacon
2026-03-11 10:10 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 10/36] KVM: arm64: Split teardown hypercall into two phases Will Deacon
2026-03-11 10:22 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 11/36] KVM: arm64: Introduce __pkvm_host_donate_guest() Will Deacon
2026-03-20 12:38 ` Marc Zyngier
2026-03-23 14:55 ` Will Deacon
2026-03-05 14:43 ` [PATCH v3 12/36] KVM: arm64: Hook up donation hypercall to pkvm_pgtable_stage2_map() Will Deacon
2026-03-05 14:43 ` [PATCH v3 13/36] KVM: arm64: Handle aborts from protected VMs Will Deacon
2026-03-11 10:22 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 14/36] KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page() Will Deacon [this message]
2026-03-05 14:43 ` [PATCH v3 15/36] KVM: arm64: Hook up reclaim hypercall to pkvm_pgtable_stage2_destroy() Will Deacon
2026-03-05 14:43 ` [PATCH v3 16/36] KVM: arm64: Factor out pKVM host exception injection logic Will Deacon
2026-03-11 10:12 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 17/36] KVM: arm64: Support translation faults in inject_host_exception() Will Deacon
2026-03-11 10:12 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 18/36] KVM: arm64: Inject SIGSEGV on illegal accesses Will Deacon
2026-03-11 10:13 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 19/36] KVM: arm64: Avoid pointless annotation when mapping host-owned pages Will Deacon
2026-03-05 14:43 ` [PATCH v3 20/36] KVM: arm64: Generalise kvm_pgtable_stage2_set_owner() Will Deacon
2026-03-05 14:43 ` [PATCH v3 21/36] KVM: arm64: Introduce host_stage2_set_owner_metadata_locked() Will Deacon
2026-03-05 14:43 ` [PATCH v3 22/36] KVM: arm64: Change 'pkvm_handle_t' to u16 Will Deacon
2026-03-05 14:43 ` [PATCH v3 23/36] KVM: arm64: Annotate guest donations with handle and gfn in host stage-2 Will Deacon
2026-03-05 14:43 ` [PATCH v3 24/36] KVM: arm64: Introduce hypercall to force reclaim of a protected page Will Deacon
2026-03-05 14:43 ` [PATCH v3 25/36] KVM: arm64: Reclaim faulting page from pKVM in spurious fault handler Will Deacon
2026-03-20 16:20 ` Marc Zyngier
2026-03-21 9:39 ` Marc Zyngier
2026-03-23 14:58 ` Will Deacon
2026-03-05 14:43 ` [PATCH v3 26/36] KVM: arm64: Return -EFAULT from VCPU_RUN on access to a poisoned pte Will Deacon
2026-03-20 16:35 ` Marc Zyngier
2026-03-23 14:58 ` Will Deacon
2026-03-05 14:43 ` [PATCH v3 27/36] KVM: arm64: Add hvc handler at EL2 for hypercalls from protected VMs Will Deacon
2026-03-05 14:43 ` [PATCH v3 28/36] KVM: arm64: Implement the MEM_SHARE hypercall for " Will Deacon
2026-03-05 14:43 ` [PATCH v3 29/36] KVM: arm64: Implement the MEM_UNSHARE " Will Deacon
2026-03-05 14:43 ` [PATCH v3 30/36] KVM: arm64: Allow userspace to create protected VMs when pKVM is enabled Will Deacon
2026-03-11 10:25 ` Fuad Tabba
2026-03-20 13:22 ` Marc Zyngier
2026-03-23 15:00 ` Will Deacon
2026-03-05 14:43 ` [PATCH v3 31/36] KVM: arm64: Add some initial documentation for pKVM Will Deacon
2026-03-11 10:25 ` Fuad Tabba
2026-03-05 14:43 ` [PATCH v3 32/36] KVM: arm64: Extend pKVM page ownership selftests to cover guest donation Will Deacon
2026-03-05 14:43 ` [PATCH v3 33/36] KVM: arm64: Register 'selftest_vm' in the VM table Will Deacon
2026-03-05 14:43 ` [PATCH v3 34/36] KVM: arm64: Extend pKVM page ownership selftests to cover forced reclaim Will Deacon
2026-03-05 14:43 ` [PATCH v3 35/36] KVM: arm64: Extend pKVM page ownership selftests to cover guest hvcs Will Deacon
2026-03-05 14:43 ` [PATCH v3 36/36] KVM: arm64: Rename PKVM_PAGE_STATE_MASK Will Deacon
2026-03-11 10:26 ` Fuad Tabba
2026-03-11 10:07 ` [PATCH v3 00/36] KVM: arm64: Add support for protected guest memory with pKVM Fuad Tabba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260305144351.17071-15-will@kernel.org \
--to=will@kernel.org \
--cc=alexandru.elisei@arm.com \
--cc=catalin.marinas@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=maz@kernel.org \
--cc=oupton@kernel.org \
--cc=qperret@google.com \
--cc=smostafa@google.com \
--cc=suzuki.poulose@arm.com \
--cc=tabba@google.com \
--cc=vdonnefort@google.com \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.