public inbox for kvmarm@lists.cs.columbia.edu
 help / color / mirror / Atom feed
From: Will Deacon <will@kernel.org>
To: kvmarm@lists.linux.dev
Cc: linux-arm-kernel@lists.infradead.org,
	Will Deacon <will@kernel.org>, Marc Zyngier <maz@kernel.org>,
	Oliver Upton <oupton@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Zenghui Yu <yuzenghui@huawei.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Quentin Perret <qperret@google.com>,
	Fuad Tabba <tabba@google.com>,
	Vincent Donnefort <vdonnefort@google.com>,
	Mostafa Saleh <smostafa@google.com>
Subject: [PATCH 20/30] KVM: arm64: Introduce hypercall to force reclaim of a protected page
Date: Mon,  5 Jan 2026 15:49:28 +0000	[thread overview]
Message-ID: <20260105154939.11041-21-will@kernel.org> (raw)
In-Reply-To: <20260105154939.11041-1-will@kernel.org>

Introduce a new hypercall, __pkvm_force_reclaim_guest_page(), to allow
the host to forcefully reclaim a physical page that was previously donated
to a protected guest. This results in the page being zeroed and the
previous guest mapping being poisoned so that new pages cannot be
subsequently donated at the same IPA.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h              |   1 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   1 +
 arch/arm64/kvm/hyp/include/nvhe/memory.h      |   6 +
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h        |   1 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |   8 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 118 +++++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/pkvm.c                |   4 +-
 7 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f14f845aeedd..286a7379a368 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -86,6 +86,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+	__KVM_HOST_SMCCC_FUNC___pkvm_force_reclaim_guest_page,
 	__KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
 	__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index cde38a556049..f27b037abaf3 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -41,6 +41,7 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
+int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys);
 int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
 int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
 			    enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index dee1a406b0c2..4cedb720c75d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -30,6 +30,12 @@ enum pkvm_page_state {
 	 * struct hyp_page.
 	 */
 	PKVM_NOPAGE			= BIT(0) | BIT(1),
+
+	/*
+	 * 'Meta-states' which aren't encoded directly in the PTE's SW bits (or
+	 * the hyp_vmemmap entry for the host)
+	 */
+	PKVM_POISON			= BIT(2),
 };
 #define PKVM_PAGE_STATE_MASK		(BIT(0) | BIT(1))
 
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 506831804f64..a5a7bb453f3e 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -78,6 +78,7 @@ int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
 int __pkvm_start_teardown_vm(pkvm_handle_t handle);
 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
 
+struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle);
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 					 unsigned int vcpu_idx);
 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b1940e639ad3..7d66cdd7de57 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -570,6 +570,13 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
 }
 
+static void handle___pkvm_force_reclaim_guest_page(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_host_force_reclaim_page_guest(phys);
+}
+
 static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
@@ -630,6 +637,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__pkvm_unreserve_vm),
 	HANDLE_FUNC(__pkvm_init_vm),
 	HANDLE_FUNC(__pkvm_init_vcpu),
+	HANDLE_FUNC(__pkvm_force_reclaim_guest_page),
 	HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
 	HANDLE_FUNC(__pkvm_start_teardown_vm),
 	HANDLE_FUNC(__pkvm_finalize_teardown_vm),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 1a341337b272..5d6028c41125 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -768,8 +768,17 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa
 	return 0;
 }
 
+#define KVM_GUEST_INVALID_PTE_POISONED	FIELD_PREP(KVM_INVALID_PTE_ANNOT_MASK, 1)
+static bool guest_pte_is_poisoned(kvm_pte_t pte)
+{
+	return pte == KVM_GUEST_INVALID_PTE_POISONED;
+}
+
 static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
 {
+	if (guest_pte_is_poisoned(pte))
+		return PKVM_POISON;
+
 	if (!kvm_pte_valid(pte))
 		return PKVM_NOPAGE;
 
@@ -798,6 +807,8 @@ static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep,
 	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
 	if (ret)
 		return ret;
+	if (guest_pte_is_poisoned(pte))
+		return -EHWPOISON;
 	if (!kvm_pte_valid(pte))
 		return -ENOENT;
 	if (level != KVM_PGTABLE_LAST_LEVEL)
@@ -1076,6 +1087,107 @@ static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
 	       FIELD_PREP(KVM_HOST_INVALID_PTE_GUEST_GFN_MASK, gfn);
 }
 
+static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
+				       u64 *gfn)
+{
+	pkvm_handle_t handle;
+	u64 meta;
+
+	if (kvm_pte_valid(pte))
+		return -EINVAL;
+
+	if (FIELD_GET(KVM_INVALID_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
+		return -EPERM;
+
+	meta = FIELD_GET(KVM_INVALID_PTE_EXTRA_MASK, pte);
+	handle = FIELD_GET(KVM_HOST_INVALID_PTE_GUEST_HANDLE_MASK, meta);
+	*vm = get_vm_by_handle(handle);
+	if (!*vm) {
+		/* We probably raced with teardown; try again */
+		return -EAGAIN;
+	}
+
+	*gfn = FIELD_GET(KVM_HOST_INVALID_PTE_GUEST_GFN_MASK, meta);
+	return 0;
+}
+
+static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
+				      u64 *gfn)
+{
+	enum pkvm_page_state state;
+	kvm_pte_t pte;
+	s8 level;
+	int ret;
+
+	if (!addr_is_memory(phys))
+		return -EFAULT;
+
+	state = get_host_state(hyp_phys_to_page(phys));
+	switch (state) {
+	case PKVM_PAGE_OWNED:
+	case PKVM_PAGE_SHARED_OWNED:
+	case PKVM_PAGE_SHARED_BORROWED:
+		/* The access should no longer fault; try again. */
+		return -EAGAIN;
+	case PKVM_NOPAGE:
+		break;
+	default:
+		return -EPERM;
+	}
+
+	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
+		return -EINVAL;
+
+	return host_stage2_decode_gfn_meta(pte, vm, gfn);
+}
+
+int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
+{
+	struct pkvm_hyp_vm *vm;
+	u64 gfn, ipa, pa;
+	kvm_pte_t pte;
+	int ret;
+
+	hyp_spin_lock(&vm_table_lock);
+	host_lock_component();
+
+	ret = host_stage2_get_guest_info(phys, &vm, &gfn);
+	if (ret)
+		goto unlock_host;
+
+	ipa = hyp_pfn_to_phys(gfn);
+	guest_lock_component(vm);
+	ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
+	if (ret)
+		goto unlock_guest;
+
+	WARN_ON(pa != phys);
+	if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
+		ret = -EPERM;
+		goto unlock_guest;
+	}
+
+	/* We really shouldn't be allocating, so don't pass a memcache */
+	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
+					  KVM_GUEST_INVALID_PTE_POISONED);
+	if (ret)
+		goto unlock_guest;
+
+	hyp_poison_page(phys);
+	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
+unlock_guest:
+	guest_unlock_component(vm);
+unlock_host:
+	host_unlock_component();
+	hyp_spin_unlock(&vm_table_lock);
+
+	return ret;
+}
+
 int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
 {
 	u64 ipa = hyp_pfn_to_phys(gfn);
@@ -1110,7 +1222,11 @@ int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
 	guest_unlock_component(vm);
 	host_unlock_component();
 
-	return ret;
+	/*
+	 * -EHWPOISON implies that the page was forcefully reclaimed already
+	 * so return success for the GUP pin to be dropped.
+	 */
+	return ret && ret != -EHWPOISON ? ret : 0;
 }
 
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 9f0997150cf5..df340de59eed 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -230,10 +230,12 @@ void pkvm_hyp_vm_table_init(void *tbl)
 /*
  * Return the hyp vm structure corresponding to the handle.
  */
-static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
+struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
 {
 	unsigned int idx = vm_handle_to_idx(handle);
 
+	hyp_assert_lock_held(&vm_table_lock);
+
 	if (unlikely(idx >= KVM_MAX_PVMS))
 		return NULL;
 
-- 
2.52.0.351.gbe84eed79e-goog


  parent reply	other threads:[~2026-01-05 15:50 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-05 15:49 [PATCH 00/30] KVM: arm64: Add support for protected guest memory with pKVM Will Deacon
2026-01-05 15:49 ` [PATCH 01/30] KVM: arm64: Invert KVM_PGTABLE_WALK_HANDLE_FAULT to fix pKVM walkers Will Deacon
2026-01-06 14:33   ` Quentin Perret
2026-01-10 10:22   ` (subset) " Oliver Upton
2026-01-05 15:49 ` [PATCH 02/30] KVM: arm64: Remove redundant 'pgt' pointer checks from MMU notifiers Will Deacon
2026-01-06 14:32   ` Quentin Perret
2026-01-09 14:31     ` Will Deacon
2026-01-09 17:31       ` Will Deacon
2026-01-05 15:49 ` [PATCH 03/30] KVM: arm64: Rename __pkvm_pgtable_stage2_unmap() Will Deacon
2026-01-05 15:49 ` [PATCH 04/30] KVM: arm64: Don't advertise unsupported features for protected guests Will Deacon
2026-01-05 15:49 ` [PATCH 05/30] KVM: arm64: Expose self-hosted debug regs as RAZ/WI " Will Deacon
2026-01-05 15:49 ` [PATCH 06/30] KVM: arm64: Remove pointless is_protected_kvm_enabled() checks from hyp Will Deacon
2026-01-06 14:40   ` Quentin Perret
2026-01-09 14:23     ` Will Deacon
2026-01-05 15:49 ` [PATCH 07/30] KVM: arm64: Ignore MMU notifier callbacks for protected VMs Will Deacon
2026-01-05 15:49 ` [PATCH 08/30] KVM: arm64: Prevent unsupported memslot operations on " Will Deacon
2026-01-05 15:49 ` [PATCH 09/30] KVM: arm64: Split teardown hypercall into two phases Will Deacon
2026-01-05 15:49 ` [PATCH 10/30] KVM: arm64: Introduce __pkvm_host_donate_guest() Will Deacon
2026-01-06 14:48   ` Quentin Perret
2026-01-09 14:30     ` Will Deacon
2026-01-09 15:10       ` Quentin Perret
2026-01-05 15:49 ` [PATCH 11/30] KVM: arm64: Hook up donation hypercall to pkvm_pgtable_stage2_map() Will Deacon
2026-01-05 15:49 ` [PATCH 12/30] KVM: arm64: Handle aborts from protected VMs Will Deacon
2026-01-05 15:49 ` [PATCH 13/30] KVM: arm64: Introduce __pkvm_reclaim_dying_guest_page() Will Deacon
2026-01-06 16:26   ` Vincent Donnefort
2026-01-05 15:49 ` [PATCH 14/30] KVM: arm64: Hook up reclaim hypercall to pkvm_pgtable_stage2_destroy() Will Deacon
2026-01-06 14:59   ` Quentin Perret
2026-01-09 14:35     ` Will Deacon
2026-01-09 14:57       ` Quentin Perret
2026-01-05 15:49 ` [PATCH 15/30] KVM: arm64: Refactor enter_exception64() Will Deacon
2026-01-05 15:49 ` [PATCH 16/30] KVM: arm64: Inject SIGSEGV on illegal accesses Will Deacon
2026-01-05 15:49 ` [PATCH 17/30] KVM: arm64: Generalise kvm_pgtable_stage2_set_owner() Will Deacon
2026-01-06 15:20   ` Quentin Perret
2026-01-09 18:46     ` Will Deacon
2026-01-17  0:03       ` Will Deacon
2026-01-05 15:49 ` [PATCH 18/30] KVM: arm64: Introduce host_stage2_set_owner_metadata_locked() Will Deacon
2026-01-05 15:49 ` [PATCH 19/30] KVM: arm64: Annotate guest donations with handle and gfn in host stage-2 Will Deacon
2026-01-06 16:01   ` Fuad Tabba
2026-01-09 14:42     ` Will Deacon
2026-01-12  9:25       ` Fuad Tabba
2026-01-05 15:49 ` Will Deacon [this message]
2026-01-06 15:44   ` [PATCH 20/30] KVM: arm64: Introduce hypercall to force reclaim of a protected page Quentin Perret
2026-01-09 17:47     ` Will Deacon
2026-01-05 15:49 ` [PATCH 21/30] KVM: arm64: Reclaim faulting page from pKVM in spurious fault handler Will Deacon
2026-01-05 15:49 ` [PATCH 22/30] KVM: arm64: Return -EFAULT from VCPU_RUN on access to a poisoned pte Will Deacon
2026-01-06 15:54   ` Quentin Perret
2026-01-09 14:57     ` Will Deacon
2026-01-09 15:29       ` Quentin Perret
2026-01-09 17:35         ` Will Deacon
2026-01-05 15:49 ` [PATCH 23/30] KVM: arm64: Add hvc handler at EL2 for hypercalls from protected VMs Will Deacon
2026-01-06 15:52   ` Vincent Donnefort
2026-01-05 15:49 ` [PATCH 24/30] KVM: arm64: Implement the MEM_SHARE hypercall for " Will Deacon
2026-01-06 15:45   ` Vincent Donnefort
2026-01-09 15:01     ` Will Deacon
2026-01-05 15:49 ` [PATCH 25/30] KVM: arm64: Implement the MEM_UNSHARE " Will Deacon
2026-01-06 15:50   ` Vincent Donnefort
2026-01-05 15:49 ` [PATCH 26/30] KVM: arm64: Allow userspace to create protected VMs when pKVM is enabled Will Deacon
2026-01-05 15:49 ` [PATCH 27/30] KVM: arm64: Add some initial documentation for pKVM Will Deacon
2026-01-06 15:59   ` Vincent Donnefort
2026-01-09 15:04     ` Will Deacon
2026-01-05 15:49 ` [PATCH 28/30] KVM: arm64: Extend pKVM page ownership selftests to cover guest donation Will Deacon
2026-01-05 15:49 ` [PATCH 29/30] KVM: arm64: Register 'selftest_vm' in the VM table Will Deacon
2026-01-05 15:49 ` [PATCH 30/30] KVM: arm64: Extend pKVM page ownership selftests to cover forced reclaim Will Deacon
2026-03-13 15:31 ` [PATCH 00/30] KVM: arm64: Add support for protected guest memory with pKVM Mostafa Saleh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260105154939.11041-21-will@kernel.org \
    --to=will@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=joey.gouly@arm.com \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=maz@kernel.org \
    --cc=oupton@kernel.org \
    --cc=qperret@google.com \
    --cc=smostafa@google.com \
    --cc=suzuki.poulose@arm.com \
    --cc=tabba@google.com \
    --cc=vdonnefort@google.com \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox