From: Jack Thomson <jackabt.amazon@gmail.com>
To: maz@kernel.org, oupton@kernel.org, pbonzini@redhat.com
Cc: joey.gouly@arm.com, seiden@linux.ibm.com, suzuki.poulose@arm.com,
yuzenghui@huawei.com, catalin.marinas@arm.com, will@kernel.org,
shuah@kernel.org, corbet@lwn.net, vladimir.murzin@arm.com,
linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org, linux-doc@vger.kernel.org,
isaku.yamahata@intel.com, Jack Thomson <jackabt@amazon.com>
Subject: [PATCH v5 5/5] KVM: selftests: Add nested pre-fault test for arm64
Date: Fri, 12 Jun 2026 17:23:53 +0100 [thread overview]
Message-ID: <20260612162354.73378-6-jackabt.amazon@gmail.com> (raw)
In-Reply-To: <20260612162354.73378-1-jackabt.amazon@gmail.com>
From: Jack Thomson <jackabt@amazon.com>
Add an arm64 nested-virt selftest for KVM_PRE_FAULT_MEMORY. The guest
enters vEL1 and exits to userspace with a nested/shadow stage-2 MMU as
the vCPU's last-run context.
Before prefaulting, userspace enables HCR_EL2.VM and points VTTBR_EL2 at
an empty nested stage-2 root. A prefault implementation that incorrectly
treats the userspace GPA as an L2 IPA will fail the ioctl; the correct
path swaps to the canonical stage-2 and succeeds.
Restore the original nested state before resuming the guest, then touch
the prefaulted range to check that vEL1 still runs correctly.
Signed-off-by: Jack Thomson <jackabt@amazon.com>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../kvm/arm64/nv_pre_fault_memory_test.c | 200 ++++++++++++++++++
2 files changed, 201 insertions(+)
create mode 100644 tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4609d8f23e38..63d79245b47d 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -170,6 +170,7 @@ TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/nv_pre_fault_memory_test
TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
diff --git a/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c b/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c
new file mode 100644
index 000000000000..2bbd5540599c
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/nv_pre_fault_memory_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * nv_pre_fault_memory_test - Test KVM_PRE_FAULT_MEMORY on a vCPU whose
+ * last-run context is nested.
+ *
+ * The guest starts at vEL2, mirrors its EL2 translation regime into the
+ * real EL1 registers, drops HCR_EL2.TGE and ERETs to vEL1, then exits to
+ * userspace from vEL1 so that the vCPU's last-run context selects a
+ * shadow stage-2 MMU. Userspace then enables an empty nested stage-2
+ * before prefaulting. Prefaulting must target the canonical stage-2,
+ * regardless of the vCPU's nested state.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+#include <linux/sizes.h>
+
+/* Memslot IDs for the prefault target region and the nested stage-2 root page. */
+#define TEST_MEM_SLOT 10
+#define NESTED_S2_ROOT_SLOT 11
+/* Size and base GPA of the region that userspace prefaults and the guest reads. */
+#define TEST_MEM_SIZE SZ_2M
+#define TEST_MEM_GPA SZ_1G
+/* GPA programmed into VTTBR_EL2 as the nested stage-2 root; sits right after the test region. */
+#define NESTED_S2_ROOT_GPA (TEST_MEM_GPA + TEST_MEM_SIZE)
+
+/*
+ * Snapshot of the vCPU's HCR_EL2 and VTTBR_EL2 values, taken before the
+ * test overrides them so that they can be written back afterwards.
+ */
+struct nested_s2_state {
+ u64 hcr_el2;
+ u64 vttbr_el2;
+};
+
+/*
+ * vEL1 half of the guest. Checks that it really runs at EL1, syncs with
+ * userspace (stage 1), then reads through the test region and reports
+ * completion.
+ */
+static void guest_el1_code(void)
+{
+	u64 off = 0;
+
+	GUEST_ASSERT_EQ(get_current_el(), 1);
+
+	/* Hand control to userspace while the vEL1 (nested) context is live. */
+	GUEST_SYNC(1);
+
+	/*
+	 * Read one u64 every 4K across the prefaulted range. vstage-2 is
+	 * disabled, so the shadow stage-2 is a 1:1 view of the canonical
+	 * IPA space.
+	 */
+	while (off < TEST_MEM_SIZE) {
+		READ_ONCE(*(u64 *)(TEST_MEM_GPA + off));
+		off += SZ_4K;
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest entry point; asserts that it starts at EL2. It copies the
+ * translation-regime registers into their _EL12 counterparts, points
+ * SP_EL1 at the current stack, clears HCR_EL2.TGE and ERETs to
+ * guest_el1_code(). The statement order below is deliberate; do not
+ * reorder the sysreg writes around the isb()/eret.
+ */
+static void guest_code(void)
+{
+ u64 sp;
+
+ GUEST_ASSERT_EQ(get_current_el(), 2);
+
+ /*
+ * Mirror the EL2 translation regime into the real EL1 registers so
+ * that vEL1 runs on the test's stage-1 page tables. With E2H=1, the
+ * _EL1 accessors read the EL2 registers, and the _EL12 accessors
+ * write the real EL1 registers.
+ */
+ write_sysreg_s(read_sysreg(sctlr_el1), SYS_SCTLR_EL12);
+ write_sysreg_s(read_sysreg(tcr_el1), SYS_TCR_EL12);
+ write_sysreg_s(read_sysreg(ttbr0_el1), SYS_TTBR0_EL12);
+ write_sysreg_s(read_sysreg(mair_el1), SYS_MAIR_EL12);
+ write_sysreg_s(read_sysreg(cpacr_el1), SYS_CPACR_EL12);
+
+ /* Run vEL1 on the same stack. */
+ asm volatile("mov %0, sp" : "=r"(sp));
+ write_sysreg(sp, sp_el1);
+
+ /*
+ * Drop TGE so that vEL1 is a nested context rather than host EL0.
+ * KVM backs it with a shadow stage-2 MMU even though vstage-2 is
+ * disabled (HCR_EL2.VM=0).
+ */
+ write_sysreg(read_sysreg(hcr_el2) & ~HCR_EL2_TGE, hcr_el2);
+ isb();
+
+ /* Return to guest_el1_code() in EL1h with D, A, I and F masked. */
+ write_sysreg(PSR_MODE_EL1h | PSR_F_BIT | PSR_I_BIT | PSR_A_BIT |
+ PSR_D_BIT, spsr_el2);
+ write_sysreg((u64)guest_el1_code, elr_el2);
+ asm volatile("eret");
+
+ /* Only reached if the ERET above did not transfer control. */
+ GUEST_ASSERT(false);
+}
+
+/*
+ * Prefault [gpa, gpa + size) on @vcpu with KVM_PRE_FAULT_MEMORY and assert
+ * that the whole range was processed.
+ *
+ * KVM updates range.gpa and range.size in place to describe what is left.
+ * The ioctl may return success after only partial progress (for example
+ * when it is interrupted after mapping some pages), in which case the
+ * caller is expected to reissue it with the same struct. Keep going while
+ * a successful call leaves bytes remaining, or while a failed call reports
+ * EINTR. Any other error, including one that was masked by earlier partial
+ * progress, surfaces on the retry and is reported with its errno.
+ */
+static void pre_fault(struct kvm_vcpu *vcpu, u64 gpa, u64 size)
+{
+	struct kvm_pre_fault_memory range = {
+		.gpa = gpa,
+		.size = size,
+	};
+	int ret, err;
+
+	do {
+		ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
+		/* Save errno before anything else can overwrite it. */
+		err = errno;
+	} while (ret < 0 ? err == EINTR : range.size != 0);
+
+	TEST_ASSERT(!ret, "KVM_PRE_FAULT_MEMORY failed, ret: %d errno: %d",
+		    ret, err);
+	TEST_ASSERT_EQ(range.size, 0);
+}
+
+/*
+ * Save the vCPU's current HCR_EL2/VTTBR_EL2, then point VTTBR_EL2 at
+ * NESTED_S2_ROOT_GPA and set HCR_EL2.VM so that a nested stage-2 is
+ * enabled. Asserts that HCR_EL2.TGE is clear, i.e. that the vCPU is in
+ * the nested/vEL1 context.
+ *
+ * Returns the saved register values for restore_nested_s2().
+ */
+static struct nested_s2_state enable_empty_nested_s2(struct kvm_vcpu *vcpu)
+{
+	const u64 hcr_id = KVM_ARM64_SYS_REG(SYS_HCR_EL2);
+	const u64 vttbr_id = KVM_ARM64_SYS_REG(SYS_VTTBR_EL2);
+	struct nested_s2_state saved;
+
+	saved.hcr_el2 = vcpu_get_reg(vcpu, hcr_id);
+	saved.vttbr_el2 = vcpu_get_reg(vcpu, vttbr_id);
+
+	TEST_ASSERT(!(saved.hcr_el2 & HCR_EL2_TGE),
+		    "vCPU should be in nested/vEL1 context");
+
+	/* Install the new root first, then turn stage-2 translation on. */
+	vcpu_set_reg(vcpu, vttbr_id, NESTED_S2_ROOT_GPA);
+	vcpu_set_reg(vcpu, hcr_id, saved.hcr_el2 | HCR_EL2_VM);
+
+	return saved;
+}
+
+/*
+ * Write the HCR_EL2 and VTTBR_EL2 values held in @state back to @vcpu,
+ * HCR_EL2 first and VTTBR_EL2 second.
+ */
+static void restore_nested_s2(struct kvm_vcpu *vcpu,
+			      struct nested_s2_state *state)
+{
+	u64 hcr = state->hcr_el2;
+	u64 vttbr = state->vttbr_el2;
+
+	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), hcr);
+	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VTTBR_EL2), vttbr);
+}
+
+/*
+ * Test flow:
+ *  1. Require KVM_CAP_ARM_EL2 and KVM_CAP_PRE_FAULT_MEMORY.
+ *  2. Create a VM with one vCPU that has the EL2 feature, add the test
+ *     region (identity-mapped in the guest's stage-1) and a one-page
+ *     region used as the nested stage-2 root.
+ *  3. Run the guest until it syncs from vEL1.
+ *  4. Enable the nested stage-2, prefault the test region in two halves,
+ *     then restore the saved HCR_EL2/VTTBR_EL2.
+ *  5. Resume the guest, which reads the prefaulted range and signals done.
+ */
+int main(void)
+{
+	struct nested_s2_state s2;
+	struct kvm_vcpu_init init;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	u64 npages;
+
+	TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+	TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY));
+
+	vm = vm_create(1);
+
+	kvm_get_default_vcpu_target(vm, &init);
+	init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+	vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+	kvm_arch_vm_finalize_vcpus(vm);
+
+	/* Test region, mapped VA == GPA so the guest can use the GPA directly. */
+	npages = TEST_MEM_SIZE / vm->page_size;
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, TEST_MEM_GPA,
+				    TEST_MEM_SLOT, npages, 0);
+	virt_map(vm, TEST_MEM_GPA, TEST_MEM_GPA, npages);
+
+	/* One page of backing for the nested stage-2 root. */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    NESTED_S2_ROOT_GPA, NESTED_S2_ROOT_SLOT,
+				    1, 0);
+
+	/* Run the guest until it has ERET'd from vEL2 to vEL1. */
+	vcpu_run(vcpu);
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		TEST_ASSERT_EQ(uc.args[1], 1);
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		break;
+	default:
+		/* uc.cmd is an unsigned 64-bit value, so print it with %lu. */
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+	}
+
+	/*
+	 * The vCPU's last-run context is vEL1, backed by a shadow stage-2
+	 * MMU. Enable nested stage-2 with an empty root so that the ioctl
+	 * fails if it tries to interpret the userspace GPA as an L2 IPA.
+	 * Prefault in two halves so that the second ioctl exercises a
+	 * repeated shadow-MMU attach and canonical stage-2 swap.
+	 */
+	s2 = enable_empty_nested_s2(vcpu);
+	pre_fault(vcpu, TEST_MEM_GPA, TEST_MEM_SIZE / 2);
+	pre_fault(vcpu, TEST_MEM_GPA + TEST_MEM_SIZE / 2, TEST_MEM_SIZE / 2);
+	restore_nested_s2(vcpu, &s2);
+
+	/* Resume at vEL1 and touch the prefaulted range. */
+	vcpu_run(vcpu);
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		break;
+	default:
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+	}
+
+	kvm_vm_free(vm);
+	return 0;
+}
--
2.43.0
prev parent reply other threads:[~2026-06-12 16:24 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-12 16:23 [PATCH v5 0/5] KVM: arm64: Add KVM_PRE_FAULT_MEMORY support Jack Thomson
2026-06-12 16:23 ` [PATCH v5 1/5] KVM: arm64: Pass walk flags to kvm_pgtable_get_leaf() Jack Thomson
2026-06-12 16:23 ` [PATCH v5 2/5] KVM: arm64: Add pre_fault_memory implementation Jack Thomson
2026-06-12 16:23 ` [PATCH v5 3/5] KVM: selftests: Enable pre_fault_memory_test for arm64 Jack Thomson
2026-06-12 16:23 ` [PATCH v5 4/5] KVM: selftests: Add option for different backing in pre-fault tests Jack Thomson
2026-06-12 16:23 ` Jack Thomson [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612162354.73378-6-jackabt.amazon@gmail.com \
--to=jackabt.amazon@gmail.com \
--cc=catalin.marinas@arm.com \
--cc=corbet@lwn.net \
--cc=isaku.yamahata@intel.com \
--cc=jackabt@amazon.com \
--cc=joey.gouly@arm.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=maz@kernel.org \
--cc=oupton@kernel.org \
--cc=pbonzini@redhat.com \
--cc=seiden@linux.ibm.com \
--cc=shuah@kernel.org \
--cc=suzuki.poulose@arm.com \
--cc=vladimir.murzin@arm.com \
--cc=will@kernel.org \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox