Kernel KVM virtualization development
 help / color / mirror / Atom feed
From: Yosry Ahmed <yosry@kernel.org>
To: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Yosry Ahmed <yosry@kernel.org>
Subject: [PATCH v2 07/10] KVM: selftests: Add basic stress test for save+restore and #PF handling
Date: Thu,  4 Jun 2026 20:35:43 +0000	[thread overview]
Message-ID: <20260604203546.365658-8-yosry@kernel.org> (raw)
In-Reply-To: <20260604203546.365658-1-yosry@kernel.org>

Add a basic stress test for handling #PFs in a guest while the host is
doing save+restore cycles. The guest periodically accesses non-present
memory causing a #PF, and the #PF handler walks the page tables and
updates the PTE to be present, like a proper #PF handler.

After every access (and #PF), the guest triggers a sync and the test
performs a save+restore of the VM. This is not very meaningful yet, as
the save+restore is performed only after the access and #PF handling
have completed, but subsequent patches will change that.

Assisted-by: Gemini:gemini-3.1-pro
Signed-off-by: Yosry Ahmed <yosry@kernel.org>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../selftests/kvm/include/x86/processor.h     |  14 ++
 .../kvm/x86/stress_save_restore_pf_test.c     | 182 ++++++++++++++++++
 3 files changed, 197 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86/stress_save_restore_pf_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index e0ddd3ff9472d..731ef7de1afb4 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -111,6 +111,7 @@ TEST_GEN_PROGS_x86 += x86/set_sregs_test
 TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
 TEST_GEN_PROGS_x86 += x86/smm_test
 TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/stress_save_restore_pf_test
 TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
 TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 57f81f0bcc42d..10d26e079a1a8 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -585,6 +585,15 @@ static inline void set_cr0(u64 val)
 	__asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory");
 }
 
+/* Return the current value of the CR2 control register. */
+static inline u64 get_cr2(void)
+{
+	u64 val;
+
+	__asm__ __volatile__("mov %%cr2, %0" : "=r"(val));
+	return val;
+}
+
 static inline u64 get_cr3(void)
 {
 	u64 cr3;
@@ -880,6 +889,11 @@ static inline void write_sse_reg(int reg, const sse128_t *data)
 	}
 }
 
+/*
+ * Execute INVLPG on @addr. The "memory" clobber also makes this a compiler
+ * barrier, so memory accesses are not reordered across the invalidation.
+ */
+static inline void invlpg(u64 addr)
+{
+	__asm__ __volatile__("invlpg (%[va])"
+			     : /* no outputs */
+			     : [va]"r"(addr)
+			     : "memory");
+}
+
 static inline void cpu_relax(void)
 {
 	asm volatile("rep; nop" ::: "memory");
diff --git a/tools/testing/selftests/kvm/x86/stress_save_restore_pf_test.c b/tools/testing/selftests/kvm/x86/stress_save_restore_pf_test.c
new file mode 100644
index 0000000000000..622d102179e66
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/stress_save_restore_pf_test.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define NR_ITERATIONS		500
+
+#define GOTO_PREV_LINE		"\033[A\r"
+/*
+ * Print the progress line "<s><x>". The first call (x == 1) starts a new
+ * line; every later call moves the cursor back up and overwrites it.
+ *
+ * @s must be a string literal, as it is pasted onto the format string.
+ * @x is evaluated exactly once.
+ */
+#define PRINT_ITER(s, x)					\
+do {								\
+	int __iter = (x);					\
+								\
+	if (__iter == 1)					\
+		printf(s "%d\n", __iter);			\
+	else							\
+		printf(GOTO_PREV_LINE s "%d\n", __iter);	\
+	fflush(stdout);						\
+} while (0)
+
+#define PTRS_PER_PTE		512
+#define PXD_INDEX(vaddr, level)	(((vaddr) >> PG_LEVEL_SHIFT(level)) & (PTRS_PER_PTE - 1))
+
+#define TEST_MEM_BASE_GVA	0xc0000000ULL
+#define TEST_PGTABLE_GVA_OFFSET	0xd0000000ULL
+#define NR_TEST_ADDRS		PTRS_PER_PTE
+#define PATTERN			0xabcdefabcdefabcdULL
+
+static u64 pte_present_mask;
+static u64 pte_huge_mask;
+
+static u64 expected_vaddr;
+static u64 guest_faults;
+
+/*
+ * Walk the guest page tables rooted at CR3 and return a pointer to the 4K PTE
+ * for @vaddr. Page table pages are reached through the alias mapping at
+ * (physical address + TEST_PGTABLE_GVA_OFFSET). Every non-leaf entry on the
+ * path is asserted to be present and not a huge page.
+ */
+static u64 *guest_get_pte(u64 vaddr)
+{
+	/* The walk starts one level higher when 5-level paging (LA57) is on. */
+	int level = (get_cr4() & X86_CR4_LA57) ? PG_LEVEL_256T : PG_LEVEL_512G;
+	u64 table_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+	u64 *table;
+	u64 pte;
+
+	while (level > PG_LEVEL_4K) {
+		table = (u64 *)(table_pa + TEST_PGTABLE_GVA_OFFSET);
+		pte = table[PXD_INDEX(vaddr, level)];
+
+		GUEST_ASSERT(pte & pte_present_mask);
+		GUEST_ASSERT(!(pte & pte_huge_mask));
+
+		table_pa = PTE_GET_PA(pte);
+		level--;
+	}
+
+	table = (u64 *)(table_pa + TEST_PGTABLE_GVA_OFFSET);
+	return &table[PXD_INDEX(vaddr, PG_LEVEL_4K)];
+}
+
+/*
+ * Guest #PF handler: verify that the fault hit the address currently under
+ * test, mark its PTE present so the faulting access can be retried, and count
+ * the fault. @regs is unused.
+ */
+static void guest_pf_handler(struct ex_regs *regs)
+{
+	u64 fault_addr = get_cr2();
+	u64 *ptep;
+
+	GUEST_ASSERT_EQ(fault_addr, READ_ONCE(expected_vaddr));
+
+	/* The PTE must exist and must still be non-present at this point. */
+	ptep = guest_get_pte(fault_addr);
+	GUEST_ASSERT(ptep);
+	GUEST_ASSERT(!(*ptep & pte_present_mask));
+
+	/* Make the page present and invalidate the TLB entry for it. */
+	*ptep = *ptep | pte_present_mask;
+	invlpg(fault_addr);
+
+	guest_faults += 1;
+}
+
+/*
+ * Guest entry point: cycle forever over the NR_TEST_ADDRS test pages. Each
+ * read hits a non-present PTE, so guest_pf_handler() runs and marks the page
+ * present before the read completes. The guest then clears the present bit
+ * again and syncs with the host, passing the running #PF count. @arg is
+ * unused.
+ */
+static void guest_access_memory(void *arg)
+{
+	u64 vaddr, val;
+	/* Unsigned, so the open-ended loop can never hit signed overflow. */
+	u64 i;
+
+	for (i = 0; ; i++) {
+		vaddr = TEST_MEM_BASE_GVA + (i % NR_TEST_ADDRS) * PAGE_SIZE;
+
+		/* Tell the #PF handler which address is about to fault. */
+		WRITE_ONCE(expected_vaddr, vaddr);
+
+		/* Read to trigger #PF */
+		val = READ_ONCE(*(u64 *)vaddr);
+		GUEST_ASSERT_EQ(val, PATTERN);
+
+		/* Clear the present bit again so it faults next time */
+		*guest_get_pte(vaddr) &= ~pte_present_mask;
+		invlpg(vaddr);
+
+		GUEST_SYNC(guest_faults);
+	}
+}
+
+/*
+ * Host side of the test: set up NR_TEST_ADDRS non-present mappings of a
+ * single pattern page, expose the relevant page table pages to the guest,
+ * then run the guest for NR_ITERATIONS syncs, doing a save+restore of the
+ * vCPU state across a VM recreation after every sync.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_x86_state *state;
+	int r, i, level, count = 0;
+	gpa_t gpa, pgtable_gpa;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	u64 *pgtable;
+	gva_t gva;
+	u64 pte;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_access_memory);
+	vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);
+
+	/* The guest page table walker needs the PTE bit masks as well. */
+	pte_present_mask = PTE_PRESENT_MASK(&vm->mmu);
+	pte_huge_mask = PTE_HUGE_MASK(&vm->mmu);
+	sync_global_to_guest(vm, pte_present_mask);
+	sync_global_to_guest(vm, pte_huge_mask);
+
+	/* Allocate a page and write the pattern to it */
+	gva = vm_alloc_page(vm);
+	*(u64 *)addr_gva2hva(vm, gva) = PATTERN;
+	gpa = addr_gva2gpa(vm, gva);
+
+	/*
+	 * Map all virtual addresses to the pattern page and clear the present
+	 * bit such that guest accesses will cause a #PF.
+	 */
+	for (i = 0; i < NR_TEST_ADDRS; i++) {
+		gva = TEST_MEM_BASE_GVA + i * getpagesize();
+		virt_pg_map(vm, gva, gpa);
+		*vm_get_pte(vm, gva) &= ~pte_present_mask;
+	}
+
+	/*
+	 * Now create mappings for the page tables created above so that the
+	 * guest #PF handler can walk them. All PTEs for test virtual addresses
+	 * should lie on the same PTE page, so one page is mapped for each page
+	 * table level.
+	 *
+	 * Use an offset for the GVA instead of creating identity mappings to
+	 * avoid collision with existing mappings at low GVAs (e.g. ELF).
+	 */
+	pgtable_gpa = vm->mmu.pgd;
+	for (level = vm->mmu.pgtable_levels; level >= PG_LEVEL_4K; level--) {
+		virt_map(vm, pgtable_gpa + TEST_PGTABLE_GVA_OFFSET, pgtable_gpa, 1);
+		pgtable = addr_gpa2hva(vm, pgtable_gpa);
+		pte = pgtable[PXD_INDEX(TEST_MEM_BASE_GVA, level)];
+		pgtable_gpa = PTE_GET_PA(pte);
+	}
+
+	while (count++ < NR_ITERATIONS) {
+		r = __vcpu_run(vcpu);
+		TEST_ASSERT(!r, "vcpu_run failed");
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		get_ucall(vcpu, &uc);
+		if (uc.cmd == UCALL_ABORT) {
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		}
+		TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);
+
+		/*
+		 * The guest takes exactly one #PF before each sync, and count
+		 * was already incremented by the loop condition, so the fault
+		 * count reported by the guest must equal the iteration number.
+		 */
+		TEST_ASSERT_EQ(uc.args[1], count);
+
+		/*
+		 * Save the vCPU state, release and recreate the VM, then load
+		 * the saved state into the new vCPU.
+		 */
+		state = vcpu_save_state(vcpu);
+
+		kvm_vm_release(vm);
+		vcpu = vm_recreate_with_one_vcpu(vm);
+		vcpu_load_state(vcpu, state);
+		kvm_x86_state_cleanup(state);
+
+		PRINT_ITER("Save+restore iterations: ", count);
+	}
+
+	/* Pull the guest's fault counter back into the host copy to report it. */
+	sync_global_from_guest(vm, guest_faults);
+	pr_info("Guest page faults: %lu\n", guest_faults);
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
2.54.0.1032.g2f8565e1d1-goog


  parent reply	other threads:[~2026-06-04 20:36 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 20:35 [PATCH v2 00/10] KVM: selftests: Stress save+restore and #PF (ft. nested) Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 01/10] KVM: selftests: Move STR() and XSTR() definitions to test_util.h Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 02/10] KVM: selftests: Fix RAX and RFLAGS VMCB offsets when running L2 Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 03/10] KVM: selftests: Use an array for guest_regs (and fix offsets) Yosry Ahmed
2026-06-04 20:44   ` sashiko-bot
2026-06-04 20:49     ` Yosry Ahmed
2026-06-04 21:37       ` Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 04/10] KVM: selftests: Move GPR load/save definitions outside of nSVM code Yosry Ahmed
2026-06-04 20:47   ` sashiko-bot
2026-06-04 20:35 ` [PATCH v2 05/10] KVM: selftests: Reuse GPR switching logic for nVMX Yosry Ahmed
2026-06-04 20:52   ` sashiko-bot
2026-06-04 20:35 ` [PATCH v2 06/10] KVM: selftests: Drop HORRIFIC_L2_UCALL_CLOBBER_HACK Yosry Ahmed
2026-06-04 20:50   ` sashiko-bot
2026-06-04 21:11     ` Yosry Ahmed
2026-06-04 20:35 ` Yosry Ahmed [this message]
2026-06-05 16:31   ` [PATCH v2 07/10] KVM: selftests: Add basic stress test for save+restore and #PF handling Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 08/10] KVM: selftests: Trigger save+restore randomly in the #PF stress test Yosry Ahmed
2026-06-04 20:49   ` sashiko-bot
2026-06-04 20:55     ` Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 09/10] KVM: selftests: Support running stress save+restore and #PF test in L2 Yosry Ahmed
2026-06-04 20:35 ` [PATCH v2 10/10] KVM: selftests: Trigger L2->L1 exits stress save+restore and #PF test Yosry Ahmed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604203546.365658-8-yosry@kernel.org \
    --to=yosry@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox