From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
Marc Zyngier <maz@kernel.org>, Oliver Upton <oupton@kernel.org>,
Tianrui Zhao <zhaotianrui@loongson.cn>,
Bibo Mao <maobibo@loongson.cn>,
Huacai Chen <chenhuacai@kernel.org>,
Anup Patel <anup@brainfault.org>, Paul Walmsley <pjw@kernel.org>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
Christian Borntraeger <borntraeger@linux.ibm.com>,
Janosch Frank <frankja@linux.ibm.com>,
Claudio Imbrenda <imbrenda@linux.ibm.com>,
Sean Christopherson <seanjc@google.com>
Cc: kvm@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
kvmarm@lists.linux.dev, loongarch@lists.linux.dev,
kvm-riscv@lists.infradead.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org,
Yosry Ahmed <yosry.ahmed@linux.dev>
Subject: [PATCH v4 13/21] KVM: selftests: Reuse virt mapping functions for nested EPTs
Date: Tue, 30 Dec 2025 15:01:42 -0800 [thread overview]
Message-ID: <20251230230150.4150236-14-seanjc@google.com> (raw)
In-Reply-To: <20251230230150.4150236-1-seanjc@google.com>
From: Yosry Ahmed <yosry.ahmed@linux.dev>
Rework tdp_map() and friends to use __virt_pg_map() and drop the custom
EPT code in __tdp_pg_map() and tdp_create_pte(). The EPT code and
__virt_pg_map() are practically identical; the main differences are:
- EPT uses the EPT struct overlay instead of the PTE masks.
- EPT always assumes 4-level EPTs.
To reuse __virt_pg_map(), extend the PTE masks to work with EPT's RWX and
X-only capabilities, and provide a tdp_mmu_init() API so that EPT can pass
in the EPT PTE masks along with the root page level (which is currently
hardcoded to '4').
Don't reuse KVM's insane overloading of the USER bit for EPT_R as there's
no reason to multiplex bits in the selftests, e.g. selftests aren't trying
to shadow guest PTEs and thus don't care about funnelling protections into
a common permissions check.
Another benefit of reusing the code is having separate handling for
upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the
large bit on a 4K PTE in the EPTs.
For all intents and purposes, no functional change intended.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../selftests/kvm/include/x86/kvm_util_arch.h | 4 +-
.../selftests/kvm/include/x86/processor.h | 16 ++-
.../testing/selftests/kvm/lib/x86/processor.c | 21 +++-
tools/testing/selftests/kvm/lib/x86/vmx.c | 119 +++---------------
4 files changed, 52 insertions(+), 108 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 05a1fc1780f2..1cf84b8212c6 100644
--- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -14,6 +14,8 @@ struct pte_masks {
uint64_t present;
uint64_t writable;
uint64_t user;
+ uint64_t readable;
+ uint64_t executable;
uint64_t accessed;
uint64_t dirty;
uint64_t huge;
@@ -37,8 +39,6 @@ struct kvm_vm_arch {
uint64_t s_bit;
int sev_fd;
bool is_pt_protected;
-
- struct kvm_mmu *tdp_mmu;
};
static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 973f2069cd3b..4c0d2fc83c1c 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1442,6 +1442,8 @@ enum pg_level {
#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
@@ -1449,13 +1451,23 @@ enum pg_level {
#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
-#define is_present_pte(mmu, pte) (!!(*(pte) & PTE_PRESENT_MASK(mmu)))
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte) \
+ (PTE_PRESENT_MASK(mmu) ? \
+ !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
+ !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
+#define is_executable_pte(mmu, pte) \
+ ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
-#define is_nx_pte(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
+#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
struct pte_masks *pte_masks);
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index 8a9298a72897..41316cac94e0 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -165,6 +165,10 @@ static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
mmu->pgd_created = true;
mmu->arch.pte_masks = *pte_masks;
}
+
+ TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+ "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+ mmu->pgtable_levels);
}
void virt_arch_pgd_alloc(struct kvm_vm *vm)
@@ -180,6 +184,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
.dirty = BIT_ULL(6),
.huge = BIT_ULL(7),
.nx = BIT_ULL(63),
+ .executable = 0,
.c = vm->arch.c_bit,
.s = vm->arch.s_bit,
};
@@ -190,10 +195,10 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
struct pte_masks *pte_masks)
{
- TEST_ASSERT(!vm->arch.tdp_mmu, "TDP MMU already initialized");
+ TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
- vm->arch.tdp_mmu = calloc(1, sizeof(*vm->arch.tdp_mmu));
- virt_mmu_init(vm, vm->arch.tdp_mmu, pte_masks);
+ vm->stage2_mmu.pgtable_levels = pgtable_levels;
+ virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
}
static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
@@ -223,7 +228,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
paddr = vm_untag_gpa(vm, paddr);
if (!is_present_pte(mmu, pte)) {
- *pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu);
if (current_level == target_level)
*pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
else
@@ -269,6 +275,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
"Unexpected bits in paddr: %lx", paddr);
+ TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+ "X and NX bit masks cannot be used simultaneously");
+
/*
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
@@ -286,7 +295,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!is_present_pte(mmu, pte),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ (paddr & PHYSICAL_PAGE_MASK);
/*
* Neither SEV nor TDX supports shared page tables, so only the final
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index ea1c09f9e8ab..e3737b3d9120 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -25,21 +25,6 @@ bool enable_evmcs;
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
@@ -58,13 +43,24 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
void vm_enable_ept(struct kvm_vm *vm)
{
+ struct pte_masks pte_masks;
+
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
- if (vm->arch.tdp_mmu)
- return;
-
- /* TODO: Drop eptPageTableEntry in favor of PTE masks. */
- struct pte_masks pte_masks = (struct pte_masks) {
+ /*
+ * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
+ * 'readable' bit.
+ */
+ pte_masks = (struct pte_masks) {
+ .present = 0,
+ .user = 0,
+ .readable = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .executable = BIT_ULL(2),
+ .huge = BIT_ULL(7),
+ .accessed = BIT_ULL(8),
+ .dirty = BIT_ULL(9),
+ .nx = 0,
};
/* TODO: Add support for 5-level EPT. */
@@ -120,8 +116,8 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
- if (vm->arch.tdp_mmu)
- vmx->eptp_gpa = vm->arch.tdp_mmu->pgd;
+ if (vm->stage2_mmu.pgd_created)
+ vmx->eptp_gpa = vm->stage2_mmu.pgd;
*p_vmx_gva = vmx_gva;
return vmx;
@@ -377,82 +373,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-static void tdp_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
- int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- void *eptp_hva = addr_gpa2hva(vm, vm->arch.tdp_mmu->pgd);
- struct eptPageTableEntry *pt = eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
- "Unknown or unsupported guest mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- tdp_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-}
-
/*
* Map a range of EPT guest physical addresses to the VM's physical address
*
@@ -473,6 +393,7 @@ void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
uint64_t size, int level)
{
+ struct kvm_mmu *mmu = &vm->stage2_mmu;
size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
@@ -480,7 +401,7 @@ void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- __tdp_pg_map(vm, nested_paddr, paddr, level);
+ __virt_pg_map(vm, mmu, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
--
2.52.0.351.gbe84eed79e-goog
next prev parent reply other threads:[~2025-12-30 23:03 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-30 23:01 [PATCH v4 00/21] KVM: selftests: Add Nested NPT support Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 01/21] KVM: selftests: Make __vm_get_page_table_entry() static Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 02/21] KVM: selftests: Stop passing a memslot to nested_map_memslot() Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 03/21] KVM: selftests: Rename nested TDP mapping functions Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 04/21] KVM: selftests: Kill eptPageTablePointer Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 05/21] KVM: selftests: Stop setting A/D bits when creating EPT PTEs Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 06/21] KVM: selftests: Add "struct kvm_mmu" to track a given MMU instance Sean Christopherson
2026-01-02 16:50 ` Yosry Ahmed
2025-12-30 23:01 ` [PATCH v4 07/21] KVM: selftests: Plumb "struct kvm_mmu" into x86's MMU APIs Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 08/21] KVM: selftests: Add a "struct kvm_mmu_arch arch" member to kvm_mmu Sean Christopherson
2026-01-02 16:53 ` Yosry Ahmed
2026-01-02 17:02 ` Yosry Ahmed
2025-12-30 23:01 ` [PATCH v4 09/21] KVM: selftests: Move PTE bitmasks " Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 10/21] KVM: selftests: Use a TDP MMU to share EPT page tables between vCPUs Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 11/21] KVM: selftests: Stop passing VMX metadata to TDP mapping functions Sean Christopherson
2026-01-02 16:58 ` Yosry Ahmed
2026-01-02 17:12 ` Yosry Ahmed
2025-12-30 23:01 ` [PATCH v4 12/21] KVM: selftests: Add a stage-2 MMU instance to kvm_vm Sean Christopherson
2026-01-02 17:03 ` Yosry Ahmed
2025-12-30 23:01 ` Sean Christopherson [this message]
2025-12-30 23:01 ` [PATCH v4 14/21] KVM: selftests: Move TDP mapping functions outside of vmx.c Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 15/21] KVM: selftests: Allow kvm_cpu_has_ept() to be called on AMD CPUs Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 16/21] KVM: selftests: Add support for nested NPTs Sean Christopherson
2026-01-07 23:12 ` Yosry Ahmed
2025-12-30 23:01 ` [PATCH v4 17/21] KVM: selftests: Set the user bit on nested NPT PTEs Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 18/21] KVM: selftests: Extend vmx_dirty_log_test to cover SVM Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 19/21] KVM: selftests: Extend memstress to run on nested SVM Sean Christopherson
2025-12-30 23:01 ` [PATCH v4 20/21] KVM: selftests: Rename vm_get_page_table_entry() to vm_get_pte() Sean Christopherson
2026-01-02 17:10 ` Yosry Ahmed
2025-12-30 23:01 ` [PATCH v4 21/21] KVM: selftests: Test READ=>WRITE dirty logging behavior for shadow MMU Sean Christopherson
2026-01-02 17:36 ` Yosry Ahmed
2026-01-08 16:32 ` Sean Christopherson
2026-01-08 18:01 ` Yosry Ahmed
2026-01-08 18:31 ` Sean Christopherson
2026-01-08 20:24 ` Yosry Ahmed
2026-01-08 20:29 ` Sean Christopherson
2026-01-08 20:33 ` Yosry Ahmed
2026-01-08 20:26 ` Yosry Ahmed
2026-01-12 17:38 ` [PATCH v4 00/21] KVM: selftests: Add Nested NPT support Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251230230150.4150236-14-seanjc@google.com \
--to=seanjc@google.com \
--cc=anup@brainfault.org \
--cc=aou@eecs.berkeley.edu \
--cc=borntraeger@linux.ibm.com \
--cc=chenhuacai@kernel.org \
--cc=frankja@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=kvm-riscv@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=loongarch@lists.linux.dev \
--cc=maobibo@loongson.cn \
--cc=maz@kernel.org \
--cc=oupton@kernel.org \
--cc=palmer@dabbelt.com \
--cc=pbonzini@redhat.com \
--cc=pjw@kernel.org \
--cc=yosry.ahmed@linux.dev \
--cc=zhaotianrui@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox