From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: jon@nutanix.com, mtosatti@redhat.com
Subject: [PATCH 22/22] KVM: x86/mmu: use kvm_page_format to test SPTEs
Date: Mon, 11 May 2026 11:06:48 -0400 [thread overview]
Message-ID: <20260511150648.685374-23-pbonzini@redhat.com> (raw)
In-Reply-To: <20260511150648.685374-1-pbonzini@redhat.com>
is_access_allowed(), together with is_executable_pte() within it, is
effectively a special version of permission_fault() that only supports a
subset of roles.  In particular, it does not handle SMEP, SMAP or PKE.
Replace its implementation with a modified version of permission_fault();
the new version will support SMEP (and hence AMD GMET) for free as soon
as update_spte_permission_bitmask() stops hardcoding cr4_smep == false.
This prepares for a possible future where TDP entries could have XS!=XU,
for example as part of implementing Hyper-V VSM natively inside KVM.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/mmu/mmu.c | 18 ++++++++++++---
arch/x86/kvm/mmu/spte.h | 46 +++++++++++++++++++++-----------------
arch/x86/kvm/mmu/tdp_mmu.c | 3 ++-
3 files changed, 42 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ddda1f1be686..0ec8c9dc2c33 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3670,6 +3670,7 @@ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
*/
static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
+ struct kvm_mmu *mmu;
struct kvm_mmu_page *sp;
int ret = RET_PF_INVALID;
u64 spte;
@@ -3679,6 +3680,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (!page_fault_can_be_fast(vcpu->kvm, fault))
return ret;
+ mmu = vcpu->arch.mmu;
walk_shadow_page_lockless_begin(vcpu);
do {
@@ -3714,7 +3716,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
* Need not check the access of upper level table entries since
* they are always ACC_ALL.
*/
- if (is_access_allowed(fault, spte)) {
+ if (!spte_permission_fault(mmu, spte, fault)) {
ret = RET_PF_SPURIOUS;
break;
}
@@ -3737,7 +3739,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
* that were write-protected for dirty-logging or access
* tracking are handled here. Don't bother checking if the
* SPTE is writable to prioritize running with A/D bits enabled.
- * The is_access_allowed() check above handles the common case
+ * The spte_permission_fault() check above handles the common case
* of the fault being spurious, and the SPTE is known to be
* shadow-present, i.e. except for access tracking restoration
* making the new SPTE writable, the check is wasteful.
@@ -3762,7 +3764,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
/* Verify that the fault can be handled in the fast path */
if (new_spte == spte ||
- !is_access_allowed(fault, new_spte))
+ spte_permission_fault(mmu, new_spte, fault))
break;
/*
@@ -5675,6 +5677,12 @@ static void update_permission_bitmask(struct kvm_pagewalk *w, bool tdp, bool ept
is_cr0_wp(w), is_efer_nx(w));
}
+static void update_spte_permission_bitmask(struct kvm_mmu *mmu, bool tdp, bool ept)
+{
+ __update_permission_bitmask(&mmu->fmt, tdp, ept,
+ mmu->root_role.cr4_smep, false, true, true);
+}
+
/*
* PKU is an additional mechanism by which the paging controls access to
* user-mode addresses based on the value in the PKRU register. Protection
@@ -5884,6 +5892,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
context->page_fault = kvm_tdp_page_fault;
context->sync_spte = NULL;
+ update_spte_permission_bitmask(context, true, shadow_xs_mask);
reset_tdp_shadow_zero_bits_mask(context);
}
@@ -5902,6 +5911,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
else
paging32_init_context(context);
+ update_spte_permission_bitmask(context, context == &vcpu->arch.guest_mmu, false);
reset_shadow_zero_bits_mask(vcpu, context);
}
@@ -6030,6 +6040,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
update_permission_bitmask(tdp_walk, true, true);
tdp_walk->fmt.pkru_mask = 0;
reset_rsvds_bits_mask_ept(vcpu, execonly, huge_page_level);
+
+ update_spte_permission_bitmask(context, true, true);
reset_ept_shadow_zero_bits_mask(context, execonly);
}
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 918533e61b98..9bddfa0e02b9 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -357,17 +357,6 @@ static inline bool is_last_spte(u64 pte, int level)
return (level == PG_LEVEL_4K) || is_large_pte(pte);
}
-static inline bool is_executable_pte(u64 spte)
-{
- /*
- * For now, return true if either the XS or XU bit is set
- * This function is only used for fast_page_fault,
- * which never processes shadow EPT, and regular page
- * tables always have XS==XU.
- */
- return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
-}
-
static inline kvm_pfn_t spte_to_pfn(u64 pte)
{
return (pte & SPTE_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -496,20 +485,35 @@ static inline bool is_mmu_writable_spte(u64 spte)
}
/*
- * Returns true if the access indicated by @fault is allowed by the existing
- * SPTE protections. Note, the caller is responsible for checking that the
- * SPTE is a shadow-present, leaf SPTE (either before or after).
+ * Returns true if the access indicated by @fault is forbidden by the existing
+ * SPTE protections.
*/
-static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+static inline bool spte_permission_fault(struct kvm_mmu *mmu, u64 spte,
+ struct kvm_page_fault *fault)
{
- if (fault->exec)
- return is_executable_pte(spte);
+ unsigned int pfec = fault->error_code;
+ int index = pfec >> 1;
+ int pte_access;
- if (fault->write)
- return is_writable_pte(spte);
+ if (!is_shadow_present_pte(spte))
+ return true;
- /* Fault was on Read access */
- return spte & PT_PRESENT_MASK;
+ BUILD_BUG_ON(PT_PRESENT_MASK != ACC_READ_MASK);
+ BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+ BUILD_BUG_ON(VMX_EPT_READABLE_MASK != ACC_READ_MASK);
+ BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+ /* strip nested paging fault error codes */
+ pte_access = spte & (PT_PRESENT_MASK | PT_WRITABLE_MASK);
+ if (shadow_nx_mask) {
+ pte_access |= spte & shadow_user_mask ? ACC_USER_MASK : 0;
+ pte_access |= spte & shadow_nx_mask ? 0 : ACC_EXEC_MASK;
+ } else {
+ pte_access |= spte & shadow_xs_mask ? ACC_EXEC_MASK : 0;
+ pte_access |= spte & shadow_xu_mask ? ACC_USER_EXEC_MASK : 0;
+ }
+
+ return (mmu->fmt.permissions[index] >> pte_access) & 1;
}
/*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5a2f8ce9a32b..839a8e416510 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1169,6 +1169,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault,
struct tdp_iter *iter)
{
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
u64 new_spte;
int ret = RET_PF_FIXED;
@@ -1178,7 +1179,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
return RET_PF_RETRY;
if (is_shadow_present_pte(iter->old_spte) &&
- (fault->prefetch || is_access_allowed(fault, iter->old_spte)) &&
+ (fault->prefetch || !spte_permission_fault(mmu, iter->old_spte, fault)) &&
is_last_spte(iter->old_spte, iter->level)) {
WARN_ON_ONCE(fault->pfn != spte_to_pfn(iter->old_spte));
return RET_PF_SPURIOUS;
--
2.52.0
prev parent reply other threads:[~2026-05-11 15:07 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-11 15:06 [RFC PATCH 00/22] KVM: apply chainsaw to struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 01/22] KVM: x86: remove nested_mmu from mmu_is_nested() Paolo Bonzini
2026-05-11 15:06 ` [PATCH 02/22] KVM: x86: move pdptrs out of the MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 03/22] KVM: x86: check that kvm_handle_invpcid is only invoked with shadow paging Paolo Bonzini
2026-05-11 15:06 ` [PATCH 04/22] KVM: x86/hyperv: remove unnecessary mmu_is_nested() check Paolo Bonzini
2026-05-11 15:06 ` [PATCH 05/22] KVM: x86/mmu: introduce struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 06/22] KVM: x86/mmu: move get_guest_pgd to " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 07/22] KVM: x86/mmu: move gva_to_gpa " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 08/22] KVM: x86/mmu: move get_pdptr " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 09/22] KVM: x86/mmu: move inject_page_fault " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 10/22] KVM: x86/mmu: move CPU-related fields " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 11/22] KVM: x86/mmu: change CPU-role accessor fields to take " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 12/22] KVM: x86/mmu: move remaining permission fields to " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 13/22] KVM: x86/mmu: pass struct kvm_pagewalk to kvm_mmu_invalidate_addr Paolo Bonzini
2026-05-11 15:06 ` [PATCH 14/22] KVM: x86/mmu: change walk_mmu to struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 15/22] KVM: x86/mmu: change nested_mmu.w to nested_cpu_walk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 16/22] KVM: x86/mmu: make cpu_walk a value Paolo Bonzini
2026-05-11 15:06 ` [PATCH 17/22] KVM: x86/mmu: pull struct kvm_pagewalk out of struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 18/22] KVM: x86/mmu: cleanup functions that initialize shadow MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 19/22] KVM: x86/mmu: pull page format to a new struct Paolo Bonzini
2026-05-11 15:06 ` [PATCH 20/22] KVM: x86/mmu: merge struct rsvd_bits_validate into struct kvm_page_format Paolo Bonzini
2026-05-11 15:06 ` [PATCH 21/22] KVM: x86/mmu: parameterize update_permission_bitmask() Paolo Bonzini
2026-05-11 15:06 ` Paolo Bonzini [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260511150648.685374-23-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=jon@nutanix.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox