Kernel KVM virtualization development
From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: jon@nutanix.com, mtosatti@redhat.com
Subject: [PATCH 22/22] KVM: x86/mmu: use kvm_page_format to test SPTEs
Date: Mon, 11 May 2026 11:06:48 -0400
Message-ID: <20260511150648.685374-23-pbonzini@redhat.com>
In-Reply-To: <20260511150648.685374-1-pbonzini@redhat.com>

is_access_allowed(), and is_executable_pte() within it, are effectively
a special-cased version of permission_fault() that supports only a
subset of roles.  In particular, it does not handle SMEP, SMAP or PKE.
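
In permission-bitmask terms the old check thus corresponds to a single
fixed role.  As a sketch (mirroring the update_spte_permission_bitmask()
call added below; the tdp/ept/smep/smap/wp/nx argument order follows
this series), the equivalent setup is:

	/* cr4_smep = cr4_smap = false, cr0_wp = efer_nx = true */
	__update_permission_bitmask(&mmu->fmt, tdp, ept,
				    false, false, true, true);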

Replace its implementation with a modified version of permission_fault();
the new version will support SMEP (and hence AMD GMET) for free as soon
as update_spte_permission_bitmask() stops hardcoding cr4_smep == false.
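
The resulting helper is a table lookup instead of a chain of ifs.  In
rough outline (a simplified sketch of the spte.h hunk below;
spte_to_acc_bits() is a hypothetical stand-in for the open-coded SPTE
decode):

	unsigned int pfec = fault->error_code;    /* #NPF bits truncated */
	int index = pfec >> 1;                    /* W/U/RSVD/F error bits */
	int pte_access = spte_to_acc_bits(spte);  /* hypothetical helper */
	bool forbidden = (mmu->fmt.permissions[index] >> pte_access) & 1;

Because permissions[] is precomputed per MMU, teaching the fast path
about a new paging feature only changes the table setup, not the lookup.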

This prepares for a possible future where TDP entries could have XS!=XU,
for example as part of implementing Hyper-V VSM natively inside KVM.
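
For example, a supervisor-execute-only page (as created with VMX
mode-based execute control, which VSM relies on) would have XS=1 and
XU=0.  With the decode in the spte.h hunk below, such an SPTE yields
ACC_EXEC_MASK but not ACC_USER_EXEC_MASK, so the table lookup can allow
a supervisor fetch while correctly reporting a user fetch as a
permission fault:

	pte_access |= spte & shadow_xs_mask ? ACC_EXEC_MASK : 0;
	pte_access |= spte & shadow_xu_mask ? ACC_USER_EXEC_MASK : 0;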

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 18 ++++++++++++---
 arch/x86/kvm/mmu/spte.h    | 46 +++++++++++++++++++++-----------------
 arch/x86/kvm/mmu/tdp_mmu.c |  3 ++-
 3 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ddda1f1be686..0ec8c9dc2c33 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3670,6 +3670,7 @@ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
  */
 static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
+	struct kvm_mmu *mmu;
 	struct kvm_mmu_page *sp;
 	int ret = RET_PF_INVALID;
 	u64 spte;
@@ -3679,6 +3680,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	if (!page_fault_can_be_fast(vcpu->kvm, fault))
 		return ret;
 
+	mmu = vcpu->arch.mmu;
 	walk_shadow_page_lockless_begin(vcpu);
 
 	do {
@@ -3714,7 +3716,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * Need not check the access of upper level table entries since
 		 * they are always ACC_ALL.
 		 */
-		if (is_access_allowed(fault, spte)) {
+		if (!spte_permission_fault(mmu, spte, fault)) {
 			ret = RET_PF_SPURIOUS;
 			break;
 		}
@@ -3737,7 +3739,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * that were write-protected for dirty-logging or access
 		 * tracking are handled here.  Don't bother checking if the
 		 * SPTE is writable to prioritize running with A/D bits enabled.
-		 * The is_access_allowed() check above handles the common case
+		 * The spte_permission_fault() check above handles the common case
 		 * of the fault being spurious, and the SPTE is known to be
 		 * shadow-present, i.e. except for access tracking restoration
 		 * making the new SPTE writable, the check is wasteful.
@@ -3762,7 +3764,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
 		/* Verify that the fault can be handled in the fast path */
 		if (new_spte == spte ||
-		    !is_access_allowed(fault, new_spte))
+		    spte_permission_fault(mmu, new_spte, fault))
 			break;
 
 		/*
@@ -5675,6 +5677,12 @@ static void update_permission_bitmask(struct kvm_pagewalk *w, bool tdp, bool ept
 				    is_cr0_wp(w), is_efer_nx(w));
 }
 
+static void update_spte_permission_bitmask(struct kvm_mmu *mmu, bool tdp, bool ept)
+{
+	__update_permission_bitmask(&mmu->fmt, tdp, ept,
+				    false, false, true, true);
+}
+
 /*
 * PKU is an additional mechanism by which the paging controls access to
 * user-mode addresses based on the value in the PKRU register.  Protection
@@ -5884,6 +5892,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_spte = NULL;
 
+	update_spte_permission_bitmask(context, true, shadow_xs_mask);
 	reset_tdp_shadow_zero_bits_mask(context);
 }
 
@@ -5902,6 +5911,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	else
 		paging32_init_context(context);
 
+	update_spte_permission_bitmask(context, context == &vcpu->arch.guest_mmu, false);
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -6030,6 +6040,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 		update_permission_bitmask(tdp_walk, true, true);
 		tdp_walk->fmt.pkru_mask = 0;
 		reset_rsvds_bits_mask_ept(vcpu, execonly, huge_page_level);
+
+		update_spte_permission_bitmask(context, true, true);
 		reset_ept_shadow_zero_bits_mask(context, execonly);
 	}
 
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 918533e61b98..9bddfa0e02b9 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -357,17 +357,6 @@ static inline bool is_last_spte(u64 pte, int level)
 	return (level == PG_LEVEL_4K) || is_large_pte(pte);
 }
 
-static inline bool is_executable_pte(u64 spte)
-{
-	/*
-	 * For now, return true if either the XS or XU bit is set
-	 * This function is only used for fast_page_fault,
-	 * which never processes shadow EPT, and regular page
-	 * tables always have XS==XU.
-	 */
-	return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
-}
-
 static inline kvm_pfn_t spte_to_pfn(u64 pte)
 {
 	return (pte & SPTE_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -496,20 +485,35 @@ static inline bool is_mmu_writable_spte(u64 spte)
 }
 
 /*
- * Returns true if the access indicated by @fault is allowed by the existing
- * SPTE protections.  Note, the caller is responsible for checking that the
- * SPTE is a shadow-present, leaf SPTE (either before or after).
+ * Returns true if the access indicated by @fault is forbidden by the existing
+ * SPTE protections.  Non-present SPTEs always count as a fault.
  */
-static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+static inline bool spte_permission_fault(struct kvm_mmu *mmu, u64 spte,
+					 struct kvm_page_fault *fault)
 {
-	if (fault->exec)
-		return is_executable_pte(spte);
+	/* the truncation strips the nested paging (#NPF) error code bits */
+	unsigned int pfec = fault->error_code;
+	int index = pfec >> 1;
+	int pte_access;
 
-	if (fault->write)
-		return is_writable_pte(spte);
+	if (!is_shadow_present_pte(spte))
+		return true;
 
-	/* Fault was on Read access */
-	return spte & PT_PRESENT_MASK;
+	BUILD_BUG_ON(PT_PRESENT_MASK != ACC_READ_MASK);
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+	BUILD_BUG_ON(VMX_EPT_READABLE_MASK != ACC_READ_MASK);
+	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	pte_access = spte & (PT_PRESENT_MASK | PT_WRITABLE_MASK);
+	if (shadow_nx_mask) {
+		pte_access |= spte & shadow_user_mask ? ACC_USER_MASK : 0;
+		pte_access |= spte & shadow_nx_mask ? 0 : ACC_EXEC_MASK;
+	} else {
+		pte_access |= spte & shadow_xs_mask ? ACC_EXEC_MASK : 0;
+		pte_access |= spte & shadow_xu_mask ? ACC_USER_EXEC_MASK : 0;
+	}
+
+	return (mmu->fmt.permissions[index] >> pte_access) & 1;
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5a2f8ce9a32b..839a8e416510 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1169,6 +1169,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 					  struct kvm_page_fault *fault,
 					  struct tdp_iter *iter)
 {
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
 	u64 new_spte;
 	int ret = RET_PF_FIXED;
@@ -1178,7 +1179,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 		return RET_PF_RETRY;
 
 	if (is_shadow_present_pte(iter->old_spte) &&
-	    (fault->prefetch || is_access_allowed(fault, iter->old_spte)) &&
+	    (fault->prefetch || !spte_permission_fault(mmu, iter->old_spte, fault)) &&
 	    is_last_spte(iter->old_spte, iter->level)) {
 		WARN_ON_ONCE(fault->pfn != spte_to_pfn(iter->old_spte));
 		return RET_PF_SPURIOUS;
-- 
2.52.0


Thread overview: 23+ messages
2026-05-11 15:06 [RFC PATCH 00/22] KVM: apply chainsaw to struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 01/22] KVM: x86: remove nested_mmu from mmu_is_nested() Paolo Bonzini
2026-05-11 15:06 ` [PATCH 02/22] KVM: x86: move pdptrs out of the MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 03/22] KVM: x86: check that kvm_handle_invpcid is only invoked with shadow paging Paolo Bonzini
2026-05-11 15:06 ` [PATCH 04/22] KVM: x86/hyperv: remove unnecessary mmu_is_nested() check Paolo Bonzini
2026-05-11 15:06 ` [PATCH 05/22] KVM: x86/mmu: introduce struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 06/22] KVM: x86/mmu: move get_guest_pgd to " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 07/22] KVM: x86/mmu: move gva_to_gpa " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 08/22] KVM: x86/mmu: move get_pdptr " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 09/22] KVM: x86/mmu: move inject_page_fault " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 10/22] KVM: x86/mmu: move CPU-related fields " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 11/22] KVM: x86/mmu: change CPU-role accessor fields to take " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 12/22] KVM: x86/mmu: move remaining permission fields to " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 13/22] KVM: x86/mmu: pass struct kvm_pagewalk to kvm_mmu_invalidate_addr Paolo Bonzini
2026-05-11 15:06 ` [PATCH 14/22] KVM: x86/mmu: change walk_mmu to struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 15/22] KVM: x86/mmu: change nested_mmu.w to nested_cpu_walk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 16/22] KVM: x86/mmu: make cpu_walk a value Paolo Bonzini
2026-05-11 15:06 ` [PATCH 17/22] KVM: x86/mmu: pull struct kvm_pagewalk out of struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 18/22] KVM: x86/mmu: cleanup functions that initialize shadow MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 19/22] KVM: x86/mmu: pull page format to a new struct Paolo Bonzini
2026-05-11 15:06 ` [PATCH 20/22] KVM: x86/mmu: merge struct rsvd_bits_validate into struct kvm_page_format Paolo Bonzini
2026-05-11 15:06 ` [PATCH 21/22] KVM: x86/mmu: parameterize update_permission_bitmask() Paolo Bonzini
2026-05-11 15:06 ` Paolo Bonzini [this message]
