public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: d.riley@proxmox.com, jon@nutanix.com
Subject: [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT
Date: Thu, 30 Apr 2026 11:07:32 -0400	[thread overview]
Message-ID: <20260430150747.76749-14-pbonzini@redhat.com> (raw)
In-Reply-To: <20260430150747.76749-1-pbonzini@redhat.com>

When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
so that supervisor and user-mode execution can be controlled
independently (ACC_USER_MASK would not allow a setting similar to
XU=0 XS=1 W=1 R=1).

Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
setting XS and XU bits separately in EPT entries.

Note that ACC_USER_EXEC_MASK is already set through ACC_ALL in
the kvm_mmu_page roles, but it does not propagate to the XU bit
because (for now) shadow_xs_mask == shadow_xu_mask.

Tested-by: David Riley <d.riley@proxmox.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h          |  3 +-
 arch/x86/kvm/mmu/mmu.c      |  2 +-
 arch/x86/kvm/mmu/mmutrace.h |  6 ++--
 arch/x86/kvm/mmu/spte.c     | 60 ++++++++++++++++++++++++++-----------
 arch/x86/kvm/mmu/spte.h     | 11 +++++--
 5 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 63be5c5efed9..d8c13e43c2d7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -39,7 +39,8 @@ extern bool __read_mostly enable_mmio_caching;
 
 #define ACC_READ_MASK    PT_PRESENT_MASK
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
-#define ACC_USER_MASK    PT_USER_MASK
+#define ACC_USER_MASK    PT_USER_MASK   /* non EPT */
+#define ACC_USER_EXEC_MASK ACC_USER_MASK /* EPT only */
 #define ACC_EXEC_MASK    8
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 88d0ff95fc8c..617a3204a5e0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5491,7 +5491,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 static inline bool boot_cpu_is_amd(void)
 {
 	WARN_ON_ONCE(!tdp_enabled);
-	return shadow_x_mask == 0;
+	return shadow_xs_mask == 0;
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index dcfdfedfc4e9..3429c1413f42 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -357,8 +357,8 @@ TRACE_EVENT(
 		__entry->sptep = virt_to_phys(sptep);
 		__entry->level = level;
 		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
-		__entry->x = is_executable_pte(__entry->spte);
-		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+		__entry->x = (__entry->spte & (shadow_xs_mask | shadow_nx_mask)) == shadow_xs_mask;
+		__entry->u = !!(__entry->spte & (shadow_xu_mask | shadow_user_mask));
 	),
 
 	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
@@ -366,7 +366,7 @@ TRACE_EVENT(
 		  __entry->r ? "r" : "-",
 		  __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
 		  __entry->x ? "x" : "-",
-		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+		  __entry->u ? "u" : "-",
 		  __entry->level, __entry->sptep
 	)
 );
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 7b5f118ae211..4575dd77f854 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -29,8 +29,9 @@ bool __read_mostly kvm_ad_enabled;
 u64 __read_mostly shadow_host_writable_mask;
 u64 __read_mostly shadow_mmu_writable_mask;
 u64 __read_mostly shadow_nx_mask;
-u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 u64 __read_mostly shadow_user_mask;
+u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
 u64 __read_mostly shadow_accessed_mask;
 u64 __read_mostly shadow_dirty_mask;
 u64 __read_mostly shadow_mmio_value;
@@ -217,21 +218,26 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * would tie make_spte() further to vCPU/MMU state, and add complexity
 	 * just to optimize a mode that is anything but performance critical.
 	 */
-	if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
-	    is_nx_huge_page_enabled(vcpu->kvm)) {
+	if (level > PG_LEVEL_4K && is_nx_huge_page_enabled(vcpu->kvm)) {
 		pte_access &= ~ACC_EXEC_MASK;
+		if (shadow_xu_mask)
+			pte_access &= ~ACC_USER_EXEC_MASK;
 	}
 
 	if (pte_access & ACC_READ_MASK)
 		spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
 
-	if (pte_access & ACC_EXEC_MASK)
-		spte |= shadow_x_mask;
-	else
-		spte |= shadow_nx_mask;
-
-	if (pte_access & ACC_USER_MASK)
-		spte |= shadow_user_mask;
+	if (shadow_nx_mask) {
+		if (!(pte_access & ACC_EXEC_MASK))
+			spte |= shadow_nx_mask;
+		if (pte_access & ACC_USER_MASK)
+			spte |= shadow_user_mask;
+	} else {
+		if (pte_access & ACC_EXEC_MASK)
+			spte |= shadow_xs_mask;
+		if (pte_access & ACC_USER_EXEC_MASK)
+			spte |= shadow_xu_mask;
+	}
 
 	if (level > PG_LEVEL_4K)
 		spte |= PT_PAGE_SIZE_MASK;
@@ -318,11 +324,13 @@ static u64 make_spte_executable(u64 spte, u8 access)
 {
 	u64 set, clear;
 
-	if (access & ACC_EXEC_MASK)
-		set = shadow_x_mask;
+	if (shadow_nx_mask)
+		set = (access & ACC_EXEC_MASK) ? 0 : shadow_nx_mask;
 	else
-		set = shadow_nx_mask;
-	clear = set ^ (shadow_nx_mask | shadow_x_mask);
+		set =
+			(access & ACC_EXEC_MASK ? shadow_xs_mask : 0) |
+			(access & ACC_USER_EXEC_MASK ? shadow_xu_mask : 0);
+	clear = set ^ (shadow_nx_mask | shadow_xs_mask | shadow_xu_mask);
 	return modify_spte_protections(spte, set, clear);
 }
 
@@ -389,7 +397,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
 
 	spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
 		PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
-		shadow_user_mask | shadow_x_mask | shadow_me_value;
+		shadow_user_mask | shadow_xs_mask | shadow_xu_mask | shadow_me_value;
 
 	if (ad_disabled)
 		spte |= SPTE_TDP_AD_DISABLED;
@@ -497,7 +505,24 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
 	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
 	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
 	shadow_nx_mask		= 0ull;
-	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
+	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
+
+	/*
+	 * The MMU always maps ACC_EXEC_MASK and ACC_USER_EXEC_MASK to the
+	 * XS and XU bits of shadow EPT entries, regardless of whether MBEC
+	 * is available on the host or enabled by the L1 hypervisor's EPTP.
+	 *
+	 * For the non-nested case, pages are mapped with ACC_EXEC_MASK
+	 * and ACC_USER_EXEC_MASK set in tandem, so XS == XU and the
+	 * host's MBEC setting does not matter.  On hardware without MBEC
+	 * the XU bit is reserved-as-ignored, and setting it does no harm.
+	 *
+	 * For nested EPT MBEC is not supported, but bit 10 of the gPTE has
+	 * no effect because (a) is_present_gpte() does not treat it as a
+	 * present bit, and (b) permission_fault() uses an mmu->permissions[]
+	 * array that effectively ignores ACC_USER_EXEC_MASK.
+	 */
+	shadow_xu_mask		= VMX_EPT_USER_EXECUTABLE_MASK;
 	shadow_present_mask	= VMX_EPT_SUPPRESS_VE_BIT;
 
 	shadow_acc_track_mask	= VMX_EPT_RWX_MASK;
@@ -548,7 +573,8 @@ void kvm_mmu_reset_all_pte_masks(void)
 	shadow_accessed_mask	= PT_ACCESSED_MASK;
 	shadow_dirty_mask	= PT_DIRTY_MASK;
 	shadow_nx_mask		= PT64_NX_MASK;
-	shadow_x_mask		= 0;
+	shadow_xs_mask		= 0;
+	shadow_xu_mask		= 0;
 	shadow_present_mask	= PT_PRESENT_MASK;
 
 	shadow_acc_track_mask	= 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 8a4c09c5cdbf..0ed690f78e17 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -178,8 +178,9 @@ extern bool __read_mostly kvm_ad_enabled;
 extern u64 __read_mostly shadow_host_writable_mask;
 extern u64 __read_mostly shadow_mmu_writable_mask;
 extern u64 __read_mostly shadow_nx_mask;
-extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;
+extern u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+extern u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
 extern u64 __read_mostly shadow_accessed_mask;
 extern u64 __read_mostly shadow_dirty_mask;
 extern u64 __read_mostly shadow_mmio_value;
@@ -357,7 +358,13 @@ static inline bool is_last_spte(u64 pte, int level)
 
 static inline bool is_executable_pte(u64 spte)
 {
-	return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+	/*
+	 * For now, return true if either the XS or XU bit is set
+	 * This function is only used for fast_page_fault,
+	 * which never processes shadow EPT, and regular page
+	 * tables always have XS==XU.
+	 */
+	return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
 }
 
 static inline kvm_pfn_t spte_to_pfn(u64 pte)
-- 
2.52.0



  parent reply	other threads:[~2026-04-30 15:08 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-30 15:07 [PATCH v5 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-30 15:07 ` [PATCH 01/28] KVM: TDX/VMX: rework EPT_VIOLATION_EXEC_FOR_RING3_LIN into PROT_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 02/28] KVM: x86/mmu: remove SPTE_PERM_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 03/28] KVM: x86/mmu: free up bit 10 of PTEs in preparation for MBEC Paolo Bonzini
2026-04-30 18:04   ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 04/28] KVM: x86/mmu: shuffle high bits of SPTEs " Paolo Bonzini
2026-04-30 18:15   ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 05/28] KVM: x86/mmu: remove SPTE_EPT_* Paolo Bonzini
2026-04-30 15:07 ` [PATCH 06/28] KVM: x86/mmu: merge make_spte_{non,}executable Paolo Bonzini
2026-04-30 18:22   ` Sean Christopherson
2026-05-01  3:51     ` Paolo Bonzini
2026-04-30 15:07 ` [PATCH 07/28] KVM: x86/mmu: rename and clarify BYTE_MASK Paolo Bonzini
2026-04-30 18:39   ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 08/28] KVM: x86/mmu: separate more EPT/non-EPT permission_fault() Paolo Bonzini
2026-04-30 18:35   ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 09/28] KVM: x86/mmu: introduce ACC_READ_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 10/28] KVM: x86/mmu: pass PFERR_GUEST_PAGE/FINAL_MASK to kvm_translate_gpa Paolo Bonzini
2026-04-30 18:50   ` Sean Christopherson
2026-05-01  3:52     ` Paolo Bonzini
2026-04-30 15:07 ` [PATCH 11/28] KVM: x86/mmu: pass pte_access for final nGPA->GPA walk Paolo Bonzini
2026-04-30 15:07 ` [PATCH 12/28] KVM: x86: make translate_nested_gpa vendor-specific Paolo Bonzini
2026-04-30 18:53   ` Sean Christopherson
2026-05-01  3:53     ` Paolo Bonzini
2026-04-30 15:07 ` Paolo Bonzini [this message]
2026-04-30 15:07 ` [PATCH 14/28] KVM: x86/mmu: move cr4_smep to base role Paolo Bonzini
2026-04-30 15:07 ` [PATCH 15/28] KVM: VMX: enable use of MBEC Paolo Bonzini
2026-04-30 19:00   ` Sean Christopherson
2026-05-05 10:02   ` David Riley
2026-04-30 15:07 ` [PATCH 16/28] KVM: nVMX: pass advanced EPT violation vmexit info to guest Paolo Bonzini
2026-04-30 15:07 ` [PATCH 17/28] KVM: nVMX: pass PFERR_USER_MASK to MMU on EPT violations Paolo Bonzini
2026-04-30 19:03   ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 18/28] KVM: x86/mmu: add support for MBEC to EPT page table walks Paolo Bonzini
2026-04-30 15:07 ` [PATCH 19/28] KVM: nVMX: advertise MBEC to nested guests Paolo Bonzini
2026-04-30 15:07 ` [PATCH 20/28] KVM: nVMX: allow MBEC with EVMCS Paolo Bonzini
2026-04-30 15:07 ` [PATCH 21/28] KVM: x86/mmu: propagate access mask from root pages down Paolo Bonzini
2026-04-30 15:07 ` [PATCH 22/28] KVM: x86/mmu: introduce cpu_role bit for availability of PFEC.I/D Paolo Bonzini
2026-04-30 15:07 ` [PATCH 23/28] KVM: SVM: add GMET bit definitions Paolo Bonzini
2026-04-30 15:07 ` [PATCH 24/28] KVM: x86/mmu: hard code more bits in kvm_init_shadow_npt_mmu Paolo Bonzini
2026-04-30 19:09   ` Sean Christopherson
2026-05-01  3:50     ` Paolo Bonzini
2026-05-01 13:29       ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 25/28] KVM: x86/mmu: add support for GMET to NPT page table walks Paolo Bonzini
2026-04-30 15:07 ` [PATCH 26/28] KVM: SVM: enable GMET and set it in MMU role Paolo Bonzini
2026-04-30 19:15   ` Sean Christopherson
2026-05-01  3:59     ` Paolo Bonzini
2026-05-01 14:38       ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 27/28] KVM: SVM: work around errata 1218 Paolo Bonzini
2026-04-30 15:07 ` [PATCH 28/28] KVM: nSVM: enable GMET for guests Paolo Bonzini
2026-04-30 16:16 ` [PATCH v5 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-30 19:17   ` Sean Christopherson
2026-05-01  3:46     ` Paolo Bonzini
  -- strict thread matches above, loose matches on Subject: below --
2026-05-05 19:51 [PATCH v6 " Paolo Bonzini
2026-05-05 19:52 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini
2026-04-28 11:09 [PATCH v4 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-28 11:09 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260430150747.76749-14-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=d.riley@proxmox.com \
    --cc=jon@nutanix.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.