From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: d.riley@proxmox.com, jon@nutanix.com
Subject: [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT
Date: Thu, 30 Apr 2026 11:07:32 -0400 [thread overview]
Message-ID: <20260430150747.76749-14-pbonzini@redhat.com> (raw)
In-Reply-To: <20260430150747.76749-1-pbonzini@redhat.com>
When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
so that supervisor and user-mode execution can be controlled
independently (ACC_USER_MASK would not allow a setting similar to
XU=0 XS=1 W=1 R=1).
Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
setting XS and XU bits separately in EPT entries.
Note that ACC_USER_EXEC_MASK is already set through ACC_ALL in
the kvm_mmu_page roles, but it does not propagate to the XU bit
because (for now) shadow_xs_mask == shadow_xu_mask.
Tested-by: David Riley <d.riley@proxmox.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/mmu.h | 3 +-
arch/x86/kvm/mmu/mmu.c | 2 +-
arch/x86/kvm/mmu/mmutrace.h | 6 ++--
arch/x86/kvm/mmu/spte.c | 60 ++++++++++++++++++++++++++-----------
arch/x86/kvm/mmu/spte.h | 11 +++++--
5 files changed, 58 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 63be5c5efed9..d8c13e43c2d7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -39,7 +39,8 @@ extern bool __read_mostly enable_mmio_caching;
#define ACC_READ_MASK PT_PRESENT_MASK
#define ACC_WRITE_MASK PT_WRITABLE_MASK
-#define ACC_USER_MASK PT_USER_MASK
+#define ACC_USER_MASK PT_USER_MASK /* non EPT */
+#define ACC_USER_EXEC_MASK ACC_USER_MASK /* EPT only */
#define ACC_EXEC_MASK 8
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 88d0ff95fc8c..617a3204a5e0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5491,7 +5491,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
static inline bool boot_cpu_is_amd(void)
{
WARN_ON_ONCE(!tdp_enabled);
- return shadow_x_mask == 0;
+ return shadow_xs_mask == 0;
}
/*
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index dcfdfedfc4e9..3429c1413f42 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -357,8 +357,8 @@ TRACE_EVENT(
__entry->sptep = virt_to_phys(sptep);
__entry->level = level;
__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
- __entry->x = is_executable_pte(__entry->spte);
- __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+ __entry->x = (__entry->spte & (shadow_xs_mask | shadow_nx_mask)) == shadow_xs_mask;
+ __entry->u = !!(__entry->spte & (shadow_xu_mask | shadow_user_mask));
),
TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
@@ -366,7 +366,7 @@ TRACE_EVENT(
__entry->r ? "r" : "-",
__entry->spte & PT_WRITABLE_MASK ? "w" : "-",
__entry->x ? "x" : "-",
- __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+ __entry->u ? "u" : "-",
__entry->level, __entry->sptep
)
);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 7b5f118ae211..4575dd77f854 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -29,8 +29,9 @@ bool __read_mostly kvm_ad_enabled;
u64 __read_mostly shadow_host_writable_mask;
u64 __read_mostly shadow_mmu_writable_mask;
u64 __read_mostly shadow_nx_mask;
-u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
u64 __read_mostly shadow_user_mask;
+u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
u64 __read_mostly shadow_accessed_mask;
u64 __read_mostly shadow_dirty_mask;
u64 __read_mostly shadow_mmio_value;
@@ -217,21 +218,26 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* would tie make_spte() further to vCPU/MMU state, and add complexity
* just to optimize a mode that is anything but performance critical.
*/
- if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
- is_nx_huge_page_enabled(vcpu->kvm)) {
+ if (level > PG_LEVEL_4K && is_nx_huge_page_enabled(vcpu->kvm)) {
pte_access &= ~ACC_EXEC_MASK;
+ if (shadow_xu_mask)
+ pte_access &= ~ACC_USER_EXEC_MASK;
}
if (pte_access & ACC_READ_MASK)
spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
- if (pte_access & ACC_EXEC_MASK)
- spte |= shadow_x_mask;
- else
- spte |= shadow_nx_mask;
-
- if (pte_access & ACC_USER_MASK)
- spte |= shadow_user_mask;
+ if (shadow_nx_mask) {
+ if (!(pte_access & ACC_EXEC_MASK))
+ spte |= shadow_nx_mask;
+ if (pte_access & ACC_USER_MASK)
+ spte |= shadow_user_mask;
+ } else {
+ if (pte_access & ACC_EXEC_MASK)
+ spte |= shadow_xs_mask;
+ if (pte_access & ACC_USER_EXEC_MASK)
+ spte |= shadow_xu_mask;
+ }
if (level > PG_LEVEL_4K)
spte |= PT_PAGE_SIZE_MASK;
@@ -318,11 +324,13 @@ static u64 make_spte_executable(u64 spte, u8 access)
{
u64 set, clear;
- if (access & ACC_EXEC_MASK)
- set = shadow_x_mask;
+ if (shadow_nx_mask)
+ set = (access & ACC_EXEC_MASK) ? 0 : shadow_nx_mask;
else
- set = shadow_nx_mask;
- clear = set ^ (shadow_nx_mask | shadow_x_mask);
+ set =
+ (access & ACC_EXEC_MASK ? shadow_xs_mask : 0) |
+ (access & ACC_USER_EXEC_MASK ? shadow_xu_mask : 0);
+ clear = set ^ (shadow_nx_mask | shadow_xs_mask | shadow_xu_mask);
return modify_spte_protections(spte, set, clear);
}
@@ -389,7 +397,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
- shadow_user_mask | shadow_x_mask | shadow_me_value;
+ shadow_user_mask | shadow_xs_mask | shadow_xu_mask | shadow_me_value;
if (ad_disabled)
spte |= SPTE_TDP_AD_DISABLED;
@@ -497,7 +505,24 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
shadow_accessed_mask = VMX_EPT_ACCESS_BIT;
shadow_dirty_mask = VMX_EPT_DIRTY_BIT;
shadow_nx_mask = 0ull;
- shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
+ shadow_xs_mask = VMX_EPT_EXECUTABLE_MASK;
+
+ /*
+ * The MMU always maps ACC_EXEC_MASK and ACC_USER_EXEC_MASK to the
+ * XS and XU bits of shadow EPT entries, regardless of whether MBEC
+ * is available on the host or enabled by the L1 hypervisor's EPTP.
+ *
+ * For the non-nested case, pages are mapped with ACC_EXEC_MASK
+ * and ACC_USER_EXEC_MASK set in tandem, so XS == XU and the
+ * host's MBEC setting does not matter. On hardware without MBEC
+ * the XU bit is reserved-as-ignored, and setting it does no harm.
+ *
+ * For nested EPT MBEC is not supported, but bit 10 of the gPTE has
+ * no effect because (a) is_present_gpte() does not treat it as a
+ * present bit, and (b) permission_fault() uses an mmu->permissions[]
+ * array that effectively ignores ACC_USER_EXEC_MASK.
+ */
+ shadow_xu_mask = VMX_EPT_USER_EXECUTABLE_MASK;
shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
shadow_acc_track_mask = VMX_EPT_RWX_MASK;
@@ -548,7 +573,8 @@ void kvm_mmu_reset_all_pte_masks(void)
shadow_accessed_mask = PT_ACCESSED_MASK;
shadow_dirty_mask = PT_DIRTY_MASK;
shadow_nx_mask = PT64_NX_MASK;
- shadow_x_mask = 0;
+ shadow_xs_mask = 0;
+ shadow_xu_mask = 0;
shadow_present_mask = PT_PRESENT_MASK;
shadow_acc_track_mask = 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 8a4c09c5cdbf..0ed690f78e17 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -178,8 +178,9 @@ extern bool __read_mostly kvm_ad_enabled;
extern u64 __read_mostly shadow_host_writable_mask;
extern u64 __read_mostly shadow_mmu_writable_mask;
extern u64 __read_mostly shadow_nx_mask;
-extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
extern u64 __read_mostly shadow_user_mask;
+extern u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+extern u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
extern u64 __read_mostly shadow_accessed_mask;
extern u64 __read_mostly shadow_dirty_mask;
extern u64 __read_mostly shadow_mmio_value;
@@ -357,7 +358,13 @@ static inline bool is_last_spte(u64 pte, int level)
static inline bool is_executable_pte(u64 spte)
{
- return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+ /*
+ * For now, return true if either the XS or XU bit is set
+ * This function is only used for fast_page_fault,
+ * which never processes shadow EPT, and regular page
+ * tables always have XS==XU.
+ */
+ return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
}
static inline kvm_pfn_t spte_to_pfn(u64 pte)
--
2.52.0
next prev parent reply other threads:[~2026-04-30 15:08 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-30 15:07 [PATCH v5 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-30 15:07 ` [PATCH 01/28] KVM: TDX/VMX: rework EPT_VIOLATION_EXEC_FOR_RING3_LIN into PROT_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 02/28] KVM: x86/mmu: remove SPTE_PERM_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 03/28] KVM: x86/mmu: free up bit 10 of PTEs in preparation for MBEC Paolo Bonzini
2026-04-30 18:04 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 04/28] KVM: x86/mmu: shuffle high bits of SPTEs " Paolo Bonzini
2026-04-30 18:15 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 05/28] KVM: x86/mmu: remove SPTE_EPT_* Paolo Bonzini
2026-04-30 15:07 ` [PATCH 06/28] KVM: x86/mmu: merge make_spte_{non,}executable Paolo Bonzini
2026-04-30 18:22 ` Sean Christopherson
2026-05-01 3:51 ` Paolo Bonzini
2026-04-30 15:07 ` [PATCH 07/28] KVM: x86/mmu: rename and clarify BYTE_MASK Paolo Bonzini
2026-04-30 18:39 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 08/28] KVM: x86/mmu: separate more EPT/non-EPT permission_fault() Paolo Bonzini
2026-04-30 18:35 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 09/28] KVM: x86/mmu: introduce ACC_READ_MASK Paolo Bonzini
2026-04-30 15:07 ` [PATCH 10/28] KVM: x86/mmu: pass PFERR_GUEST_PAGE/FINAL_MASK to kvm_translate_gpa Paolo Bonzini
2026-04-30 18:50 ` Sean Christopherson
2026-05-01 3:52 ` Paolo Bonzini
2026-04-30 15:07 ` [PATCH 11/28] KVM: x86/mmu: pass pte_access for final nGPA->GPA walk Paolo Bonzini
2026-04-30 15:07 ` [PATCH 12/28] KVM: x86: make translate_nested_gpa vendor-specific Paolo Bonzini
2026-04-30 18:53 ` Sean Christopherson
2026-05-01 3:53 ` Paolo Bonzini
2026-04-30 15:07 ` Paolo Bonzini [this message]
2026-04-30 15:07 ` [PATCH 14/28] KVM: x86/mmu: move cr4_smep to base role Paolo Bonzini
2026-04-30 15:07 ` [PATCH 15/28] KVM: VMX: enable use of MBEC Paolo Bonzini
2026-04-30 19:00 ` Sean Christopherson
2026-05-05 10:02 ` David Riley
2026-04-30 15:07 ` [PATCH 16/28] KVM: nVMX: pass advanced EPT violation vmexit info to guest Paolo Bonzini
2026-04-30 15:07 ` [PATCH 17/28] KVM: nVMX: pass PFERR_USER_MASK to MMU on EPT violations Paolo Bonzini
2026-04-30 19:03 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 18/28] KVM: x86/mmu: add support for MBEC to EPT page table walks Paolo Bonzini
2026-04-30 15:07 ` [PATCH 19/28] KVM: nVMX: advertise MBEC to nested guests Paolo Bonzini
2026-04-30 15:07 ` [PATCH 20/28] KVM: nVMX: allow MBEC with EVMCS Paolo Bonzini
2026-04-30 15:07 ` [PATCH 21/28] KVM: x86/mmu: propagate access mask from root pages down Paolo Bonzini
2026-04-30 15:07 ` [PATCH 22/28] KVM: x86/mmu: introduce cpu_role bit for availability of PFEC.I/D Paolo Bonzini
2026-04-30 15:07 ` [PATCH 23/28] KVM: SVM: add GMET bit definitions Paolo Bonzini
2026-04-30 15:07 ` [PATCH 24/28] KVM: x86/mmu: hard code more bits in kvm_init_shadow_npt_mmu Paolo Bonzini
2026-04-30 19:09 ` Sean Christopherson
2026-05-01 3:50 ` Paolo Bonzini
2026-05-01 13:29 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 25/28] KVM: x86/mmu: add support for GMET to NPT page table walks Paolo Bonzini
2026-04-30 15:07 ` [PATCH 26/28] KVM: SVM: enable GMET and set it in MMU role Paolo Bonzini
2026-04-30 19:15 ` Sean Christopherson
2026-05-01 3:59 ` Paolo Bonzini
2026-05-01 14:38 ` Sean Christopherson
2026-04-30 15:07 ` [PATCH 27/28] KVM: SVM: work around errata 1218 Paolo Bonzini
2026-04-30 15:07 ` [PATCH 28/28] KVM: nSVM: enable GMET for guests Paolo Bonzini
2026-04-30 16:16 ` [PATCH v5 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-30 19:17 ` Sean Christopherson
2026-05-01 3:46 ` Paolo Bonzini
-- strict thread matches above, loose matches on Subject: below --
2026-05-05 19:51 [PATCH v6 " Paolo Bonzini
2026-05-05 19:52 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini
2026-04-28 11:09 [PATCH v4 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-28 11:09 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260430150747.76749-14-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=d.riley@proxmox.com \
--cc=jon@nutanix.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox