From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: jon@nutanix.com, d.riley@proxmox.com
Subject: [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT
Date: Tue, 28 Apr 2026 07:09:31 -0400 [thread overview]
Message-ID: <20260428110946.11466-14-pbonzini@redhat.com> (raw)
In-Reply-To: <20260428110946.11466-1-pbonzini@redhat.com>
When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
so that supervisor and user-mode execution can be controlled
independently (ACC_USER_MASK would not allow a setting similar to
XU=0 XS=1 W=1 R=1).
Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
setting XS and XU bits separately in EPT entries.
Note that ACC_USER_EXEC_MASK is already set through ACC_ALL in
the kvm_mmu_page roles, but it does not propagate to the XU bit
because (for now) shadow_xs_mask == shadow_xu_mask.
Tested-by: David Riley <d.riley@proxmox.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/mmu.h | 3 ++-
arch/x86/kvm/mmu/mmu.c | 2 +-
arch/x86/kvm/mmu/mmutrace.h | 6 ++---
arch/x86/kvm/mmu/spte.c | 44 +++++++++++++++++++++++--------------
arch/x86/kvm/mmu/spte.h | 11 ++++++++--
5 files changed, 42 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 63be5c5efed9..d8c13e43c2d7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -39,7 +39,8 @@ extern bool __read_mostly enable_mmio_caching;
#define ACC_READ_MASK PT_PRESENT_MASK
#define ACC_WRITE_MASK PT_WRITABLE_MASK
-#define ACC_USER_MASK PT_USER_MASK
+#define ACC_USER_MASK PT_USER_MASK /* non EPT */
+#define ACC_USER_EXEC_MASK ACC_USER_MASK /* EPT only */
#define ACC_EXEC_MASK 8
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c82d151ca6c1..72f56791122e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5485,7 +5485,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
static inline bool boot_cpu_is_amd(void)
{
WARN_ON_ONCE(!tdp_enabled);
- return shadow_x_mask == 0;
+ return shadow_xs_mask == 0;
}
/*
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index dcfdfedfc4e9..3429c1413f42 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -357,8 +357,8 @@ TRACE_EVENT(
__entry->sptep = virt_to_phys(sptep);
__entry->level = level;
__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
- __entry->x = is_executable_pte(__entry->spte);
- __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+ __entry->x = (__entry->spte & (shadow_xs_mask | shadow_nx_mask)) == shadow_xs_mask;
+ __entry->u = !!(__entry->spte & (shadow_xu_mask | shadow_user_mask));
),
TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
@@ -366,7 +366,7 @@ TRACE_EVENT(
__entry->r ? "r" : "-",
__entry->spte & PT_WRITABLE_MASK ? "w" : "-",
__entry->x ? "x" : "-",
- __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+ __entry->u ? "u" : "-",
__entry->level, __entry->sptep
)
);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 7b5f118ae211..779ee44893b0 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -29,8 +29,9 @@ bool __read_mostly kvm_ad_enabled;
u64 __read_mostly shadow_host_writable_mask;
u64 __read_mostly shadow_mmu_writable_mask;
u64 __read_mostly shadow_nx_mask;
-u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
u64 __read_mostly shadow_user_mask;
+u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
u64 __read_mostly shadow_accessed_mask;
u64 __read_mostly shadow_dirty_mask;
u64 __read_mostly shadow_mmio_value;
@@ -217,21 +218,26 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* would tie make_spte() further to vCPU/MMU state, and add complexity
* just to optimize a mode that is anything but performance critical.
*/
- if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
- is_nx_huge_page_enabled(vcpu->kvm)) {
+ if (level > PG_LEVEL_4K && is_nx_huge_page_enabled(vcpu->kvm)) {
pte_access &= ~ACC_EXEC_MASK;
+ if (shadow_xu_mask)
+ pte_access &= ~ACC_USER_EXEC_MASK;
}
if (pte_access & ACC_READ_MASK)
spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
- if (pte_access & ACC_EXEC_MASK)
- spte |= shadow_x_mask;
- else
- spte |= shadow_nx_mask;
-
- if (pte_access & ACC_USER_MASK)
- spte |= shadow_user_mask;
+ if (shadow_nx_mask) {
+ if (!(pte_access & ACC_EXEC_MASK))
+ spte |= shadow_nx_mask;
+ if (pte_access & ACC_USER_MASK)
+ spte |= shadow_user_mask;
+ } else {
+ if (pte_access & ACC_EXEC_MASK)
+ spte |= shadow_xs_mask;
+ if (pte_access & ACC_USER_EXEC_MASK)
+ spte |= shadow_xu_mask;
+ }
if (level > PG_LEVEL_4K)
spte |= PT_PAGE_SIZE_MASK;
@@ -318,11 +324,13 @@ static u64 make_spte_executable(u64 spte, u8 access)
{
u64 set, clear;
- if (access & ACC_EXEC_MASK)
- set = shadow_x_mask;
+ if (shadow_nx_mask)
+ set = (access & ACC_EXEC_MASK) ? 0 : shadow_nx_mask;
else
- set = shadow_nx_mask;
- clear = set ^ (shadow_nx_mask | shadow_x_mask);
+ set =
+ (access & ACC_EXEC_MASK ? shadow_xs_mask : 0) |
+ (access & ACC_USER_EXEC_MASK ? shadow_xu_mask : 0);
+ clear = set ^ (shadow_nx_mask | shadow_xs_mask | shadow_xu_mask);
return modify_spte_protections(spte, set, clear);
}
@@ -389,7 +397,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
- shadow_user_mask | shadow_x_mask | shadow_me_value;
+ shadow_user_mask | shadow_xs_mask | shadow_xu_mask | shadow_me_value;
if (ad_disabled)
spte |= SPTE_TDP_AD_DISABLED;
@@ -497,7 +505,8 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
shadow_accessed_mask = VMX_EPT_ACCESS_BIT;
shadow_dirty_mask = VMX_EPT_DIRTY_BIT;
shadow_nx_mask = 0ull;
- shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
+ shadow_xs_mask = VMX_EPT_EXECUTABLE_MASK;
+ shadow_xu_mask = VMX_EPT_EXECUTABLE_MASK;
shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
shadow_acc_track_mask = VMX_EPT_RWX_MASK;
@@ -548,7 +557,8 @@ void kvm_mmu_reset_all_pte_masks(void)
shadow_accessed_mask = PT_ACCESSED_MASK;
shadow_dirty_mask = PT_DIRTY_MASK;
shadow_nx_mask = PT64_NX_MASK;
- shadow_x_mask = 0;
+ shadow_xs_mask = 0;
+ shadow_xu_mask = 0;
shadow_present_mask = PT_PRESENT_MASK;
shadow_acc_track_mask = 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index fe71ae131ec1..958605c6a5ea 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -178,8 +178,9 @@ extern bool __read_mostly kvm_ad_enabled;
extern u64 __read_mostly shadow_host_writable_mask;
extern u64 __read_mostly shadow_mmu_writable_mask;
extern u64 __read_mostly shadow_nx_mask;
-extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
extern u64 __read_mostly shadow_user_mask;
+extern u64 __read_mostly shadow_xs_mask; /* mutual exclusive with nx_mask and user_mask */
+extern u64 __read_mostly shadow_xu_mask; /* mutual exclusive with nx_mask and user_mask */
extern u64 __read_mostly shadow_accessed_mask;
extern u64 __read_mostly shadow_dirty_mask;
extern u64 __read_mostly shadow_mmio_value;
@@ -357,7 +358,13 @@ static inline bool is_last_spte(u64 pte, int level)
static inline bool is_executable_pte(u64 spte)
{
- return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+ /*
+ * For now, return true if either the XS or XU bit is set
+ * This function is only used for fast_page_fault,
+ * which never processes shadow EPT, and regular page
+ * tables always have XS==XU.
+ */
+ return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
}
static inline kvm_pfn_t spte_to_pfn(u64 pte)
--
2.52.0
next prev parent reply other threads:[~2026-04-28 11:10 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-28 11:09 [PATCH v4 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-04-28 11:09 ` [PATCH 01/28] KVM: TDX/VMX: rework EPT_VIOLATION_EXEC_FOR_RING3_LIN into PROT_MASK Paolo Bonzini
2026-04-28 11:09 ` [PATCH 02/28] KVM: x86/mmu: remove SPTE_PERM_MASK Paolo Bonzini
2026-04-28 11:09 ` [PATCH 03/28] KVM: x86/mmu: free up bit 10 of PTEs in preparation for MBEC Paolo Bonzini
2026-04-28 11:09 ` [PATCH 04/28] KVM: x86/mmu: shuffle high bits of SPTEs " Paolo Bonzini
2026-04-28 11:09 ` [PATCH 05/28] KVM: x86/mmu: remove SPTE_EPT_* Paolo Bonzini
2026-04-28 11:09 ` [PATCH 06/28] KVM: x86/mmu: merge make_spte_{non,}executable Paolo Bonzini
2026-04-28 11:09 ` [PATCH 07/28] KVM: x86/mmu: rename and clarify BYTE_MASK Paolo Bonzini
2026-04-28 11:09 ` [PATCH 08/28] KVM: x86/mmu: introduce ACC_READ_MASK Paolo Bonzini
2026-04-28 11:09 ` [PATCH 09/28] KVM: x86/mmu: separate more EPT/non-EPT permission_fault() Paolo Bonzini
2026-04-28 11:09 ` [PATCH 10/28] KVM: x86/mmu: pass PFERR_GUEST_PAGE/FINAL_MASK to kvm_translate_gpa Paolo Bonzini
2026-04-28 11:09 ` [PATCH 11/28] KVM: x86/mmu: pass pte_access for final nGPA->GPA walk Paolo Bonzini
2026-04-29 7:58 ` Paolo Bonzini
2026-04-28 11:09 ` [PATCH 12/28] KVM: x86: make translate_nested_gpa vendor-specific Paolo Bonzini
2026-04-28 11:09 ` Paolo Bonzini [this message]
2026-04-28 11:09 ` [PATCH 14/28] KVM: x86/mmu: move cr4_smep to base role Paolo Bonzini
2026-04-28 11:09 ` [PATCH 15/28] KVM: VMX: enable use of MBEC Paolo Bonzini
2026-04-28 11:09 ` [PATCH 16/28] KVM: nVMX: pass advanced EPT violation vmexit info to guest Paolo Bonzini
2026-04-28 11:09 ` [PATCH 17/28] KVM: nVMX: pass PFERR_USER_MASK to MMU on EPT violations Paolo Bonzini
2026-04-28 11:09 ` [PATCH 18/28] KVM: x86/mmu: add support for MBEC to EPT page table walks Paolo Bonzini
2026-04-28 11:09 ` [PATCH 19/28] KVM: nVMX: advertise MBEC to nested guests Paolo Bonzini
2026-04-28 11:09 ` [PATCH 20/28] KVM: nVMX: allow MBEC with EVMCS Paolo Bonzini
2026-04-28 11:09 ` [PATCH 21/28] KVM: x86/mmu: propagate access mask from root pages down Paolo Bonzini
2026-04-28 11:09 ` [PATCH 22/28] KVM: x86/mmu: introduce cpu_role bit for availability of PFEC.I/D Paolo Bonzini
2026-04-28 11:09 ` [PATCH 23/28] KVM: SVM: add GMET bit definitions Paolo Bonzini
2026-04-28 11:09 ` [PATCH 24/28] KVM: x86/mmu: set CR0.WP=1 for shadow NPT MMU Paolo Bonzini
2026-04-28 11:09 ` [PATCH 25/28] KVM: x86/mmu: add support for GMET to NPT page table walks Paolo Bonzini
2026-04-28 11:09 ` [PATCH 26/28] KVM: SVM: enable GMET and set it in MMU role Paolo Bonzini
2026-04-28 11:09 ` [PATCH 27/28] KVM: SVM: work around errata 1218 Paolo Bonzini
2026-04-28 11:09 ` [PATCH 28/28] KVM: nSVM: enable GMET for guests Paolo Bonzini
2026-04-29 13:05 ` [PATCH v4 00/28] KVM: combined patchset for MBEC/GMET support David Riley
2026-04-29 13:12 ` Paolo Bonzini
2026-04-30 10:28 ` Paolo Bonzini
2026-04-30 12:08 ` David Riley
2026-05-05 1:14 ` Jon Kohler
2026-05-05 5:31 ` Paolo Bonzini
-- strict thread matches above, loose matches on Subject: below --
2026-04-30 15:07 [PATCH v5 " Paolo Bonzini
2026-04-30 15:07 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini
2026-05-05 19:51 [PATCH v6 00/28] KVM: combined patchset for MBEC/GMET support Paolo Bonzini
2026-05-05 19:52 ` [PATCH 13/28] KVM: x86/mmu: split XS/XU bits for EPT Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260428110946.11466-14-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=d.riley@proxmox.com \
--cc=jon@nutanix.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.