From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
"Jan Beulich" <jbeulich@suse.com>,
"Jan Beulich" <JBeulich@suse.com>,
"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v3 18/22] x86/pv: Adjust GS handling for FRED mode
Date: Fri, 3 Oct 2025 23:53:30 +0100 [thread overview]
Message-ID: <20251003225334.2123667-19-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <20251003225334.2123667-1-andrew.cooper3@citrix.com>
When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.
For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated. Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.
This leads to the following changes:
* In load_segments(), we have to load both GSes. Account for this in the
SWAP() condition and avoid the path with SWAPGS.
* In save_segments(), we need to read GS_SHADOW rather than GS_BASE.
* In toggle_guest_mode(), we need to emulate SWAPGS.
* In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
take FRED into account when choosing which base to update.
SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED),
so under FRED it needs to be just a MOV to %gs. Simply skip the SWAPGSes.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
v3:
* Rename things
v2:
* New
I think this functions, but it's not ideal. The conditions are asymmetric and
awkward.
---
xen/arch/x86/domain.c | 22 +++++++++++++++++-----
xen/arch/x86/pv/domain.c | 22 ++++++++++++++++++++--
xen/arch/x86/pv/misc-hypercalls.c | 16 ++++++++++------
3 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8089ff929bf7..ce08f91be3af 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1819,9 +1819,10 @@ static void load_segments(struct vcpu *n)
/*
* Figure out which way around gsb/gss want to be. gsb needs to be
- * the active context, and gss needs to be the inactive context.
+ * the active context, and gss needs to be the inactive context,
+ * unless we're in FRED mode where they're reversed.
*/
- if ( !(n->arch.flags & TF_kernel_mode) )
+ if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
SWAP(gsb, gss);
if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1842,7 +1843,9 @@ static void load_segments(struct vcpu *n)
if ( !fs_gs_done && !compat )
{
- if ( read_cr4() & X86_CR4_FSGSBASE )
+ unsigned long cr4 = read_cr4();
+
+ if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
{
__wrgsbase(gss);
__wrfsbase(n->arch.pv.fs_base);
@@ -1959,6 +1962,9 @@ static void load_segments(struct vcpu *n)
* Guests however cannot use SWAPGS, so there is no mechanism to modify the
* inactive GS base behind Xen's back. Therefore, Xen's copy of the inactive
* GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
*/
static void save_segments(struct vcpu *v)
{
@@ -1974,12 +1980,18 @@ static void save_segments(struct vcpu *v)
if ( read_cr4() & X86_CR4_FSGSBASE )
{
fs_base = __rdfsbase();
- gs_base = __rdgsbase();
+ if ( opt_fred )
+ gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+ else
+ gs_base = __rdgsbase();
}
else
{
fs_base = rdmsr(MSR_FS_BASE);
- gs_base = rdmsr(MSR_GS_BASE);
+ if ( opt_fred )
+ gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+ else
+ gs_base = rdmsr(MSR_GS_BASE);
}
v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 9c4785c187dd..369af444c29b 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
#include <asm/cpufeature.h>
#include <asm/fsgsbase.h>
#include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
#include <asm/pv/domain.h>
#include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
#ifdef CONFIG_PV32
int8_t __read_mostly opt_pv32 = -1;
@@ -480,11 +481,28 @@ void toggle_guest_mode(struct vcpu *v)
* subsequent context switch won't bother re-reading it.
*/
gs_base = read_gs_base();
+
+ /*
+ * In FRED mode, not only are the two GSes the other way around (i.e. we
+ * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+ * we have to emulate it.
+ */
+ if ( opt_fred )
+ {
+ unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+ wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+ write_gs_base(gs_shadow);
+
+ gs_base = gs_shadow;
+ }
+ else
+ asm volatile ( "swapgs" );
+
if ( v->arch.flags & TF_kernel_mode )
v->arch.pv.gs_base_kernel = gs_base;
else
v->arch.pv.gs_base_user = gs_base;
- asm volatile ( "swapgs" );
_toggle_guest_pt(v);
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4add8..2c9cf50638db 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
#include <asm/debugreg.h>
#include <asm/fsgsbase.h>
+#include <asm/traps.h>
long do_set_debugreg(int reg, unsigned long value)
{
@@ -192,11 +193,12 @@ long do_set_segment_base(unsigned int which, unsigned long base)
case SEGBASE_GS_USER:
v->arch.pv.gs_base_user = base;
- write_gs_shadow(base);
- break;
-
+ fallthrough;
case SEGBASE_GS_KERNEL:
- write_gs_base(base);
+ if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+ write_gs_base(base);
+ else
+ write_gs_shadow(base);
break;
}
break;
@@ -209,7 +211,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
* We wish to update the user %gs from the GDT/LDT. Currently, the
* guest kernel's GS_BASE is in context.
*/
- asm volatile ( "swapgs" );
+ if ( !opt_fred )
+ asm volatile ( "swapgs" );
if ( sel > 3 )
/* Fix up RPL for non-NUL selectors. */
@@ -247,7 +250,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
/* Update the cache of the inactive base, as read from the GDT/LDT. */
v->arch.pv.gs_base_user = read_gs_base();
- asm volatile ( safe_swapgs );
+ if ( !opt_fred )
+ asm volatile ( safe_swapgs );
break;
}
--
2.39.5
next prev parent reply other threads:[~2025-10-03 22:56 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-03 22:53 [PATCH v3 for-4.21 00/22] x86: FRED support Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 01/22] x86/msr: Change rdmsr() to have normal API Andrew Cooper
2025-10-07 15:47 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 02/22] x86/msr: Change wrmsr() to take a single parameter Andrew Cooper
2025-10-04 0:11 ` Demi Marie Obenour
2025-10-04 0:14 ` Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 03/22] x86/fsgsbase: Split out __{rd,wr}gs_shadow() helpers Andrew Cooper
2025-10-07 15:49 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 04/22] x86/fsgsbase: Update fs/gs helpers to use wrmsrns() Andrew Cooper
2025-10-07 15:53 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 05/22] x86/fsgsbase: Improve code generation in read_registers() Andrew Cooper
2025-10-04 0:13 ` Demi Marie Obenour
2025-10-07 15:54 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 06/22] x86/boot: Use RSTORSSP to establish SSP Andrew Cooper
2025-10-07 15:57 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 07/22] x86/traps: Alter switch_stack_and_jump() for FRED mode Andrew Cooper
2025-10-07 15:58 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 08/22] x86/traps: Skip Supervisor Shadow Stack tokens in " Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 09/22] x86/traps: Make an IDT-specific #DB helper Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 10/22] x86/traps: Make an IDT-specific #PF helper Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 11/22] x86/fsgsbase: Make gskern accesses safe under FRED Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 12/22] x86/traps: Introduce FRED entrypoints Andrew Cooper
2025-10-08 8:50 ` Jan Beulich
2025-10-16 14:54 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 13/22] x86/traps: Enable FRED when requested Andrew Cooper
2025-10-08 8:54 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 14/22] x86/pv: Deduplicate is_canonical_address() in do_set_segment_base() Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 15/22] x86/entry: Alter how IRET faults are recognised Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 16/22] x86/entry: Drop the pre exception table infrastructure Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 17/22] x86/entry: Rework the comment about SYSCALL and DF Andrew Cooper
2025-10-03 22:53 ` Andrew Cooper [this message]
2025-10-03 22:53 ` [PATCH v3 19/22] x86/pv: Guest exception handling in FRED mode Andrew Cooper
2025-10-08 12:28 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 20/22] x86/pv: ERETU error handling Andrew Cooper
2025-10-08 12:36 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 21/22] x86/pv: System call handling in FRED mode Andrew Cooper
2025-10-08 13:45 ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 22/22] x86: Clamp reserved bits in eflags more aggressively Andrew Cooper
2025-10-08 13:50 ` Jan Beulich
2025-10-17 13:24 ` [PATCH v3 for-4.21 00/22] x86: FRED support Oleksii Kurochko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251003225334.2123667-19-andrew.cooper3@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=jbeulich@suse.com \
--cc=roger.pau@citrix.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).