All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Jan Beulich" <JBeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v4.1 09/14] x86/pv: Adjust GS handling for FRED mode
Date: Wed,  4 Mar 2026 17:18:26 +0000	[thread overview]
Message-ID: <20260304171826.45847-1-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <20260227231636.3955109-10-andrew.cooper3@citrix.com>

When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.

For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated.  Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.

This leads to the following changes:

  * In load_segments(), we already load both GSes.  Account for FRED in the
    SWAP() condition and avoid the path with SWAPGS.

  * In save_segments(), we need to read GS_SHADOW rather than GS_BASE.

  * In toggle_guest_mode(), we need to emulate SWAPGS.

  * In {read,write}_msr() which access the live registers, GS_SHADOW and
    GS_BASE need swapping.

  * In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
    take FRED into account when choosing which base to update.

    SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED)
    so under FRED it needs to be just a MOV to %gs.  Simply skip the SWAPGSes.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>

v4.1:
 * Extra comments

v4:
 * Adjust GS accesses for emulated {RD,WR}MSR too.
---
 xen/arch/x86/domain.c             | 16 +++++++++++-----
 xen/arch/x86/pv/domain.c          | 22 ++++++++++++++++++++--
 xen/arch/x86/pv/emul-priv-op.c    | 26 +++++++++++++++++---------
 xen/arch/x86/pv/misc-hypercalls.c | 23 +++++++++++++++--------
 4 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e658c2d647b7..9c1f6ef76d52 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1791,9 +1791,10 @@ static void load_segments(struct vcpu *n)
 
         /*
          * Figure out which way around gsb/gss want to be.  gsb needs to be
-         * the active context, and gss needs to be the inactive context.
+         * the active context, and gss needs to be the inactive context,
+         * unless we're in FRED mode where they're reversed.
          */
-        if ( !(n->arch.flags & TF_kernel_mode) )
+        if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
             SWAP(gsb, gss);
 
         if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1814,7 +1815,9 @@ static void load_segments(struct vcpu *n)
 
     if ( !fs_gs_done && !compat )
     {
-        if ( read_cr4() & X86_CR4_FSGSBASE )
+        unsigned long cr4 = read_cr4();
+
+        if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
         {
             __wrgsbase(gss);
             __wrfsbase(n->arch.pv.fs_base);
@@ -1931,6 +1934,9 @@ static void load_segments(struct vcpu *n)
  * Guests however cannot use SWAPGS, so there is no mechanism to modify the
  * inactive GS base behind Xen's back.  Therefore, Xen's copy of the inactive
  * GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
  */
 static void save_segments(struct vcpu *v)
 {
@@ -1946,12 +1952,12 @@ static void save_segments(struct vcpu *v)
         if ( read_cr4() & X86_CR4_FSGSBASE )
         {
             fs_base = __rdfsbase();
-            gs_base = __rdgsbase();
+            gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : __rdgsbase();
         }
         else
         {
             fs_base = rdmsr(MSR_FS_BASE);
-            gs_base = rdmsr(MSR_GS_BASE);
+            gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : rdmsr(MSR_GS_BASE);
         }
 
         v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index d16583a7454d..b85abb5ed903 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
 #include <asm/cpufeature.h>
 #include <asm/fsgsbase.h>
 #include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
 #include <asm/pv/domain.h>
 #include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
 
 #ifdef CONFIG_PV32
 int8_t __read_mostly opt_pv32 = -1;
@@ -514,11 +515,28 @@ void toggle_guest_mode(struct vcpu *v)
      * subsequent context switch won't bother re-reading it.
      */
     gs_base = read_gs_base();
+
+    /*
+     * In FRED mode, not only are the two GSes the other way around (i.e. we
+     * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+     * we have to emulate it.
+     */
+    if ( opt_fred )
+    {
+        unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+        wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+        write_gs_base(gs_shadow);
+
+        gs_base = gs_shadow;
+    }
+    else
+        asm volatile ( "swapgs" );
+
     if ( v->arch.flags & TF_kernel_mode )
         v->arch.pv.gs_base_kernel = gs_base;
     else
         v->arch.pv.gs_base_user = gs_base;
-    asm volatile ( "swapgs" );
 
     _toggle_guest_pt(v);
 
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 64d47ab677a4..53676b30219c 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -25,6 +25,7 @@
 #include <asm/pv/traps.h>
 #include <asm/shared.h>
 #include <asm/stubs.h>
+#include <asm/traps.h>
 
 #include <xsm/xsm.h>
 
@@ -926,7 +927,8 @@ static int cf_check read_msr(
     case MSR_GS_BASE:
         if ( !cp->extd.lm )
             break;
-        *val = read_gs_base();
+        /* Under FRED, GS is automatically swapped on privilege change. */
+        *val = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : read_gs_base();
         return X86EMUL_OKAY;
 
     case MSR_SHADOW_GS_BASE:
@@ -1066,17 +1068,23 @@ static int cf_check write_msr(
         if ( !cp->extd.lm || !is_canonical_address(val) )
             break;
 
-        if ( reg == MSR_FS_BASE )
-            write_fs_base(val);
-        else if ( reg == MSR_GS_BASE )
-            write_gs_base(val);
-        else if ( reg == MSR_SHADOW_GS_BASE )
+        switch ( reg )
         {
-            write_gs_shadow(val);
+        case MSR_FS_BASE:
+            write_fs_base(val);
+            break;
+
+        case MSR_SHADOW_GS_BASE:
             curr->arch.pv.gs_base_user = val;
+            fallthrough;
+        case MSR_GS_BASE:
+            /* Under FRED, GS is automatically swapped on privilege change. */
+            if ( (reg == MSR_GS_BASE) ^ opt_fred )
+                write_gs_base(val);
+            else
+                write_gs_shadow(val);
+            break;
         }
-        else
-            ASSERT_UNREACHABLE();
         return X86EMUL_OKAY;
 
     case MSR_EFER:
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4add8..7e915d86b724 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
 
 #include <asm/debugreg.h>
 #include <asm/fsgsbase.h>
+#include <asm/traps.h>
 
 long do_set_debugreg(int reg, unsigned long value)
 {
@@ -192,11 +193,13 @@ long do_set_segment_base(unsigned int which, unsigned long base)
 
         case SEGBASE_GS_USER:
             v->arch.pv.gs_base_user = base;
-            write_gs_shadow(base);
-            break;
-
+            fallthrough;
         case SEGBASE_GS_KERNEL:
-            write_gs_base(base);
+            /* Under FRED, GS is automatically swapped on privilege change. */
+            if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+                write_gs_base(base);
+            else
+                write_gs_shadow(base);
             break;
         }
         break;
@@ -206,10 +209,13 @@ long do_set_segment_base(unsigned int which, unsigned long base)
         unsigned int sel = (uint16_t)base;
 
         /*
-         * We wish to update the user %gs from the GDT/LDT.  Currently, the
-         * guest kernel's GS_BASE is in context.
+         * We wish to update the user %gs from the GDT/LDT.  Currently, we are
+         * in guest kernel context.
+         *
+         * Under IDT, this means updating GS_SHADOW.  Under FRED, plain GS.
          */
-        asm volatile ( "swapgs" );
+        if ( !opt_fred )
+            asm volatile ( "swapgs" );
 
         if ( sel > 3 )
             /* Fix up RPL for non-NUL selectors. */
@@ -247,7 +253,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
         /* Update the cache of the inactive base, as read from the GDT/LDT. */
         v->arch.pv.gs_base_user = read_gs_base();
 
-        asm volatile ( safe_swapgs );
+        if ( !opt_fred )
+            asm volatile ( safe_swapgs );
         break;
     }
 
-- 
2.39.5



  parent reply	other threads:[~2026-03-04 17:18 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-27 23:16 [PATCH v4 00/14] x86: FRED support Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 01/14] x86/pv: Don't assume that INT $imm8 instructions are two bytes long Andrew Cooper
2026-03-02 11:03   ` Jan Beulich
2026-03-02 11:43     ` Andrew Cooper
2026-03-02 12:57       ` Jan Beulich
2026-03-02 16:39         ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 02/14] docs/guest-guide: Describe the PV traps and entrypoints ABI Andrew Cooper
2026-03-02 11:19   ` Jan Beulich
2026-03-02 14:47     ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 03/14] x86/boot: Move gdt_l1e caching out of traps_init() Andrew Cooper
2026-03-02 11:33   ` Jan Beulich
2026-02-27 23:16 ` [PATCH v4 04/14] x86/boot: Document the ordering dependency of _svm_cpu_up() Andrew Cooper
2026-03-02 11:35   ` Jan Beulich
2026-03-02 15:20   ` Andrew Cooper
2026-03-02 15:34     ` Jan Beulich
2026-03-02 15:42       ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 05/14] x86/traps: Move traps_init() earlier on boot Andrew Cooper
2026-03-02 11:39   ` Jan Beulich
2026-03-02 15:32     ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 06/14] x86/traps: Don't configure Supervisor Shadow Stack tokens in FRED mode Andrew Cooper
2026-03-02 14:50   ` Jan Beulich
2026-03-02 15:47     ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 07/14] x86/traps: Introduce FRED entrypoints Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 08/14] x86/traps: Enable FRED when requested Andrew Cooper
2026-03-02 16:12   ` Jan Beulich
2026-03-03 13:44     ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 09/14] x86/pv: Adjust GS handling for FRED mode Andrew Cooper
2026-03-02 16:24   ` Jan Beulich
2026-03-04 17:18   ` Andrew Cooper [this message]
2026-03-05 10:00     ` [PATCH v4.1 " Jan Beulich
2026-02-27 23:16 ` [PATCH v4 10/14] x86/pv: Guest exception handling in " Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 11/14] x86/pv: ERETU error handling Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 12/14] x86/pv: System call handling in FRED mode Andrew Cooper
2026-03-09 22:25   ` Andrew Cooper
2026-03-10  7:16     ` Jan Beulich
2026-02-27 23:16 ` [PATCH v4 13/14] x86: Clamp reserved bits in eflags more aggressively Andrew Cooper
2026-03-02 16:35   ` Jan Beulich
2026-03-11 17:58   ` [PATCH v4.1 13/14] x86: Clamp " Andrew Cooper
2026-03-12  8:15     ` Jan Beulich
2026-03-12 12:36       ` Andrew Cooper
2026-02-27 23:16 ` [PATCH v4 14/14] x86/traps: Use fatal_trap() for #UD and #GP Andrew Cooper
2026-03-02 16:39   ` Jan Beulich
2026-03-02 16:40   ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260304171826.45847-1-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.