xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Jan Beulich" <JBeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v3 18/22] x86/pv: Adjust GS handling for FRED mode
Date: Fri,  3 Oct 2025 23:53:30 +0100	[thread overview]
Message-ID: <20251003225334.2123667-19-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <20251003225334.2123667-1-andrew.cooper3@citrix.com>

When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.

For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated.  Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.

This leads to the following changes:

  * In load_segments(), we have to load both GSes.  Account for this in the
    SWAP() condition and avoid the path with SWAPGS.

  * In save_segments(), we need to read GS_SHADOW rather than GS_BASE.

  * In toggle_guest_mode(), we need to emulate SWAPGS.

  * In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
    take FRED into account when choosing which base to update.

    SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED)
    so under FRED needs to be just a MOV %gs.  Simply skip the SWAPGSes.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>

v3:
 * Rename things

v2:
 * New

I think this functions, but it's not ideal.  The conditions are asymmetric and
awkward.
---
 xen/arch/x86/domain.c             | 22 +++++++++++++++++-----
 xen/arch/x86/pv/domain.c          | 22 ++++++++++++++++++++--
 xen/arch/x86/pv/misc-hypercalls.c | 16 ++++++++++------
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8089ff929bf7..ce08f91be3af 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1819,9 +1819,10 @@ static void load_segments(struct vcpu *n)
 
         /*
          * Figure out which way around gsb/gss want to be.  gsb needs to be
-         * the active context, and gss needs to be the inactive context.
+         * the active context, and gss needs to be the inactive context,
+         * unless we're in FRED mode where they're reversed.
          */
-        if ( !(n->arch.flags & TF_kernel_mode) )
+        if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
             SWAP(gsb, gss);
 
         if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1842,7 +1843,9 @@ static void load_segments(struct vcpu *n)
 
     if ( !fs_gs_done && !compat )
     {
-        if ( read_cr4() & X86_CR4_FSGSBASE )
+        unsigned long cr4 = read_cr4();
+
+        if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
         {
             __wrgsbase(gss);
             __wrfsbase(n->arch.pv.fs_base);
@@ -1959,6 +1962,9 @@ static void load_segments(struct vcpu *n)
  * Guests however cannot use SWAPGS, so there is no mechanism to modify the
  * inactive GS base behind Xen's back.  Therefore, Xen's copy of the inactive
  * GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
  */
 static void save_segments(struct vcpu *v)
 {
@@ -1974,12 +1980,18 @@ static void save_segments(struct vcpu *v)
         if ( read_cr4() & X86_CR4_FSGSBASE )
         {
             fs_base = __rdfsbase();
-            gs_base = __rdgsbase();
+            if ( opt_fred )
+                gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+            else
+                gs_base = __rdgsbase();
         }
         else
         {
             fs_base = rdmsr(MSR_FS_BASE);
-            gs_base = rdmsr(MSR_GS_BASE);
+            if ( opt_fred )
+                gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+            else
+                gs_base = rdmsr(MSR_GS_BASE);
         }
 
         v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 9c4785c187dd..369af444c29b 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
 #include <asm/cpufeature.h>
 #include <asm/fsgsbase.h>
 #include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
 #include <asm/pv/domain.h>
 #include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
 
 #ifdef CONFIG_PV32
 int8_t __read_mostly opt_pv32 = -1;
@@ -480,11 +481,28 @@ void toggle_guest_mode(struct vcpu *v)
      * subsequent context switch won't bother re-reading it.
      */
     gs_base = read_gs_base();
+
+    /*
+     * In FRED mode, not only are the two GSes the other way around (i.e. we
+     * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+     * we have to emulate it.
+     */
+    if ( opt_fred )
+    {
+        unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+        wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+        write_gs_base(gs_shadow);
+
+        gs_base = gs_shadow;
+    }
+    else
+        asm volatile ( "swapgs" );
+
     if ( v->arch.flags & TF_kernel_mode )
         v->arch.pv.gs_base_kernel = gs_base;
     else
         v->arch.pv.gs_base_user = gs_base;
-    asm volatile ( "swapgs" );
 
     _toggle_guest_pt(v);
 
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4add8..2c9cf50638db 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
 
 #include <asm/debugreg.h>
 #include <asm/fsgsbase.h>
+#include <asm/traps.h>
 
 long do_set_debugreg(int reg, unsigned long value)
 {
@@ -192,11 +193,12 @@ long do_set_segment_base(unsigned int which, unsigned long base)
 
         case SEGBASE_GS_USER:
             v->arch.pv.gs_base_user = base;
-            write_gs_shadow(base);
-            break;
-
+            fallthrough;
         case SEGBASE_GS_KERNEL:
-            write_gs_base(base);
+            if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+                write_gs_base(base);
+            else
+                write_gs_shadow(base);
             break;
         }
         break;
@@ -209,7 +211,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
          * We wish to update the user %gs from the GDT/LDT.  Currently, the
          * guest kernel's GS_BASE is in context.
          */
-        asm volatile ( "swapgs" );
+        if ( !opt_fred )
+            asm volatile ( "swapgs" );
 
         if ( sel > 3 )
             /* Fix up RPL for non-NUL selectors. */
@@ -247,7 +250,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
         /* Update the cache of the inactive base, as read from the GDT/LDT. */
         v->arch.pv.gs_base_user = read_gs_base();
 
-        asm volatile ( safe_swapgs );
+        if ( !opt_fred )
+            asm volatile ( safe_swapgs );
         break;
     }
 
-- 
2.39.5



  parent reply	other threads:[~2025-10-03 22:56 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-03 22:53 [PATCH v3 for-4.21 00/22] x86: FRED support Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 01/22] x86/msr: Change rdmsr() to have normal API Andrew Cooper
2025-10-07 15:47   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 02/22] x86/msr: Change wrmsr() to take a single parameter Andrew Cooper
2025-10-04  0:11   ` Demi Marie Obenour
2025-10-04  0:14     ` Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 03/22] x86/fsgsbase: Split out __{rd,wr}gs_shadow() helpers Andrew Cooper
2025-10-07 15:49   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 04/22] x86/fsgsbase: Update fs/gs helpers to use wrmsrns() Andrew Cooper
2025-10-07 15:53   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 05/22] x86/fsgsbase: Improve code generation in read_registers() Andrew Cooper
2025-10-04  0:13   ` Demi Marie Obenour
2025-10-07 15:54   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 06/22] x86/boot: Use RSTORSSP to establish SSP Andrew Cooper
2025-10-07 15:57   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 07/22] x86/traps: Alter switch_stack_and_jump() for FRED mode Andrew Cooper
2025-10-07 15:58   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 08/22] x86/traps: Skip Supervisor Shadow Stack tokens in " Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 09/22] x86/traps: Make an IDT-specific #DB helper Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 10/22] x86/traps: Make an IDT-specific #PF helper Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 11/22] x86/fsgsbase: Make gskern accesses safe under FRED Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 12/22] x86/traps: Introduce FRED entrypoints Andrew Cooper
2025-10-08  8:50   ` Jan Beulich
2025-10-16 14:54   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 13/22] x86/traps: Enable FRED when requested Andrew Cooper
2025-10-08  8:54   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 14/22] x86/pv: Deduplicate is_canonical_address() in do_set_segment_base() Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 15/22] x86/entry: Alter how IRET faults are recognised Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 16/22] x86/entry: Drop the pre exception table infrastructure Andrew Cooper
2025-10-03 22:53 ` [PATCH v3 17/22] x86/entry: Rework the comment about SYSCALL and DF Andrew Cooper
2025-10-03 22:53 ` Andrew Cooper [this message]
2025-10-03 22:53 ` [PATCH v3 19/22] x86/pv: Guest exception handling in FRED mode Andrew Cooper
2025-10-08 12:28   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 20/22] x86/pv: ERETU error handling Andrew Cooper
2025-10-08 12:36   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 21/22] x86/pv: System call handling in FRED mode Andrew Cooper
2025-10-08 13:45   ` Jan Beulich
2025-10-03 22:53 ` [PATCH v3 22/22] x86: Clamp reserved bits in eflags more aggressively Andrew Cooper
2025-10-08 13:50   ` Jan Beulich
2025-10-17 13:24 ` [PATCH v3 for-4.21 00/22] x86: FRED support Oleksii Kurochko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251003225334.2123667-19-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).