All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] x86/asm: .byte removal
@ 2025-12-30 13:54 Andrew Cooper
  2025-12-30 13:54 ` [PATCH 1/4] x86/xstate: Deindent the logic in xrstor() Andrew Cooper
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Andrew Cooper @ 2025-12-30 13:54 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

The remaining conversion of .byte to real instructions following the toolchain
upgrade.

https://gitlab.com/xen-project/hardware/xen-staging/-/pipelines/2237162011

Andrew Cooper (4):
  x86/xstate: Deindent the logic in xrstor()
  x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  x86: Avoid using .byte for instructions where safe to do so

 xen/arch/x86/arch.mk                   |   4 +
 xen/arch/x86/i387.c                    |  65 ++++-----
 xen/arch/x86/include/asm/asm-defns.h   |   4 -
 xen/arch/x86/include/asm/msr.h         |   2 +
 xen/arch/x86/include/asm/prot-key.h    |   6 +-
 xen/arch/x86/include/asm/xstate.h      |   3 +-
 xen/arch/x86/x86_emulate/0f01.c        |   2 +-
 xen/arch/x86/x86_emulate/x86_emulate.c |  34 +++--
 xen/arch/x86/xstate.c                  | 176 ++++++++++++-------------
 9 files changed, 143 insertions(+), 153 deletions(-)

-- 
2.39.5



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/4] x86/xstate: Deindent the logic in xrstor()
  2025-12-30 13:54 [PATCH 0/4] x86/asm: .byte removal Andrew Cooper
@ 2025-12-30 13:54 ` Andrew Cooper
  2026-01-05 15:12   ` Jan Beulich
  2025-12-30 13:54 ` [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline Andrew Cooper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2025-12-30 13:54 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

... to improve the legibility of the following fix.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>

Best reviewed with `git show --ignore-all-space`
---
 xen/arch/x86/xstate.c | 131 ++++++++++++++++++++++--------------------
 1 file changed, 68 insertions(+), 63 deletions(-)

diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index e990abc9d18c..384f78bd5281 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -407,19 +407,19 @@ void xrstor(struct vcpu *v, uint64_t mask)
      */
     for ( prev_faults = faults = 0; ; prev_faults = faults )
     {
-        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
-        {
-            BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
-#define _xrstor(insn) \
-        asm volatile ( "1: .byte " insn "\n" \
-                       "3:\n" \
-                       "   .section .fixup,\"ax\"\n" \
-                       "2: incl %[faults]\n" \
-                       "   jmp 3b\n" \
-                       "   .previous\n" \
-                       _ASM_EXTABLE(1b, 2b) \
-                       : [mem] "+m" (*ptr), [faults] "+g" (faults) \
-                       : [lmask] "a" (lmask), [hmask] "d" (hmask), \
+    switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+    {
+        BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
+#define _xrstor(insn)                                               \
+        asm volatile ( "1: .byte " insn "\n"                        \
+                       "3:\n"                                       \
+                       "   .section .fixup,\"ax\"\n"                \
+                       "2: incl %[faults]\n"                        \
+                       "   jmp 3b\n"                                \
+                       "   .previous\n"                             \
+                       _ASM_EXTABLE(1b, 2b)                         \
+                       : [mem] "+m" (*ptr), [faults] "+g" (faults)  \
+                       : [lmask] "a" (lmask), [hmask] "d" (hmask),  \
                          [ptr] "D" (ptr) )
 
 #define XRSTOR(pfx) \
@@ -437,62 +437,67 @@ void xrstor(struct vcpu *v, uint64_t mask)
         else \
             _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */
 
-        default:
-            XRSTOR("0x48,");
-            break;
-        case 4: case 2:
-            XRSTOR("");
-            break;
+    default:
+        XRSTOR("0x48,");
+        break;
+
+    case 4: case 2:
+        XRSTOR("");
+        break;
+
 #undef XRSTOR
 #undef _xrstor
-        }
-        if ( likely(faults == prev_faults) )
-            break;
+    }
+
+    if ( likely(faults == prev_faults) )
+        break;
+
 #ifndef NDEBUG
-        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
-                faults, ptr->fpu_sse.mxcsr);
-        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
-                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
-        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
-                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
-        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
-                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
-        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
-                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
+    gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
+            faults, ptr->fpu_sse.mxcsr);
+    gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
+            ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
+    gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
+            ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
+    gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
+            ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
+    gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
+            ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
 #endif
-        switch ( faults )
+
+    switch ( faults )
+    {
+    case 1: /* Stage 1: Reset state to be loaded. */
+        ptr->xsave_hdr.xstate_bv &= ~mask;
+        /*
+         * Also try to eliminate fault reasons, even if this shouldn't be
+         * needed here (other code should ensure the sanity of the data).
+         */
+        if ( ((mask & X86_XCR0_SSE) ||
+              ((mask & X86_XCR0_YMM) &&
+               !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
+            ptr->fpu_sse.mxcsr &= mxcsr_mask;
+        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )
         {
-        case 1: /* Stage 1: Reset state to be loaded. */
-            ptr->xsave_hdr.xstate_bv &= ~mask;
-            /*
-             * Also try to eliminate fault reasons, even if this shouldn't be
-             * needed here (other code should ensure the sanity of the data).
-             */
-            if ( ((mask & X86_XCR0_SSE) ||
-                  ((mask & X86_XCR0_YMM) &&
-                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
-                ptr->fpu_sse.mxcsr &= mxcsr_mask;
-            if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )
-            {
-                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
-                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
-                ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
-            }
-            else
-            {
-                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
-                ptr->xsave_hdr.xcomp_bv = 0;
-            }
-            memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
-            continue;
-
-        case 2: /* Stage 2: Reset all state. */
-            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
-            ptr->xsave_hdr.xstate_bv = 0;
-            ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
-                                      ? XSTATE_COMPACTION_ENABLED : 0;
-            continue;
+            ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
+            ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
+            ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
         }
+        else
+        {
+            ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
+            ptr->xsave_hdr.xcomp_bv = 0;
+        }
+        memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
+        continue;
+
+    case 2: /* Stage 2: Reset all state. */
+        ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
+        ptr->xsave_hdr.xstate_bv = 0;
+        ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
+            ? XSTATE_COMPACTION_ENABLED : 0;
+        continue;
+    }
 
         domain_crash(current->domain);
         return;
-- 
2.39.5



^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2025-12-30 13:54 [PATCH 0/4] x86/asm: .byte removal Andrew Cooper
  2025-12-30 13:54 ` [PATCH 1/4] x86/xstate: Deindent the logic in xrstor() Andrew Cooper
@ 2025-12-30 13:54 ` Andrew Cooper
  2026-01-02 16:01   ` Andrew Cooper
  2026-01-05 15:46   ` Jan Beulich
  2025-12-30 13:54 ` [PATCH 3/4] x86/i387: Rework fpu_fxrstor() " Andrew Cooper
  2025-12-30 13:54 ` [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so Andrew Cooper
  3 siblings, 2 replies; 21+ messages in thread
From: Andrew Cooper @ 2025-12-30 13:54 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

The new toolchain baseline knows all the mnemonics, so a plain memory operand
can be used, rather than needing to hard-code the ModRM byte as (%rdi).

For xrstor(), use asm goto rather than hiding the increment of the faults
variable inside the .fixup section.  Remove the loop and replace it with a
goto retry pattern.  Put the domain_crash() into the default case for fault
handling, and provide a concrete error message rather than leaving it as an
exercise for extra code diving.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/xstate.c | 77 ++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 41 deletions(-)

diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 384f78bd5281..4215a83efefb 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     unsigned int fip_width = v->domain->arch.x87_fip_width;
-#define XSAVE(pfx) \
-        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
-            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
-                           : "=m" (*ptr) \
-                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
-        else \
-            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
-                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
-                           X86_FEATURE_XSAVEOPT, \
-                           "=m" (*ptr), \
-                           "a" (lmask), "d" (hmask), "D" (ptr))
+
+#define XSAVE(pfx)                                                      \
+    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
+        asm volatile ( "xsaves %0"                                      \
+                       : "=m" (*ptr)                                    \
+                       : "a" (lmask), "d" (hmask) );                    \
+    else                                                                \
+        alternative_io("xsave %0",                                      \
+                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
+                       "=m" (*ptr),                                     \
+                       "a" (lmask), "d" (hmask))
 
     if ( fip_width == 8 || !(mask & X86_XCR0_X87) )
     {
-        XSAVE("0x48,");
+        XSAVE("rex64 ");
     }
     else if ( fip_width == 4 )
     {
@@ -349,7 +349,7 @@ void xsave(struct vcpu *v, uint64_t mask)
 
         ptr->fpu_sse.fip.addr = bad_fip;
 
-        XSAVE("0x48,");
+        XSAVE("rex64 ");
 
         /* FIP/FDP not updated? Restore the old FIP value. */
         if ( ptr->fpu_sse.fip.addr == bad_fip )
@@ -384,7 +384,7 @@ void xrstor(struct vcpu *v, uint64_t mask)
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     struct xsave_struct *ptr = v->arch.xsave_area;
-    unsigned int faults, prev_faults;
+    unsigned int faults = 0;
 
     /*
      * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -405,22 +405,15 @@ void xrstor(struct vcpu *v, uint64_t mask)
      * possibility, which may occur if the block was passed to us by control
      * tools or through VCPUOP_initialise, by silently adjusting state.
      */
-    for ( prev_faults = faults = 0; ; prev_faults = faults )
-    {
+ retry:
     switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
     {
-        BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
-#define _xrstor(insn)                                               \
-        asm volatile ( "1: .byte " insn "\n"                        \
-                       "3:\n"                                       \
-                       "   .section .fixup,\"ax\"\n"                \
-                       "2: incl %[faults]\n"                        \
-                       "   jmp 3b\n"                                \
-                       "   .previous\n"                             \
-                       _ASM_EXTABLE(1b, 2b)                         \
-                       : [mem] "+m" (*ptr), [faults] "+g" (faults)  \
-                       : [lmask] "a" (lmask), [hmask] "d" (hmask),  \
-                         [ptr] "D" (ptr) )
+#define _xrstor(insn)                                       \
+        asm_inline volatile goto (                          \
+            "1: " insn " %0\n"                              \
+            _ASM_EXTABLE(1b, %l[fault])                     \
+            :: "m" (*ptr), "a" (lmask), "d" (hmask)         \
+            :: fault )
 
 #define XRSTOR(pfx) \
         if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
@@ -432,13 +425,13 @@ void xrstor(struct vcpu *v, uint64_t mask)
                 ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \
                                           XSTATE_COMPACTION_ENABLED; \
             } \
-            _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \
+            _xrstor(pfx "xrstors"); \
         } \
         else \
-            _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */
+            _xrstor(pfx "xrstor")
 
     default:
-        XRSTOR("0x48,");
+        XRSTOR("rex64 ");
         break;
 
     case 4: case 2:
@@ -449,8 +442,10 @@ void xrstor(struct vcpu *v, uint64_t mask)
 #undef _xrstor
     }
 
-    if ( likely(faults == prev_faults) )
-        break;
+    return;
+
+ fault:
+    faults++;
 
 #ifndef NDEBUG
     gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
@@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
             ptr->xsave_hdr.xcomp_bv = 0;
         }
         memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
-        continue;
+        goto retry;
 
     case 2: /* Stage 2: Reset all state. */
         ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
         ptr->xsave_hdr.xstate_bv = 0;
         ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
             ? XSTATE_COMPACTION_ENABLED : 0;
-        continue;
-    }
+        goto retry;
 
-        domain_crash(current->domain);
+    default: /* Stage 3: Nothing else to do. */
+        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
         return;
     }
 }
@@ -1041,17 +1036,17 @@ uint64_t read_bndcfgu(void)
 
     if ( cpu_has_xsavec )
     {
-        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
+        asm ( "xsavec %0"
               : "=m" (*xstate)
-              : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );
+              : "a" (X86_XCR0_BNDCSR), "d" (0) );
 
         bndcsr = (void *)(xstate + 1);
     }
     else
     {
-        asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */
+        asm ( "xsave %0"
               : "=m" (*xstate)
-              : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );
+              : "a" (X86_XCR0_BNDCSR), "d" (0) );
 
         bndcsr = (void *)xstate + xstate_offsets[ilog2(X86_XCR0_BNDCSR)];
     }
-- 
2.39.5



^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/4] x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  2025-12-30 13:54 [PATCH 0/4] x86/asm: .byte removal Andrew Cooper
  2025-12-30 13:54 ` [PATCH 1/4] x86/xstate: Deindent the logic in xrstor() Andrew Cooper
  2025-12-30 13:54 ` [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline Andrew Cooper
@ 2025-12-30 13:54 ` Andrew Cooper
  2026-01-05 15:52   ` Jan Beulich
  2025-12-30 13:54 ` [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so Andrew Cooper
  3 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2025-12-30 13:54 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

Use asm goto rather than hiding a memset() in the fixup section.  With the
compiler now able to see the write into fpu_ctxt (as opposed to the asm
constraint erroneously stating it as input-only), it validly objects to the
pointer being const.

While FXRSTOR oughtn't to fault on an all-zeros input, avoid a risk of an
infinite loop entirely by using a fixup scheme similar to xrstor(), and
crashing the domain if we run out options.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/i387.c | 65 ++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index b84cd6f7a9e1..e0714ab2267d 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -38,7 +38,8 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
 /* Restore x87 FPU, MMX, SSE and SSE2 state */
 static inline void fpu_fxrstor(struct vcpu *v)
 {
-    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
+    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
+    unsigned int faults = 0;
 
     /*
      * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -59,49 +60,41 @@ static inline void fpu_fxrstor(struct vcpu *v)
      * possibility, which may occur if the block was passed to us by control
      * tools or through VCPUOP_initialise, by silently clearing the block.
      */
+ retry:
     switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
     {
     default:
-        asm_inline volatile (
+        asm_inline volatile goto (
             "1: fxrstorq %0\n"
-            ".section .fixup,\"ax\"   \n"
-            "2: push %%"__OP"ax       \n"
-            "   push %%"__OP"cx       \n"
-            "   push %%"__OP"di       \n"
-            "   lea  %0,%%"__OP"di    \n"
-            "   mov  %1,%%ecx         \n"
-            "   xor  %%eax,%%eax      \n"
-            "   rep ; stosl           \n"
-            "   pop  %%"__OP"di       \n"
-            "   pop  %%"__OP"cx       \n"
-            "   pop  %%"__OP"ax       \n"
-            "   jmp  1b               \n"
-            ".previous                \n"
-            _ASM_EXTABLE(1b, 2b)
-            :
-            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
+            _ASM_EXTABLE(1b, %l[fault])
+            :: "m" (*fpu_ctxt)
+            :: fault );
         break;
+
     case 4: case 2:
-        asm_inline volatile (
-            "1: fxrstor %0         \n"
-            ".section .fixup,\"ax\"\n"
-            "2: push %%"__OP"ax    \n"
-            "   push %%"__OP"cx    \n"
-            "   push %%"__OP"di    \n"
-            "   lea  %0,%%"__OP"di \n"
-            "   mov  %1,%%ecx      \n"
-            "   xor  %%eax,%%eax   \n"
-            "   rep ; stosl        \n"
-            "   pop  %%"__OP"di    \n"
-            "   pop  %%"__OP"cx    \n"
-            "   pop  %%"__OP"ax    \n"
-            "   jmp  1b            \n"
-            ".previous             \n"
-            _ASM_EXTABLE(1b, 2b)
-            :
-            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
+        asm_inline volatile goto (
+            "1: fxrstor %0\n"
+            _ASM_EXTABLE(1b, %l[fault])
+            :: "m" (*fpu_ctxt)
+            :: fault );
         break;
     }
+
+    return;
+
+ fault:
+    faults++;
+
+    switch ( faults )
+    {
+    case 1: /* Stage 1: Reset all state. */
+        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
+        goto retry;
+
+    default: /* Stage 2: Nothing else to do. */
+        domain_crash(v->domain, "Uncorrectable FXRSTOR fault\n");
+        return;
+    }
 }
 
 /*******************************/
-- 
2.39.5



^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so
  2025-12-30 13:54 [PATCH 0/4] x86/asm: .byte removal Andrew Cooper
                   ` (2 preceding siblings ...)
  2025-12-30 13:54 ` [PATCH 3/4] x86/i387: Rework fpu_fxrstor() " Andrew Cooper
@ 2025-12-30 13:54 ` Andrew Cooper
  2026-01-05 15:58   ` Jan Beulich
  3 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2025-12-30 13:54 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

The new toolchain baseline knows all of these instructions.

For the remaining uses of .byte for instructions, annotate the toolchain
minima.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/arch.mk                   |  4 +++
 xen/arch/x86/include/asm/asm-defns.h   |  4 ---
 xen/arch/x86/include/asm/msr.h         |  2 ++
 xen/arch/x86/include/asm/prot-key.h    |  6 ++---
 xen/arch/x86/include/asm/xstate.h      |  3 +--
 xen/arch/x86/x86_emulate/0f01.c        |  2 +-
 xen/arch/x86/x86_emulate/x86_emulate.c | 34 ++++++++++++--------------
 7 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
index 0203138a819a..3d8d9bfe4916 100644
--- a/xen/arch/x86/arch.mk
+++ b/xen/arch/x86/arch.mk
@@ -16,7 +16,11 @@ CFLAGS-$(CONFIG_CC_IS_GCC) += -malign-data=abi
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-option-add,CFLAGS,CC,".equ \"x\"$(comma)1",-DHAVE_AS_QUOTED_SYM)
+
+# Binutils >= 2.31, Clang >= 7
 $(call as-option-add,CFLAGS,CC,"movdiri %rax$(comma)(%rax)",-DHAVE_AS_MOVDIR)
+
+# Binutils >= 2.33, Clang >= 9
 $(call as-option-add,CFLAGS,CC,"enqcmd (%rax)$(comma)%rax",-DHAVE_AS_ENQCMD)
 
 # Check to see whether the assembler supports the .nop directive.
diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h
index 239dc3af096c..f6fe4596a852 100644
--- a/xen/arch/x86/include/asm/asm-defns.h
+++ b/xen/arch/x86/include/asm/asm-defns.h
@@ -1,9 +1,5 @@
 #include <asm/page-bits.h>
 
-.macro clzero
-    .byte 0x0f, 0x01, 0xfc
-.endm
-
 /* binutils >= 2.41 or LLVM >= 19 */
 .macro eretu
     .byte 0xf3, 0x0f, 0x01, 0xca
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 941a7612f4ba..1377d156f4e1 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -63,6 +63,8 @@ static inline void wrmsrns(uint32_t msr, uint64_t val)
     /*
      * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant CS
      * prefix to avoid a trailing NOP.
+     *
+     * Binutils >= 2.40, Clang >= 16
      */
     alternative_input(".byte 0x2e; wrmsr",
                       ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
diff --git a/xen/arch/x86/include/asm/prot-key.h b/xen/arch/x86/include/asm/prot-key.h
index 8fb15b5c32e9..1752756fd9c1 100644
--- a/xen/arch/x86/include/asm/prot-key.h
+++ b/xen/arch/x86/include/asm/prot-key.h
@@ -19,16 +19,14 @@ static inline uint32_t rdpkru(void)
 {
     uint32_t pkru;
 
-    asm volatile ( ".byte 0x0f,0x01,0xee"
-                   : "=a" (pkru) : "c" (0) : "dx" );
+    asm volatile ( "rdpkru" : "=a" (pkru) : "c" (0) : "dx" );
 
     return pkru;
 }
 
 static inline void wrpkru(uint32_t pkru)
 {
-    asm volatile ( ".byte 0x0f,0x01,0xef"
-                   :: "a" (pkru), "d" (0), "c" (0) );
+    asm volatile ( "wrpkru" :: "a" (pkru), "d" (0), "c" (0) );
 }
 
 /*
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index e3b9745543d7..9cfee1fa9c5a 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -120,8 +120,7 @@ static inline uint64_t xgetbv(unsigned int index)
     uint32_t lo, hi;
 
     ASSERT(index); /* get_xcr0() should be used instead. */
-    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
-                   : "=a" (lo), "=d" (hi) : "c" (index) );
+    asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (index) );
 
     return lo | ((uint64_t)hi << 32);
 }
diff --git a/xen/arch/x86/x86_emulate/0f01.c b/xen/arch/x86/x86_emulate/0f01.c
index 1ba99609d6fd..4791465fc83f 100644
--- a/xen/arch/x86/x86_emulate/0f01.c
+++ b/xen/arch/x86/x86_emulate/0f01.c
@@ -122,7 +122,7 @@ int x86emul_0f01(struct x86_emulate_state *s,
         {
         case vex_none: /* serialize */
             host_and_vcpu_must_have(serialize);
-            asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
+            asm volatile ( ".byte 0x0f, 0x01, 0xe8" ); /* Binutils >= 2.34, Clang >= 11 */
             break;
         case vex_f2: /* xsusldtrk */
             vcpu_must_have(tsxldtrk);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index d830aea430d4..a3b7142fde7e 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4748,27 +4748,25 @@ x86_emulate(
                  */
                 if ( vex.l )
                 {
-                    /* vpxor %xmmN, %xmmN, %xmmN */
-                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
-                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
-                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
-                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
-                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
-                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
-                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
-                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
+                    asm volatile ( "vpxor %xmm0, %xmm0, %xmm0" );
+                    asm volatile ( "vpxor %xmm1, %xmm1, %xmm1" );
+                    asm volatile ( "vpxor %xmm2, %xmm2, %xmm2" );
+                    asm volatile ( "vpxor %xmm3, %xmm3, %xmm3" );
+                    asm volatile ( "vpxor %xmm4, %xmm4, %xmm4" );
+                    asm volatile ( "vpxor %xmm5, %xmm5, %xmm5" );
+                    asm volatile ( "vpxor %xmm6, %xmm6, %xmm6" );
+                    asm volatile ( "vpxor %xmm7, %xmm7, %xmm7" );
                 }
                 else
                 {
-                    /* vpor %xmmN, %xmmN, %xmmN */
-                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
-                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
-                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
-                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
-                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
-                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
-                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
-                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
+                    asm volatile ( "vpor %xmm0, %xmm0, %xmm0" );
+                    asm volatile ( "vpor %xmm1, %xmm1, %xmm1" );
+                    asm volatile ( "vpor %xmm2, %xmm2, %xmm2" );
+                    asm volatile ( "vpor %xmm3, %xmm3, %xmm3" );
+                    asm volatile ( "vpor %xmm4, %xmm4, %xmm4" );
+                    asm volatile ( "vpor %xmm5, %xmm5, %xmm5" );
+                    asm volatile ( "vpor %xmm6, %xmm6, %xmm6" );
+                    asm volatile ( "vpor %xmm7, %xmm7, %xmm7" );
                 }
 
                 ASSERT(!state->simd_size);
-- 
2.39.5



^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2025-12-30 13:54 ` [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline Andrew Cooper
@ 2026-01-02 16:01   ` Andrew Cooper
  2026-01-05 15:16     ` Jan Beulich
  2026-01-05 15:46   ` Jan Beulich
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2026-01-02 16:01 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné

On 30/12/2025 1:54 pm, Andrew Cooper wrote:
> diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
> index 384f78bd5281..4215a83efefb 100644
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>      uint32_t hmask = mask >> 32;
>      uint32_t lmask = mask;
>      unsigned int fip_width = v->domain->arch.x87_fip_width;
> -#define XSAVE(pfx) \
> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
> -                           : "=m" (*ptr) \
> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
> -        else \
> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> -                           X86_FEATURE_XSAVEOPT, \
> -                           "=m" (*ptr), \
> -                           "a" (lmask), "d" (hmask), "D" (ptr))
> +
> +#define XSAVE(pfx)                                                      \
> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
> +        asm volatile ( "xsaves %0"                                      \
> +                       : "=m" (*ptr)                                    \
> +                       : "a" (lmask), "d" (hmask) );                    \
> +    else                                                                \
> +        alternative_io("xsave %0",                                      \
> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
> +                       "=m" (*ptr),                                     \
> +                       "a" (lmask), "d" (hmask))

This loses the pfx.  I've fixed up locally and double checked the
disassembly.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] x86/xstate: Deindent the logic in xrstor()
  2025-12-30 13:54 ` [PATCH 1/4] x86/xstate: Deindent the logic in xrstor() Andrew Cooper
@ 2026-01-05 15:12   ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 15:12 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 30.12.2025 14:54, Andrew Cooper wrote:
> ... to improve the legibility of the following fix.

... wanting to replace the for() with an open-coded form of a loop. Which
is fine, but could have done with saying here.

> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Jan Beulich <jbeulich@suse.com>

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-02 16:01   ` Andrew Cooper
@ 2026-01-05 15:16     ` Jan Beulich
  2026-01-05 16:55       ` Andrew Cooper
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 15:16 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 02.01.2026 17:01, Andrew Cooper wrote:
> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>      uint32_t hmask = mask >> 32;
>>      uint32_t lmask = mask;
>>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>> -#define XSAVE(pfx) \
>> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>> -                           : "=m" (*ptr) \
>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>> -        else \
>> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> -                           X86_FEATURE_XSAVEOPT, \
>> -                           "=m" (*ptr), \
>> -                           "a" (lmask), "d" (hmask), "D" (ptr))
>> +
>> +#define XSAVE(pfx)                                                      \
>> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
>> +        asm volatile ( "xsaves %0"                                      \
>> +                       : "=m" (*ptr)                                    \
>> +                       : "a" (lmask), "d" (hmask) );                    \
>> +    else                                                                \
>> +        alternative_io("xsave %0",                                      \
>> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
>> +                       "=m" (*ptr),                                     \
>> +                       "a" (lmask), "d" (hmask))
> 
> This loses the pfx.  I've fixed up locally and double checked the
> disassembly.

Question being: Do we want to stick to using the prefix form, when gas
specifically has been offering a kind-of-suffix form instead from the
very beginning (xsaves and xsaves64)?

If we wanted to stick to the prefixes, I'd favor a form where the use
sites don't need to supply the separating blank (i.e. the macro itself
would insert it, as doing do with an empty prefix results in merely
an indentation "issue" in the generated assembly). Thoughts?

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2025-12-30 13:54 ` [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline Andrew Cooper
  2026-01-02 16:01   ` Andrew Cooper
@ 2026-01-05 15:46   ` Jan Beulich
  2026-01-05 17:45     ` Andrew Cooper
  1 sibling, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 15:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 30.12.2025 14:54, Andrew Cooper wrote:
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>      uint32_t hmask = mask >> 32;
>      uint32_t lmask = mask;
>      unsigned int fip_width = v->domain->arch.x87_fip_width;
> -#define XSAVE(pfx) \
> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
> -                           : "=m" (*ptr) \
> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
> -        else \
> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> -                           X86_FEATURE_XSAVEOPT, \
> -                           "=m" (*ptr), \
> -                           "a" (lmask), "d" (hmask), "D" (ptr))
> +
> +#define XSAVE(pfx)                                                      \
> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
> +        asm volatile ( "xsaves %0"                                      \
> +                       : "=m" (*ptr)                                    \
> +                       : "a" (lmask), "d" (hmask) );                    \
> +    else                                                                \
> +        alternative_io("xsave %0",                                      \
> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
> +                       "=m" (*ptr),                                     \
> +                       "a" (lmask), "d" (hmask))

While no doubt neater to read this way, there's a subtle latent issue here:
"m" doesn't exclude RIP-relative addressing, yet that addressing form can't
be used in replacement code (up and until we leverage your decode-lite to
actually be able to fix up the displacement). Sadly "o" as a constraint
doesn't look to be any different in this regard (I think it should be, as
adding a "small integer" may already bring the displacement beyond the
permitted range, but their definition of "offsettable" allows this).

This issue is latent until such time that (a) a caller appears passing in
the address of a Xen-internal variable and (b) we make LTO work again.
Since the breakage would be impossible to notice at build time, I think we
would be better off if we avoided it from the beginning. Which may mean
sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.

> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>              ptr->xsave_hdr.xcomp_bv = 0;
>          }
>          memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
> -        continue;
> +        goto retry;
>  
>      case 2: /* Stage 2: Reset all state. */
>          ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>          ptr->xsave_hdr.xstate_bv = 0;
>          ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>              ? XSTATE_COMPACTION_ENABLED : 0;
> -        continue;
> -    }
> +        goto retry;
>  
> -        domain_crash(current->domain);
> +    default: /* Stage 3: Nothing else to do. */
> +        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>          return;

There's an unexplained change here as to which domain is being crashed.
You switch to crashing the subject domain, yet if that's not also the
requester, it isn't "guilty" in causing the observed fault.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/4] x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  2025-12-30 13:54 ` [PATCH 3/4] x86/i387: Rework fpu_fxrstor() " Andrew Cooper
@ 2026-01-05 15:52   ` Jan Beulich
  2026-01-05 16:13     ` Nicola Vetrini
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 15:52 UTC (permalink / raw)
  To: Andrew Cooper, Nicola Vetrini; +Cc: Roger Pau Monné, Xen-devel

On 30.12.2025 14:54, Andrew Cooper wrote:
> Use asm goto rather than hiding a memset() in the fixup section.  With the
> compiler now able to see the write into fpu_ctxt (as opposed to the asm
> constraint erroneously stating it as input-only), it validly objects to the
> pointer being const.
> 
> While FXRSTOR oughtn't to fault on an all-zeros input, avoid a risk of an
> infinite loop entirely by using a fixup scheme similar to xrstor(), and
> crashing the domain if we run out options.

Question being - does ...

> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -38,7 +38,8 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
>  /* Restore x87 FPU, MMX, SSE and SSE2 state */
>  static inline void fpu_fxrstor(struct vcpu *v)
>  {
> -    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
> +    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
> +    unsigned int faults = 0;
>  
>      /*
>       * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
> @@ -59,49 +60,41 @@ static inline void fpu_fxrstor(struct vcpu *v)
>       * possibility, which may occur if the block was passed to us by control
>       * tools or through VCPUOP_initialise, by silently clearing the block.
>       */
> + retry:
>      switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
>      {
>      default:
> -        asm_inline volatile (
> +        asm_inline volatile goto (
>              "1: fxrstorq %0\n"
> -            ".section .fixup,\"ax\"   \n"
> -            "2: push %%"__OP"ax       \n"
> -            "   push %%"__OP"cx       \n"
> -            "   push %%"__OP"di       \n"
> -            "   lea  %0,%%"__OP"di    \n"
> -            "   mov  %1,%%ecx         \n"
> -            "   xor  %%eax,%%eax      \n"
> -            "   rep ; stosl           \n"
> -            "   pop  %%"__OP"di       \n"
> -            "   pop  %%"__OP"cx       \n"
> -            "   pop  %%"__OP"ax       \n"
> -            "   jmp  1b               \n"
> -            ".previous                \n"
> -            _ASM_EXTABLE(1b, 2b)
> -            :
> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
> +            _ASM_EXTABLE(1b, %l[fault])
> +            :: "m" (*fpu_ctxt)
> +            :: fault );
>          break;
> +
>      case 4: case 2:
> -        asm_inline volatile (
> -            "1: fxrstor %0         \n"
> -            ".section .fixup,\"ax\"\n"
> -            "2: push %%"__OP"ax    \n"
> -            "   push %%"__OP"cx    \n"
> -            "   push %%"__OP"di    \n"
> -            "   lea  %0,%%"__OP"di \n"
> -            "   mov  %1,%%ecx      \n"
> -            "   xor  %%eax,%%eax   \n"
> -            "   rep ; stosl        \n"
> -            "   pop  %%"__OP"di    \n"
> -            "   pop  %%"__OP"cx    \n"
> -            "   pop  %%"__OP"ax    \n"
> -            "   jmp  1b            \n"
> -            ".previous             \n"
> -            _ASM_EXTABLE(1b, 2b)
> -            :
> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
> +        asm_inline volatile goto (
> +            "1: fxrstor %0\n"
> +            _ASM_EXTABLE(1b, %l[fault])
> +            :: "m" (*fpu_ctxt)
> +            :: fault );
>          break;
>      }
> +
> +    return;
> +
> + fault:
> +    faults++;
> +
> +    switch ( faults )
> +    {
> +    case 1: /* Stage 1: Reset all state. */
> +        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
> +        goto retry;
> +
> +    default: /* Stage 2: Nothing else to do. */
> +        domain_crash(v->domain, "Uncorrectable FXRSTOR fault\n");
> +        return;

... this then count as unreachable and/or dead code in Misra's terms? Nicola?
Sure, Eclair wouldn't be able to spot it, but that's no excuse imo.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so
  2025-12-30 13:54 ` [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so Andrew Cooper
@ 2026-01-05 15:58   ` Jan Beulich
  2026-01-08 21:14     ` Andrew Cooper
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 15:58 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 30.12.2025 14:54, Andrew Cooper wrote:
> --- a/xen/arch/x86/include/asm/asm-defns.h
> +++ b/xen/arch/x86/include/asm/asm-defns.h
> @@ -1,9 +1,5 @@
>  #include <asm/page-bits.h>
>  
> -.macro clzero
> -    .byte 0x0f, 0x01, 0xfc
> -.endm

This can't go away yet, as it became known to gas only in 2.26.

> --- a/xen/arch/x86/include/asm/prot-key.h
> +++ b/xen/arch/x86/include/asm/prot-key.h
> @@ -19,16 +19,14 @@ static inline uint32_t rdpkru(void)
>  {
>      uint32_t pkru;
>  
> -    asm volatile ( ".byte 0x0f,0x01,0xee"
> -                   : "=a" (pkru) : "c" (0) : "dx" );
> +    asm volatile ( "rdpkru" : "=a" (pkru) : "c" (0) : "dx" );
>  
>      return pkru;
>  }
>  
>  static inline void wrpkru(uint32_t pkru)
>  {
> -    asm volatile ( ".byte 0x0f,0x01,0xef"
> -                   :: "a" (pkru), "d" (0), "c" (0) );
> +    asm volatile ( "wrpkru" :: "a" (pkru), "d" (0), "c" (0) );
>  }

Same for both of these.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/4] x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  2026-01-05 15:52   ` Jan Beulich
@ 2026-01-05 16:13     ` Nicola Vetrini
  2026-01-05 16:39       ` Andrew Cooper
  0 siblings, 1 reply; 21+ messages in thread
From: Nicola Vetrini @ 2026-01-05 16:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 2026-01-05 16:52, Jan Beulich wrote:
> On 30.12.2025 14:54, Andrew Cooper wrote:
>> Use asm goto rather than hiding a memset() in the fixup section.  With 
>> the
>> compiler now able to see the write into fpu_ctxt (as opposed to the 
>> asm
>> constraint erroneously stating it as input-only), it validly objects 
>> to the
>> pointer being const.
>> 
>> While FXRSTOR oughtn't to fault on an all-zeros input, avoid a risk of 
>> an
>> infinite loop entirely by using a fixup scheme similar to xrstor(), 
>> and
>> crashing the domain if we run out options.
> 
> Question being - does ...
> 
>> --- a/xen/arch/x86/i387.c
>> +++ b/xen/arch/x86/i387.c
>> @@ -38,7 +38,8 @@ static inline void fpu_xrstor(struct vcpu *v, 
>> uint64_t mask)
>>  /* Restore x87 FPU, MMX, SSE and SSE2 state */
>>  static inline void fpu_fxrstor(struct vcpu *v)
>>  {
>> -    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>> +    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>> +    unsigned int faults = 0;
>> 
>>      /*
>>       * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
>> @@ -59,49 +60,41 @@ static inline void fpu_fxrstor(struct vcpu *v)
>>       * possibility, which may occur if the block was passed to us by 
>> control
>>       * tools or through VCPUOP_initialise, by silently clearing the 
>> block.
>>       */
>> + retry:
>>      switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
>>      {
>>      default:
>> -        asm_inline volatile (
>> +        asm_inline volatile goto (
>>              "1: fxrstorq %0\n"
>> -            ".section .fixup,\"ax\"   \n"
>> -            "2: push %%"__OP"ax       \n"
>> -            "   push %%"__OP"cx       \n"
>> -            "   push %%"__OP"di       \n"
>> -            "   lea  %0,%%"__OP"di    \n"
>> -            "   mov  %1,%%ecx         \n"
>> -            "   xor  %%eax,%%eax      \n"
>> -            "   rep ; stosl           \n"
>> -            "   pop  %%"__OP"di       \n"
>> -            "   pop  %%"__OP"cx       \n"
>> -            "   pop  %%"__OP"ax       \n"
>> -            "   jmp  1b               \n"
>> -            ".previous                \n"
>> -            _ASM_EXTABLE(1b, 2b)
>> -            :
>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>> +            _ASM_EXTABLE(1b, %l[fault])
>> +            :: "m" (*fpu_ctxt)
>> +            :: fault );
>>          break;
>> +
>>      case 4: case 2:
>> -        asm_inline volatile (
>> -            "1: fxrstor %0         \n"
>> -            ".section .fixup,\"ax\"\n"
>> -            "2: push %%"__OP"ax    \n"
>> -            "   push %%"__OP"cx    \n"
>> -            "   push %%"__OP"di    \n"
>> -            "   lea  %0,%%"__OP"di \n"
>> -            "   mov  %1,%%ecx      \n"
>> -            "   xor  %%eax,%%eax   \n"
>> -            "   rep ; stosl        \n"
>> -            "   pop  %%"__OP"di    \n"
>> -            "   pop  %%"__OP"cx    \n"
>> -            "   pop  %%"__OP"ax    \n"
>> -            "   jmp  1b            \n"
>> -            ".previous             \n"
>> -            _ASM_EXTABLE(1b, 2b)
>> -            :
>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>> +        asm_inline volatile goto (
>> +            "1: fxrstor %0\n"
>> +            _ASM_EXTABLE(1b, %l[fault])
>> +            :: "m" (*fpu_ctxt)
>> +            :: fault );
>>          break;
>>      }
>> +
>> +    return;
>> +
>> + fault:
>> +    faults++;
>> +
>> +    switch ( faults )
>> +    {
>> +    case 1: /* Stage 1: Reset all state. */
>> +        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
>> +        goto retry;
>> +
>> +    default: /* Stage 2: Nothing else to do. */
>> +        domain_crash(v->domain, "Uncorrectable FXRSTOR fault\n");
>> +        return;
> 
> ... this then count as unreachable and/or dead code in Misra's terms? 
> Nicola?
> Sure, Eclair wouldn't be able to spot it, but that's no excuse imo.
> 
> Jan

Right now, probably not, but even if it did, an ASSERT_UNREACHABLE can 
be added in the default branch to deal with that.

-- 
Nicola Vetrini, B.Sc.
Software Engineer
BUGSENG (https://bugseng.com)
LinkedIn: https://www.linkedin.com/in/nicola-vetrini-a42471253


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/4] x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  2026-01-05 16:13     ` Nicola Vetrini
@ 2026-01-05 16:39       ` Andrew Cooper
  2026-01-05 16:48         ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2026-01-05 16:39 UTC (permalink / raw)
  To: Nicola Vetrini, Jan Beulich
  Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 05/01/2026 4:13 pm, Nicola Vetrini wrote:
> On 2026-01-05 16:52, Jan Beulich wrote:
>> On 30.12.2025 14:54, Andrew Cooper wrote:
>>> Use asm goto rather than hiding a memset() in the fixup section. 
>>> With the
>>> compiler now able to see the write into fpu_ctxt (as opposed to the asm
>>> constraint erroneously stating it as input-only), it validly objects
>>> to the
>>> pointer being const.
>>>
>>> While FXRSTOR oughtn't to fault on an all-zeros input, avoid a risk
>>> of an
>>> infinite loop entirely by using a fixup scheme similar to xrstor(), and
>>> crashing the domain if we run out options.
>>
>> Question being - does ...
>>
>>> --- a/xen/arch/x86/i387.c
>>> +++ b/xen/arch/x86/i387.c
>>> @@ -38,7 +38,8 @@ static inline void fpu_xrstor(struct vcpu *v,
>>> uint64_t mask)
>>>  /* Restore x87 FPU, MMX, SSE and SSE2 state */
>>>  static inline void fpu_fxrstor(struct vcpu *v)
>>>  {
>>> -    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>>> +    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>>> +    unsigned int faults = 0;
>>>
>>>      /*
>>>       * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
>>> @@ -59,49 +60,41 @@ static inline void fpu_fxrstor(struct vcpu *v)
>>>       * possibility, which may occur if the block was passed to us
>>> by control
>>>       * tools or through VCPUOP_initialise, by silently clearing the
>>> block.
>>>       */
>>> + retry:
>>>      switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
>>>      {
>>>      default:
>>> -        asm_inline volatile (
>>> +        asm_inline volatile goto (
>>>              "1: fxrstorq %0\n"
>>> -            ".section .fixup,\"ax\"   \n"
>>> -            "2: push %%"__OP"ax       \n"
>>> -            "   push %%"__OP"cx       \n"
>>> -            "   push %%"__OP"di       \n"
>>> -            "   lea  %0,%%"__OP"di    \n"
>>> -            "   mov  %1,%%ecx         \n"
>>> -            "   xor  %%eax,%%eax      \n"
>>> -            "   rep ; stosl           \n"
>>> -            "   pop  %%"__OP"di       \n"
>>> -            "   pop  %%"__OP"cx       \n"
>>> -            "   pop  %%"__OP"ax       \n"
>>> -            "   jmp  1b               \n"
>>> -            ".previous                \n"
>>> -            _ASM_EXTABLE(1b, 2b)
>>> -            :
>>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>>> +            _ASM_EXTABLE(1b, %l[fault])
>>> +            :: "m" (*fpu_ctxt)
>>> +            :: fault );
>>>          break;
>>> +
>>>      case 4: case 2:
>>> -        asm_inline volatile (
>>> -            "1: fxrstor %0         \n"
>>> -            ".section .fixup,\"ax\"\n"
>>> -            "2: push %%"__OP"ax    \n"
>>> -            "   push %%"__OP"cx    \n"
>>> -            "   push %%"__OP"di    \n"
>>> -            "   lea  %0,%%"__OP"di \n"
>>> -            "   mov  %1,%%ecx      \n"
>>> -            "   xor  %%eax,%%eax   \n"
>>> -            "   rep ; stosl        \n"
>>> -            "   pop  %%"__OP"di    \n"
>>> -            "   pop  %%"__OP"cx    \n"
>>> -            "   pop  %%"__OP"ax    \n"
>>> -            "   jmp  1b            \n"
>>> -            ".previous             \n"
>>> -            _ASM_EXTABLE(1b, 2b)
>>> -            :
>>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>>> +        asm_inline volatile goto (
>>> +            "1: fxrstor %0\n"
>>> +            _ASM_EXTABLE(1b, %l[fault])
>>> +            :: "m" (*fpu_ctxt)
>>> +            :: fault );
>>>          break;
>>>      }
>>> +
>>> +    return;
>>> +
>>> + fault:
>>> +    faults++;
>>> +
>>> +    switch ( faults )
>>> +    {
>>> +    case 1: /* Stage 1: Reset all state. */
>>> +        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
>>> +        goto retry;
>>> +
>>> +    default: /* Stage 2: Nothing else to do. */
>>> +        domain_crash(v->domain, "Uncorrectable FXRSTOR fault\n");
>>> +        return;
>>
>> ... this then count as unreachable and/or dead code in Misra's terms?
>> Nicola?
>> Sure, Eclair wouldn't be able to spot it, but that's no excuse imo.
>>
>> Jan
>
> Right now, probably not, but even if it did, an ASSERT_UNREACHABLE can
> be added in the default branch to deal with that.

It's fully reachable.

FXRSTOR can fault multiple times, and can fault for reasons unrelated to
the contents of the buffer.  Fault recovery isn't even limited to only
#GP[0] only, and FXRSTOR can suffer #AC from a misaligned pointer.

If Xen is operating properly, it oughtn't to fault more than once, but
right now the logic will livelock rather than terminate.

Further fixes being discussed (better auditing of toolstack-provided
buffers) should cause it never to fault for buffer-contents reasons, at
which point I'll be removing the retry aspect and escalating to
domain_crash() unconditionally.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/4] x86/i387: Rework fpu_fxrstor() given a newer toolchain baseline
  2026-01-05 16:39       ` Andrew Cooper
@ 2026-01-05 16:48         ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 16:48 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel, Nicola Vetrini

On 05.01.2026 17:39, Andrew Cooper wrote:
> On 05/01/2026 4:13 pm, Nicola Vetrini wrote:
>> On 2026-01-05 16:52, Jan Beulich wrote:
>>> On 30.12.2025 14:54, Andrew Cooper wrote:
>>>> Use asm goto rather than hiding a memset() in the fixup section. 
>>>> With the
>>>> compiler now able to see the write into fpu_ctxt (as opposed to the asm
>>>> constraint erroneously stating it as input-only), it validly objects
>>>> to the
>>>> pointer being const.
>>>>
>>>> While FXRSTOR oughtn't to fault on an all-zeros input, avoid a risk
>>>> of an
>>>> infinite loop entirely by using a fixup scheme similar to xrstor(), and
>>>> crashing the domain if we run out options.
>>>
>>> Question being - does ...
>>>
>>>> --- a/xen/arch/x86/i387.c
>>>> +++ b/xen/arch/x86/i387.c
>>>> @@ -38,7 +38,8 @@ static inline void fpu_xrstor(struct vcpu *v,
>>>> uint64_t mask)
>>>>  /* Restore x87 FPU, MMX, SSE and SSE2 state */
>>>>  static inline void fpu_fxrstor(struct vcpu *v)
>>>>  {
>>>> -    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>>>> +    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>>>> +    unsigned int faults = 0;
>>>>
>>>>      /*
>>>>       * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
>>>> @@ -59,49 +60,41 @@ static inline void fpu_fxrstor(struct vcpu *v)
>>>>       * possibility, which may occur if the block was passed to us
>>>> by control
>>>>       * tools or through VCPUOP_initialise, by silently clearing the
>>>> block.
>>>>       */
>>>> + retry:
>>>>      switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
>>>>      {
>>>>      default:
>>>> -        asm_inline volatile (
>>>> +        asm_inline volatile goto (
>>>>              "1: fxrstorq %0\n"
>>>> -            ".section .fixup,\"ax\"   \n"
>>>> -            "2: push %%"__OP"ax       \n"
>>>> -            "   push %%"__OP"cx       \n"
>>>> -            "   push %%"__OP"di       \n"
>>>> -            "   lea  %0,%%"__OP"di    \n"
>>>> -            "   mov  %1,%%ecx         \n"
>>>> -            "   xor  %%eax,%%eax      \n"
>>>> -            "   rep ; stosl           \n"
>>>> -            "   pop  %%"__OP"di       \n"
>>>> -            "   pop  %%"__OP"cx       \n"
>>>> -            "   pop  %%"__OP"ax       \n"
>>>> -            "   jmp  1b               \n"
>>>> -            ".previous                \n"
>>>> -            _ASM_EXTABLE(1b, 2b)
>>>> -            :
>>>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>>>> +            _ASM_EXTABLE(1b, %l[fault])
>>>> +            :: "m" (*fpu_ctxt)
>>>> +            :: fault );
>>>>          break;
>>>> +
>>>>      case 4: case 2:
>>>> -        asm_inline volatile (
>>>> -            "1: fxrstor %0         \n"
>>>> -            ".section .fixup,\"ax\"\n"
>>>> -            "2: push %%"__OP"ax    \n"
>>>> -            "   push %%"__OP"cx    \n"
>>>> -            "   push %%"__OP"di    \n"
>>>> -            "   lea  %0,%%"__OP"di \n"
>>>> -            "   mov  %1,%%ecx      \n"
>>>> -            "   xor  %%eax,%%eax   \n"
>>>> -            "   rep ; stosl        \n"
>>>> -            "   pop  %%"__OP"di    \n"
>>>> -            "   pop  %%"__OP"cx    \n"
>>>> -            "   pop  %%"__OP"ax    \n"
>>>> -            "   jmp  1b            \n"
>>>> -            ".previous             \n"
>>>> -            _ASM_EXTABLE(1b, 2b)
>>>> -            :
>>>> -            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
>>>> +        asm_inline volatile goto (
>>>> +            "1: fxrstor %0\n"
>>>> +            _ASM_EXTABLE(1b, %l[fault])
>>>> +            :: "m" (*fpu_ctxt)
>>>> +            :: fault );
>>>>          break;
>>>>      }
>>>> +
>>>> +    return;
>>>> +
>>>> + fault:
>>>> +    faults++;
>>>> +
>>>> +    switch ( faults )
>>>> +    {
>>>> +    case 1: /* Stage 1: Reset all state. */
>>>> +        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
>>>> +        goto retry;
>>>> +
>>>> +    default: /* Stage 2: Nothing else to do. */
>>>> +        domain_crash(v->domain, "Uncorrectable FXRSTOR fault\n");
>>>> +        return;
>>>
>>> ... this then count as unreachable and/or dead code in Misra's terms?
>>> Nicola?
>>> Sure, Eclair wouldn't be able to spot it, but that's no excuse imo.
>>
>> Right now, probably not, but even if it did, an ASSERT_UNREACHABLE can
>> be added in the default branch to deal with that.
> 
> It's fully reachable.
> 
> FXRSTOR can fault multiple times, and can fault for reasons unrelated to
> the contents of the buffer.  Fault recovery isn't even limited to only
> #GP[0] only, and FXRSTOR can suffer #AC from a misaligned pointer.

None of these faults are what we mean to recover from here. Faults
unrelated to buffer contents would pretty likely occur on the memset()
as well.

As to #AC - in ring 3, but not in ring 0 (where Xen runs)?

> If Xen is operating properly, it oughtn't to fault more than once, but
> right now the logic will livelock rather than terminate.

s/will/would/ as that's only hypothetical (assuming no other bugs).

> Further fixes being discussed (better auditing of toolstack-provided
> buffers) should cause it never to fault for buffer-contents reasons, at
> which point I'll be removing the retry aspect and escalating to
> domain_crash() unconditionally.

Still in the meantime I think Nicola's suggestion should be taken
and ASSERT_UNREACHABLE() be added. Then
Acked-by: Jan Beulich <jbeulich@suse.com>

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-05 15:16     ` Jan Beulich
@ 2026-01-05 16:55       ` Andrew Cooper
  2026-01-05 17:06         ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2026-01-05 16:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 05/01/2026 3:16 pm, Jan Beulich wrote:
> On 02.01.2026 17:01, Andrew Cooper wrote:
>> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>>      uint32_t hmask = mask >> 32;
>>>      uint32_t lmask = mask;
>>>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>>> -#define XSAVE(pfx) \
>>> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>> -                           : "=m" (*ptr) \
>>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>> -        else \
>>> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>> -                           X86_FEATURE_XSAVEOPT, \
>>> -                           "=m" (*ptr), \
>>> -                           "a" (lmask), "d" (hmask), "D" (ptr))
>>> +
>>> +#define XSAVE(pfx)                                                      \
>>> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
>>> +        asm volatile ( "xsaves %0"                                      \
>>> +                       : "=m" (*ptr)                                    \
>>> +                       : "a" (lmask), "d" (hmask) );                    \
>>> +    else                                                                \
>>> +        alternative_io("xsave %0",                                      \
>>> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
>>> +                       "=m" (*ptr),                                     \
>>> +                       "a" (lmask), "d" (hmask))
>> This loses the pfx.  I've fixed up locally and double checked the
>> disassembly.
> Question being: Do we want to stick to using the prefix form, when gas
> specifically has been offering a kind-of-suffix form instead from the
> very beginning (xsaves and xsaves64)?
>
> If we wanted to stick to the prefixes, I'd favor a form where the use
> sites don't need to supply the separating blank (i.e. the macro itself
> would insert it, as doing do with an empty prefix results in merely
> an indentation "issue" in the generated assembly). Thoughts?

I don't expect this macro to survive the fixes to use the compressed
format.  From that point of view, "closest to the original" is least churn.

One problem with using a suffix form is that you could feed in "opt"
instead of "64" and break things in rather more subtle ways.

I'll adjust the position of the space, but I think this can keep on
using prefixes in the short term.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-05 16:55       ` Andrew Cooper
@ 2026-01-05 17:06         ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-01-05 17:06 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 05.01.2026 17:55, Andrew Cooper wrote:
> On 05/01/2026 3:16 pm, Jan Beulich wrote:
>> On 02.01.2026 17:01, Andrew Cooper wrote:
>>> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/xstate.c
>>>> +++ b/xen/arch/x86/xstate.c
>>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>>>      uint32_t hmask = mask >> 32;
>>>>      uint32_t lmask = mask;
>>>>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>>>> -#define XSAVE(pfx) \
>>>> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>>> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>>> -                           : "=m" (*ptr) \
>>>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>>> -        else \
>>>> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>>> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>>> -                           X86_FEATURE_XSAVEOPT, \
>>>> -                           "=m" (*ptr), \
>>>> -                           "a" (lmask), "d" (hmask), "D" (ptr))
>>>> +
>>>> +#define XSAVE(pfx)                                                      \
>>>> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
>>>> +        asm volatile ( "xsaves %0"                                      \
>>>> +                       : "=m" (*ptr)                                    \
>>>> +                       : "a" (lmask), "d" (hmask) );                    \
>>>> +    else                                                                \
>>>> +        alternative_io("xsave %0",                                      \
>>>> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
>>>> +                       "=m" (*ptr),                                     \
>>>> +                       "a" (lmask), "d" (hmask))
>>> This loses the pfx.  I've fixed up locally and double checked the
>>> disassembly.
>> Question being: Do we want to stick to using the prefix form, when gas
>> specifically has been offering a kind-of-suffix form instead from the
>> very beginning (xsaves and xsaves64)?
>>
>> If we wanted to stick to the prefixes, I'd favor a form where the use
>> sites don't need to supply the separating blank (i.e. the macro itself
>> would insert it, as doing do with an empty prefix results in merely
>> an indentation "issue" in the generated assembly). Thoughts?
> 
> I don't expect this macro to survive the fixes to use the compressed
> format.  From that point of view, "closest to the original" is least churn.
> 
> One problem with using a suffix form is that you could feed in "opt"
> instead of "64" and break things in rather more subtle ways.

Except that there's no XSAVESOPT nor XSAVEOPTOPT.

> I'll adjust the position of the space, but I think this can keep on
> using prefixes in the short term.

Okay, I wanted the alternative to at least be considered.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-05 15:46   ` Jan Beulich
@ 2026-01-05 17:45     ` Andrew Cooper
  2026-01-06  7:59       ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2026-01-05 17:45 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 05/01/2026 3:46 pm, Jan Beulich wrote:
> On 30.12.2025 14:54, Andrew Cooper wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>      uint32_t hmask = mask >> 32;
>>      uint32_t lmask = mask;
>>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>> -#define XSAVE(pfx) \
>> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>> -                           : "=m" (*ptr) \
>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>> -        else \
>> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> -                           X86_FEATURE_XSAVEOPT, \
>> -                           "=m" (*ptr), \
>> -                           "a" (lmask), "d" (hmask), "D" (ptr))
>> +
>> +#define XSAVE(pfx)                                                      \
>> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
>> +        asm volatile ( "xsaves %0"                                      \
>> +                       : "=m" (*ptr)                                    \
>> +                       : "a" (lmask), "d" (hmask) );                    \
>> +    else                                                                \
>> +        alternative_io("xsave %0",                                      \
>> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
>> +                       "=m" (*ptr),                                     \
>> +                       "a" (lmask), "d" (hmask))
> While no doubt neater to read this way, there's a subtle latent issue here:
> "m" doesn't exclude RIP-relative addressing, yet that addressing form can't
> be used in replacement code (up and until we leverage your decode-lite to
> actually be able to fix up the displacement).

I guess I'll fix that first.

I'm not interested in trying to keep playing games to work around
deficiencies in our alternatives infrastructure.

>  Sadly "o" as a constraint
> doesn't look to be any different in this regard (I think it should be, as
> adding a "small integer" may already bring the displacement beyond the
> permitted range, but their definition of "offsettable" allows this).
>
> This issue is latent until such time that (a) a caller appears passing in
> the address of a Xen-internal variable and (b) we make LTO work again.
> Since the breakage would be impossible to notice at build time, I think we
> would be better off if we avoided it from the beginning. Which may mean
> sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.

Even with LTO, I don't see any plausible case where we have build-time
struct vcpu's to pass in.

>
>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>>              ptr->xsave_hdr.xcomp_bv = 0;
>>          }
>>          memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>> -        continue;
>> +        goto retry;
>>  
>>      case 2: /* Stage 2: Reset all state. */
>>          ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>>          ptr->xsave_hdr.xstate_bv = 0;
>>          ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>>              ? XSTATE_COMPACTION_ENABLED : 0;
>> -        continue;
>> -    }
>> +        goto retry;
>>  
>> -        domain_crash(current->domain);
>> +    default: /* Stage 3: Nothing else to do. */
>> +        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>>          return;
> There's an unexplained change here as to which domain is being crashed.
> You switch to crashing the subject domain, yet if that's not also the
> requester, it isn't "guilty" in causing the observed fault.

So dom0 should be crashed because there bad data in the migration stream?

v is always curr.  XRSTOR can't be used correctly outside of the subject
context, and indeed the Stage 2 logic above is definitely buggy when v
!= curr.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-05 17:45     ` Andrew Cooper
@ 2026-01-06  7:59       ` Jan Beulich
  2026-01-08 21:08         ` Andrew Cooper
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-01-06  7:59 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 05.01.2026 18:45, Andrew Cooper wrote:
> On 05/01/2026 3:46 pm, Jan Beulich wrote:
>> On 30.12.2025 14:54, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>>      uint32_t hmask = mask >> 32;
>>>      uint32_t lmask = mask;
>>>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>>> -#define XSAVE(pfx) \
>>> -        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> -            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>> -                           : "=m" (*ptr) \
>>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>> -        else \
>>> -            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>> -                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>> -                           X86_FEATURE_XSAVEOPT, \
>>> -                           "=m" (*ptr), \
>>> -                           "a" (lmask), "d" (hmask), "D" (ptr))
>>> +
>>> +#define XSAVE(pfx)                                                      \
>>> +    if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )                      \
>>> +        asm volatile ( "xsaves %0"                                      \
>>> +                       : "=m" (*ptr)                                    \
>>> +                       : "a" (lmask), "d" (hmask) );                    \
>>> +    else                                                                \
>>> +        alternative_io("xsave %0",                                      \
>>> +                       "xsaveopt %0", X86_FEATURE_XSAVEOPT,             \
>>> +                       "=m" (*ptr),                                     \
>>> +                       "a" (lmask), "d" (hmask))
>> While no doubt neater to read this way, there's a subtle latent issue here:
>> "m" doesn't exclude RIP-relative addressing, yet that addressing form can't
>> be used in replacement code (up and until we leverage your decode-lite to
>> actually be able to fix up the displacement).
> 
> I guess I'll fix that first.
> 
> I'm not interested in trying to keep playing games to work around
> deficiencies in our alternatives infrastructure.
> 
>>  Sadly "o" as a constraint
>> doesn't look to be any different in this regard (I think it should be, as
>> adding a "small integer" may already bring the displacement beyond the
>> permitted range, but their definition of "offsettable" allows this).
>>
>> This issue is latent until such time that (a) a caller appears passing in
>> the address of a Xen-internal variable and (b) we make LTO work again.
>> Since the breakage would be impossible to notice at build time, I think we
>> would be better off if we avoided it from the beginning. Which may mean
>> sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.
> 
> Even with LTO, I don't see any plausible case where we have build-time
> struct vcpu's to pass in.

Sure, but struct vcpu * also isn't a great parameter for this kind of a
function.

>>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>>>              ptr->xsave_hdr.xcomp_bv = 0;
>>>          }
>>>          memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>>> -        continue;
>>> +        goto retry;
>>>  
>>>      case 2: /* Stage 2: Reset all state. */
>>>          ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>>>          ptr->xsave_hdr.xstate_bv = 0;
>>>          ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>>>              ? XSTATE_COMPACTION_ENABLED : 0;
>>> -        continue;
>>> -    }
>>> +        goto retry;
>>>  
>>> -        domain_crash(current->domain);
>>> +    default: /* Stage 3: Nothing else to do. */
>>> +        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>>>          return;
>> There's an unexplained change here as to which domain is being crashed.
>> You switch to crashing the subject domain, yet if that's not also the
>> requester, it isn't "guilty" in causing the observed fault.
> 
> So dom0 should be crashed because there bad data in the migration stream?

Well, I'm not saying the behavior needs to stay like this, or that's it's
the best of all possible options. But in principle Dom0 could sanitize the
migration stream before passing it to Xen. So it is still first and foremost
Dom0 which is to blame.

> v is always curr.

Not quite - see xstate_set_init(). And for some of the callers of
hvm_update_guest_cr() I also don't think they always act on current. In
particular hvm_vcpu_reset_state() never does, I suppose (not the least
because of the vcpu_pause() in its sole caller).

>  XRSTOR can't be used correctly outside of the subject context,

Then are you suggesting e.g. xstate_set_init() is buggy?

> and indeed the Stage 2 logic above is definitely buggy when v != curr.

Apparently I'm blind, as I don't see an issue there. It's all v's data
which is being fiddled with.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-06  7:59       ` Jan Beulich
@ 2026-01-08 21:08         ` Andrew Cooper
  2026-01-09  8:16           ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Cooper @ 2026-01-08 21:08 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 06/01/2026 7:59 am, Jan Beulich wrote:
>>>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>>>>              ptr->xsave_hdr.xcomp_bv = 0;
>>>>          }
>>>>          memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>>>> -        continue;
>>>> +        goto retry;
>>>>  
>>>>      case 2: /* Stage 2: Reset all state. */
>>>>          ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>>>>          ptr->xsave_hdr.xstate_bv = 0;
>>>>          ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>>>>              ? XSTATE_COMPACTION_ENABLED : 0;
>>>> -        continue;
>>>> -    }
>>>> +        goto retry;
>>>>  
>>>> -        domain_crash(current->domain);
>>>> +    default: /* Stage 3: Nothing else to do. */
>>>> +        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>>>>          return;
>>> There's an unexplained change here as to which domain is being crashed.
>>> You switch to crashing the subject domain, yet if that's not also the
>>> requester, it isn't "guilty" in causing the observed fault.
>> So dom0 should be crashed because there bad data in the migration stream?
> Well, I'm not saying the behavior needs to stay like this, or that's it's
> the best of all possible options. But in principle Dom0 could sanitize the
> migration stream before passing it to Xen. So it is still first and foremost
> Dom0 which is to blame.

BNDCFGU contains a pointer which, for PV context, needs access_ok(), not
just a regular canonical check.  Most supervisor states are in a similar
position.

Just because Xen has managed to get away without such checks (by not yet
supporting a state where it matters), I don't agree that its safe to
trust dom0 to do this.


For this case, it's v's xstate buffer which cannot be loaded, so it's v
which cannot be context switched into, and must be crashed.  More below.


>> v is always curr.
> Not quite - see xstate_set_init().

Also more below.

> And for some of the callers of
> hvm_update_guest_cr() I also don't think they always act on current. In
> particular hvm_vcpu_reset_state() never does, I suppose (not the least
> because of the vcpu_pause() in its sole caller).

We discussed the need to not be remotely poking register state like
that.  But I don't see where the connection is between
hvm_update_guest_cr() and xsave()/xrstor().

Tangent: hvm_vcpu_reset_state() is terribly named as it's attempting to
put the vCPU into the INIT state, not the #RESET set.

But it only operates on the xstate header in memory while the target is
de-scheduled.  It's not using XSAVE/XRSTOR to load the results into
registers as far as I can tell.

>
>>   XRSTOR can't be used correctly outside of the subject context,
> Then are you suggesting e.g. xstate_set_init() is buggy?

No, but it switches into enough of v's context to function.  Really its
neither current nor remote context.

But, it's single caller is adjust_bnd() in the emulator so it's always
actually current context with a no-op on xcr0.

As said on Matrix, I think it's going to be necessary to remove MPX to
continue the XSAVE cleanup.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so
  2026-01-05 15:58   ` Jan Beulich
@ 2026-01-08 21:14     ` Andrew Cooper
  0 siblings, 0 replies; 21+ messages in thread
From: Andrew Cooper @ 2026-01-08 21:14 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Xen-devel

On 05/01/2026 3:58 pm, Jan Beulich wrote:
> On 30.12.2025 14:54, Andrew Cooper wrote:
>> --- a/xen/arch/x86/include/asm/asm-defns.h
>> +++ b/xen/arch/x86/include/asm/asm-defns.h
>> @@ -1,9 +1,5 @@
>>  #include <asm/page-bits.h>
>>  
>> -.macro clzero
>> -    .byte 0x0f, 0x01, 0xfc
>> -.endm
> This can't go away yet, as it became known to gas only in 2.26.
>
>> --- a/xen/arch/x86/include/asm/prot-key.h
>> +++ b/xen/arch/x86/include/asm/prot-key.h
>> @@ -19,16 +19,14 @@ static inline uint32_t rdpkru(void)
>>  {
>>      uint32_t pkru;
>>  
>> -    asm volatile ( ".byte 0x0f,0x01,0xee"
>> -                   : "=a" (pkru) : "c" (0) : "dx" );
>> +    asm volatile ( "rdpkru" : "=a" (pkru) : "c" (0) : "dx" );
>>  
>>      return pkru;
>>  }
>>  
>>  static inline void wrpkru(uint32_t pkru)
>>  {
>> -    asm volatile ( ".byte 0x0f,0x01,0xef"
>> -                   :: "a" (pkru), "d" (0), "c" (0) );
>> +    asm volatile ( "wrpkru" :: "a" (pkru), "d" (0), "c" (0) );
>>  }
> Same for both of these.

Oh - so they did.

This is a fairly old patch, and I don't recall exactly how I did the
analysis, but it was clearly wrong now I've double checked the 2.25 branch.

I'll annotate these rather than dropping them.

~Andrew


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline
  2026-01-08 21:08         ` Andrew Cooper
@ 2026-01-09  8:16           ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-01-09  8:16 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Xen-devel

On 08.01.2026 22:08, Andrew Cooper wrote:
> On 06/01/2026 7:59 am, Jan Beulich wrote:
>>>>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>>>>>              ptr->xsave_hdr.xcomp_bv = 0;
>>>>>          }
>>>>>          memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>>>>> -        continue;
>>>>> +        goto retry;
>>>>>  
>>>>>      case 2: /* Stage 2: Reset all state. */
>>>>>          ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>>>>>          ptr->xsave_hdr.xstate_bv = 0;
>>>>>          ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>>>>>              ? XSTATE_COMPACTION_ENABLED : 0;
>>>>> -        continue;
>>>>> -    }
>>>>> +        goto retry;
>>>>>  
>>>>> -        domain_crash(current->domain);
>>>>> +    default: /* Stage 3: Nothing else to do. */
>>>>> +        domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>>>>>          return;
>>>> There's an unexplained change here as to which domain is being crashed.
>>>> You switch to crashing the subject domain, yet if that's not also the
>>>> requester, it isn't "guilty" in causing the observed fault.
>>> So dom0 should be crashed because there bad data in the migration stream?
>> Well, I'm not saying the behavior needs to stay like this, or that's it's
>> the best of all possible options. But in principle Dom0 could sanitize the
>> migration stream before passing it to Xen. So it is still first and foremost
>> Dom0 which is to blame.
> 
> BNDCFGU contains a pointer which, for PV context, needs access_ok(), not
> just a regular canonical check.  Most supervisor states are in a similar
> position.

Yes, so exposing them to PV would require extra care. Note that MPX isn't
exposed to PV.

> Just because Xen has managed to get away without such checks (by not yet
> supporting a state where it matters), I don't agree that its safe to
> trust dom0 to do this.

Yet the guest itself can't have got in place a non-canonical value, can it?
Its attempts to load it into hardware would have faulted. So it's still
not the target domain which is to be blamed for a fault resulting from
XRSTOR encountering bogus pointers.

> For this case, it's v's xstate buffer which cannot be loaded, so it's v
> which cannot be context switched into, and must be crashed.  More below.

Well, yes, as said - that's one possible way of treating things. My main
request is not so much to undo the change, but to properly justify it in
the description. (Or maybe that really wants to be a separate change, in
particular if you wanted the changed behavior to also be backported.)

>>> v is always curr.
>> Not quite - see xstate_set_init().
> 
> Also more below.
> 
>> And for some of the callers of
>> hvm_update_guest_cr() I also don't think they always act on current. In
>> particular hvm_vcpu_reset_state() never does, I suppose (not the least
>> because of the vcpu_pause() in its sole caller).
> 
> We discussed the need to not be remotely poking register state like
> that.  But I don't see where the connection is between
> hvm_update_guest_cr() and xsave()/xrstor().

At the example of svm_update_guest_cr(): It calls svm_fpu_enter(), which
in turn calls vcpu_restore_fpu_lazy(). But yes, that's explicitly only
when v == current. I fear I didn't look closely enough when writing the
earlier reply, sorry.

> Tangent: hvm_vcpu_reset_state() is terribly named as it's attempting to
> put the vCPU into the INIT state, not the #RESET set.
> 
> But it only operates on the xstate header in memory while the target is
> de-scheduled.  It's not using XSAVE/XRSTOR to load the results into
> registers as far as I can tell.

Iirc I mentioned hvm_vcpu_reset_state() because it calls
hvm_update_guest_cr() several times.

>>>   XRSTOR can't be used correctly outside of the subject context,
>> Then are you suggesting e.g. xstate_set_init() is buggy?
> 
> No, but it switches into enough of v's context to function.  Really its
> neither current nor remote context.
> 
> But, it's single caller is adjust_bnd() in the emulator so it's always
> actually current context with a no-op on xcr0.

That's its single present caller. Who knows what else we might need it for.
It would better be operating correctly in the more general case.

> As said on Matrix, I think it's going to be necessary to remove MPX to
> continue the XSAVE cleanup.

Possibly, yes.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2026-01-09  8:17 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-30 13:54 [PATCH 0/4] x86/asm: .byte removal Andrew Cooper
2025-12-30 13:54 ` [PATCH 1/4] x86/xstate: Deindent the logic in xrstor() Andrew Cooper
2026-01-05 15:12   ` Jan Beulich
2025-12-30 13:54 ` [PATCH 2/4] x86/xstate: Rework XSAVE/XRSTOR given a newer toolchain baseline Andrew Cooper
2026-01-02 16:01   ` Andrew Cooper
2026-01-05 15:16     ` Jan Beulich
2026-01-05 16:55       ` Andrew Cooper
2026-01-05 17:06         ` Jan Beulich
2026-01-05 15:46   ` Jan Beulich
2026-01-05 17:45     ` Andrew Cooper
2026-01-06  7:59       ` Jan Beulich
2026-01-08 21:08         ` Andrew Cooper
2026-01-09  8:16           ` Jan Beulich
2025-12-30 13:54 ` [PATCH 3/4] x86/i387: Rework fpu_fxrstor() " Andrew Cooper
2026-01-05 15:52   ` Jan Beulich
2026-01-05 16:13     ` Nicola Vetrini
2026-01-05 16:39       ` Andrew Cooper
2026-01-05 16:48         ` Jan Beulich
2025-12-30 13:54 ` [PATCH 4/4] x86: Avoid using .byte for instructions where safe to do so Andrew Cooper
2026-01-05 15:58   ` Jan Beulich
2026-01-08 21:14     ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.