* [PATCH v2 0/3] x86: CET-SS related adjustments
@ 2026-04-08 12:20 Jan Beulich
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Jan Beulich @ 2026-04-08 12:20 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Wei Liu, Roger Pau Monné
One might think of this as follow-on to XSA-451, but that's not quite
the right order of events.
There are a few open aspects; see individual patches.
v2, besides addressing small issues, is mainly a re-base over FRED
work finally having gone in.
1: record SSP at non-guest entry points
2: traps: use entry_ssp in fixup_exception_return()
3: prefer shadow stack for producing call traces
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 1/3] x86: record SSP at non-guest entry points
2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich
@ 2026-04-08 12:22 ` Jan Beulich
2026-04-08 16:58 ` Andrew Cooper
2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich
2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich
2 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2026-04-08 12:22 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
We will want to use that value for call trace generation, and likely
also to eliminate the somewhat fragile shadow stack searching done in
fixup_exception_return(). For those purposes, guest-only entry points do
not need to record that value.
To keep the saving code simple, record our own SSP that corresponds to
an exception frame, pointing to the top of the shadow stack counterpart
of what the CPU has saved on the regular stack. Consuming code can then
work its way from there.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to
the error code isn't entirely nice; putting it ahead of %r15 would entail
other changes, though. An option may be to not make SSP handling part of
the macros in the first place. Thoughts?
For POP_GPRS, does it really matter that it doesn't alter EFLAGS? Neither
of the two currene uses relies on it, and without that requirement we
could use ADD in place of LEA. (Of course there are also POP-based ways of
getting rid of the SSP slot.)
---
v2: Add comment ahead of SAVE_ALL. Add comma between its parameters.
Re-base.
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap)
vmrun
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_CURRENT(bx)
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -22,7 +22,7 @@
#include <asm/page.h>
FUNC(vmx_asm_vmexit_handler)
- SAVE_ALL
+ SAVE_ALL ssp=0
mov %cr2,%rax
GET_CURRENT(bx)
@@ -171,7 +171,7 @@ UNLIKELY_END(realmode)
.Lvmx_vmentry_fail:
sti
- SAVE_ALL
+ SAVE_ALL ssp=0
/*
* SPEC_CTRL_ENTRY notes
--- a/xen/arch/x86/include/asm/asm_defns.h
+++ b/xen/arch/x86/include/asm/asm_defns.h
@@ -219,7 +219,11 @@ static always_inline void stac(void)
#endif
#ifdef __ASSEMBLER__
-.macro SAVE_ALL compat=0
+/*
+ * Use sites may override ssp to 0. It should never be overridden to 1.
+ * NB: compat=1 implies ssp=0.
+ */
+.macro SAVE_ALL compat=0, ssp=IS_ENABLED(CONFIG_XEN_SHSTK)
addq $-(UREGS_error_code-UREGS_r15), %rsp
cld
movq %rdi,UREGS_rdi(%rsp)
@@ -233,6 +237,9 @@ static always_inline void stac(void)
movq %rax,UREGS_rax(%rsp)
xor %eax, %eax
.if !\compat
+.if \ssp
+ rdsspq %rcx
+.endif
movq %r8,UREGS_r8(%rsp)
movq %r9,UREGS_r9(%rsp)
movq %r10,UREGS_r10(%rsp)
@@ -262,6 +269,9 @@ static always_inline void stac(void)
xor %r13d, %r13d
xor %r14d, %r14d
xor %r15d, %r15d
+#ifdef CONFIG_XEN_SHSTK
+ mov %rcx, UREGS_entry_ssp(%rsp)
+#endif
.endm
#define LOAD_ONE_REG(reg, compat) \
@@ -313,9 +323,14 @@ static always_inline void stac(void)
.endm
/*
- * Push and clear GPRs
+ * Push and clear GPRs.
+ *
+ * Use sites may override ssp to 0. It should never be overridden to 1.
*/
-.macro PUSH_AND_CLEAR_GPRS
+.macro PUSH_AND_CLEAR_GPRS ssp=IS_ENABLED(CONFIG_XEN_SHSTK)
+#ifdef CONFIG_XEN_SHSTK
+ push $0
+#endif
push %rdi
xor %edi, %edi
push %rsi
@@ -326,6 +341,9 @@ static always_inline void stac(void)
xor %ecx, %ecx
push %rax
xor %eax, %eax
+ .if \ssp
+ rdsspq %rcx
+ .endif
push %r8
xor %r8d, %r8d
push %r9
@@ -352,6 +370,9 @@ static always_inline void stac(void)
xor %r14d, %r14d
push %r15
xor %r15d, %r15d
+ .if \ssp
+ mov %rcx, UREGS_entry_ssp(%rsp)
+ .endif
.endm
/*
@@ -373,6 +394,9 @@ static always_inline void stac(void)
pop %rdx
pop %rsi
pop %rdi
+#ifdef CONFIG_XEN_SHSTK
+ lea 8(%rsp), %rsp
+#endif
.endm
#ifdef CONFIG_PV32
--- a/xen/arch/x86/include/asm/cpu-user-regs.h
+++ b/xen/arch/x86/include/asm/cpu-user-regs.h
@@ -27,6 +27,15 @@ struct cpu_user_regs
union { uint64_t rsi; uint32_t esi; uint16_t si; uint8_t sil; };
union { uint64_t rdi; uint32_t edi; uint16_t di; uint8_t dil; };
+#ifdef CONFIG_XEN_SHSTK
+ /*
+ * This points _at_ the corresponding shadow stack frame; it is _not_ the
+ * outer context's SSP. That, if the outer context has CET-SS enabled,
+ * is stored in the top slot of the pointed to shadow stack frame.
+ */
+ uint64_t entry_ssp;
+#endif
+
/*
* During IDT delivery for exceptions with an error code, hardware pushes
* to this point. Entry_vector is filled in by software.
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -53,6 +53,9 @@ void __dummy__(void)
OFFSET(UREGS_eflags, struct cpu_user_regs, rflags);
OFFSET(UREGS_rsp, struct cpu_user_regs, rsp);
OFFSET(UREGS_ss, struct cpu_user_regs, ss);
+#ifdef CONFIG_XEN_SHSTK
+ OFFSET(UREGS_entry_ssp, struct cpu_user_regs, entry_ssp);
+#endif
DEFINE(UREGS_kernel_sizeof, sizeof(struct cpu_user_regs));
BLANK();
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -275,7 +275,7 @@ FUNC(lstar_enter)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -315,7 +315,7 @@ FUNC(cstar_enter)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -359,7 +359,7 @@ LABEL(sysenter_eflags_saved, 0)
pushq $0
BUILD_BUG_ON(TRAP_syscall & 0xff)
movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
@@ -415,7 +415,7 @@ FUNC(entry_int80)
ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
pushq $0
movb $0x80, EFRAME_entry_vector(%rsp)
- SAVE_ALL
+ SAVE_ALL ssp=0
GET_STACK_END(14)
--- a/xen/arch/x86/x86_64/entry-fred.S
+++ b/xen/arch/x86/x86_64/entry-fred.S
@@ -10,7 +10,7 @@
/* The Ring3 entry point is required to be 4k aligned. */
FUNC(entry_FRED_R3, 4096)
- PUSH_AND_CLEAR_GPRS
+ PUSH_AND_CLEAR_GPRS ssp=0
mov %rsp, %rdi
call entry_from_pv
@@ -38,7 +38,7 @@ LABEL(eretu, 0)
END(eretu_exit_to_guest)
FUNC(eretu_error_dom_crash)
- PUSH_AND_CLEAR_GPRS
+ PUSH_AND_CLEAR_GPRS ssp=0
sti
call asm_domain_crash_synchronous /* Does not return */
END(eretu_error_dom_crash)
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return()
2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
@ 2026-04-08 12:23 ` Jan Beulich
2026-04-08 17:34 ` Andrew Cooper
2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich
2 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2026-04-08 12:23 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
With the value recorded on entry there's no need anymore to go hunt for
the respective exception frame on the shadow stack. By deriving "ptr"
from that field (without any offset), it then ends up pointin one slot
lower than before. Therefore all array indexes need incrementing, nicely
doing away with all the negative ones.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Indentation of the prior inner (but not innermost) if()'s body is
deliberately left untouched, to aid review. It'll be adjusted in a
separate follow-on patch.
---
v2: IS_ENABLED() -> #ifdef. Re-base.
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -690,19 +690,6 @@ unsigned long get_stack_trace_bottom(uns
}
}
-static unsigned long get_shstk_bottom(unsigned long sp)
-{
- /* SAF-11-safe */
- switch ( get_stack_page(sp) )
- {
-#ifdef CONFIG_XEN_SHSTK
- case 0: return ROUNDUP(sp, IST_SHSTK_SIZE) - sizeof(unsigned long);
- case 5: return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
-#endif
- default: return sp - sizeof(unsigned long);
- }
-}
-
unsigned long get_stack_dump_bottom(unsigned long sp)
{
switch ( get_stack_page(sp) )
@@ -1187,26 +1174,28 @@ void asmlinkage noreturn do_unhandled_tr
static void fixup_exception_return(struct cpu_user_regs *regs,
unsigned long fixup, unsigned long stub_ra)
{
- if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
+#ifdef CONFIG_XEN_SHSTK
{
- unsigned long ssp, *ptr, *base;
+ unsigned long ssp = rdssp();
- if ( (ssp = rdssp()) == SSP_NO_SHSTK )
- goto shstk_done;
+ if ( ssp != SSP_NO_SHSTK )
+ {
+ unsigned long *ptr = _p(regs->entry_ssp);
+ unsigned long primary_shstk =
+ (ssp & ~(STACK_SIZE - 1)) +
+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
- ptr = _p(ssp);
- base = _p(get_shstk_bottom(ssp));
+ BUG_ON((regs->entry_ssp ^ primary_shstk) >> PAGE_SHIFT);
- for ( ; ptr < base; ++ptr )
- {
/*
- * Search for %rip. The shstk currently looks like this:
+ * The shstk currently looks like this:
*
* tok [Supervisor token, == &tok | BUSY, only with FRED inactive]
* ... [Pointed to by SSP for most exceptions, empty in IST cases]
* %cs [== regs->cs]
* %rip [== regs->rip]
- * SSP [Likely points to 3 slots higher, above %cs]
+ * SSP [Pointed to by entry_ssp; Likely points to 3 slots
+ * higher, above %cs]
* ... [call tree to this function, likely 2/3 slots]
*
* and we want to overwrite %rip with fixup. There are two
@@ -1219,13 +1208,10 @@ static void fixup_exception_return(struc
*
* Check for both regs->rip and regs->cs matching.
*/
- if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
- {
- unsigned long primary_shstk =
- (ssp & ~(STACK_SIZE - 1)) +
- (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
+ BUG_ON(ptr[1] != regs->rip || ptr[2] != regs->cs);
- wrss(fixup, ptr);
+ {
+ wrss(fixup, &ptr[1]);
if ( !stub_ra )
goto shstk_done;
@@ -1242,7 +1228,7 @@ static void fixup_exception_return(struc
* - if we're on an IST stack, we need to increment the
* original SSP.
*/
- BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT);
+ BUG_ON((ptr[0] ^ primary_shstk) >> PAGE_SHIFT);
if ( (ssp ^ primary_shstk) >> PAGE_SHIFT )
{
@@ -1251,39 +1237,30 @@ static void fixup_exception_return(struc
* addresses actually match. Then increment the interrupted
* context's SSP.
*/
- BUG_ON(stub_ra != *(unsigned long*)ptr[-1]);
- wrss(ptr[-1] + 8, &ptr[-1]);
+ BUG_ON(stub_ra != *(unsigned long*)ptr[0]);
+ wrss(ptr[0] + 8, &ptr[0]);
goto shstk_done;
}
/* Make sure the two return addresses actually match. */
- BUG_ON(stub_ra != ptr[2]);
+ BUG_ON(stub_ra != ptr[3]);
/* Move exception frame, updating SSP there. */
- wrss(ptr[1], &ptr[2]); /* %cs */
- wrss(ptr[0], &ptr[1]); /* %rip */
- wrss(ptr[-1] + 8, &ptr[0]); /* SSP */
+ wrss(ptr[2], &ptr[3]); /* %cs */
+ wrss(ptr[1], &ptr[2]); /* %rip */
+ wrss(ptr[0] + 8, &ptr[1]); /* SSP */
/* Move all newer entries. */
- while ( --ptr != _p(ssp) )
- wrss(ptr[-1], &ptr[0]);
+ while ( ptr-- != _p(ssp) )
+ wrss(ptr[0], &ptr[1]);
/* Finally account for our own stack having shifted up. */
asm volatile ( "incsspd %0" :: "r" (2) );
-
- goto shstk_done;
}
}
-
- /*
- * We failed to locate and fix up the shadow IRET frame. This could
- * be due to shadow stack corruption, or bad logic above. We cannot
- * continue executing the interrupted context.
- */
- BUG();
-
}
shstk_done:
+#endif /* CONFIG_XEN_SHSTK */
/* Fixup the regular stack. */
regs->rip = fixup;
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 3/3] x86: prefer shadow stack for producing call traces
2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich
@ 2026-04-08 12:23 ` Jan Beulich
2026-04-08 17:53 ` Andrew Cooper
2 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2026-04-08 12:23 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
Shadow stacks contain little more than return addresses, and they in
particular allow precise call traces also without FRAME_POINTER.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
While the 'E' for exception frames is probably okay, I'm not overly
happy with the 'C' (for CET). I would have preferred 'S' (for shadow),
but we use that character already.
As an alternative to suppressing output for the top level exception
frame, adding the new code ahead of the 'R' output line (and then also
ahead of the stack top read) could be considered.
Perhaps having a printk() for the PV entry case is meaningless, for
- no frame being pushed when entered from CPL=3 (64-bit PV),
- no entry possible from CPL<3 (32-bit PV disabled when CET is active)?
In which case the comment probably should just be "Bogus." and the code
merely be "break;".
Quite likely a number of other uses of is_active_kernel_text() also want
amending with in_stub().
---
v2: IS_ENABLED() -> #ifdef. Re-base.
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -48,6 +48,7 @@
#include <asm/shared.h>
#include <asm/shstk.h>
#include <asm/smp.h>
+#include <asm/stubs.h>
#include <asm/system.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
@@ -705,6 +706,13 @@ unsigned long get_stack_dump_bottom(unsi
}
}
+#ifdef CONFIG_XEN_SHSTK
+static bool in_stub(unsigned long addr)
+{
+ return !((this_cpu(stubs.addr) ^ addr) >> STUB_BUF_SHIFT);
+}
+#endif
+
#if !defined(CONFIG_FRAME_POINTER)
/*
@@ -797,6 +805,52 @@ static void show_trace(const struct cpu_
!is_active_kernel_text(tos) )
printk(" [<%p>] R %pS\n", _p(regs->rip), _p(regs->rip));
+#ifdef CONFIG_XEN_SHSTK
+ if ( rdssp() != SSP_NO_SHSTK )
+ {
+ const unsigned long *ptr = _p(regs->entry_ssp);
+ unsigned int n;
+
+ for ( n = 0; (unsigned long)ptr & (PAGE_SIZE - sizeof(*ptr)); ++n )
+ {
+ unsigned long val = *ptr;
+
+ if ( is_active_kernel_text(val) || in_stub(val) )
+ {
+ /* Normal return address entry. */
+ printk(" [<%p>] C %pS\n", _p(val), _p(val));
+ ++ptr;
+ }
+ else if ( !((val ^ *ptr) >> (PAGE_SHIFT + STACK_ORDER)) )
+ {
+ if ( val & (sizeof(val) - 1) )
+ {
+ /* Most likely a supervisor token. */
+ break;
+ }
+
+ /*
+ * Ought to be a hypervisor interruption frame. But don't
+ * (re)log the current frame's %rip.
+ */
+ if ( n || ptr[1] != regs->rip )
+ printk(" [<%p>] E %pS\n", _p(ptr[1]), _p(ptr[1]));
+ ptr = _p(val);
+ }
+ else
+ {
+ /* Ought to be a PV guest hypercall/interruption frame. */
+ printk(" %04lx:[<%p>] E\n", ptr[2], _p(ptr[1]));
+ ptr = 0;
+ }
+ }
+
+ /* Fall back to legacy stack trace if nothing was logged at all. */
+ if ( n )
+ return;
+ }
+#endif /* CONFIG_XEN_SHSTK */
+
if ( fault )
{
printk(" [Fault on access]\n");
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
@ 2026-04-08 16:58 ` Andrew Cooper
2026-04-09 8:13 ` Jan Beulich
0 siblings, 1 reply; 12+ messages in thread
From: Andrew Cooper @ 2026-04-08 16:58 UTC (permalink / raw)
To: Jan Beulich, xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
On 08/04/2026 1:22 pm, Jan Beulich wrote:
> We will want to use that value for call trace generation, and likely
> also to eliminate the somewhat fragile shadow stack searching done in
> fixup_exception_return(). For those purposes, guest-only entry points do
> not need to record that value.
>
> To keep the saving code simple, record our own SSP that corresponds to
> an exception frame, pointing to the top of the shadow stack counterpart
> of what the CPU has saved on the regular stack. Consuming code can then
> work its way from there.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to
> the error code isn't entirely nice; putting it ahead of %r15 would entail
> other changes, though. An option may be to not make SSP handling part of
> the macros in the first place. Thoughts?
I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial
complexity/inefficiency, and mixing of unrelated tasks.
I have several series trying to purge them. I suppose I really ought to
try and finish this off properly.
While classing SSP as a "register" is probably fine, the ssp= parameter
(and particular it's asymmetric nature) is on the wrong side of the
"complex" argument IMO.
> For POP_GPRS, does it really matter that it doesn't alter EFLAGS?
Yes. The SYSCALL fix for one (reviewed, but waiting on final testing
before I commit).
Then the VT-x code when swapped to use POP_GPRS.
To take a step back, you say that putting it ahead of %r15 would entail
other changes. What changes?
The resulting asm would be far cleaner. It would be an rdssp;push on
one side, and a pop into any register on the other side. Furthermore,
given that the ssp= doesn't exclude storing it for some user frames,
just store it for all. It's one push/pop into a hot cacheline, and
makes a substantial reduction in complexity.
~Andrew
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return()
2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich
@ 2026-04-08 17:34 ` Andrew Cooper
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Cooper @ 2026-04-08 17:34 UTC (permalink / raw)
To: Jan Beulich, xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
On 08/04/2026 1:23 pm, Jan Beulich wrote:
> With the value recorded on entry there's no need anymore to go hunt for
> the respective exception frame on the shadow stack. By deriving "ptr"
> from that field (without any offset), it then ends up pointin one slot
pointing
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1187,26 +1174,28 @@ void asmlinkage noreturn do_unhandled_tr
> static void fixup_exception_return(struct cpu_user_regs *regs,
> unsigned long fixup, unsigned long stub_ra)
> {
> - if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
> +#ifdef CONFIG_XEN_SHSTK
> {
> - unsigned long ssp, *ptr, *base;
> + unsigned long ssp = rdssp();
>
> - if ( (ssp = rdssp()) == SSP_NO_SHSTK )
> - goto shstk_done;
> + if ( ssp != SSP_NO_SHSTK )
> + {
> + unsigned long *ptr = _p(regs->entry_ssp);
> + unsigned long primary_shstk =
> + (ssp & ~(STACK_SIZE - 1)) +
> + (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
>
> - ptr = _p(ssp);
> - base = _p(get_shstk_bottom(ssp));
> + BUG_ON((regs->entry_ssp ^ primary_shstk) >> PAGE_SHIFT);
This BUG() isn't correct.
We can be in a fixup while in an IST handler, at which point SSP does
not point to the primary shstk. e.g. wrmsr_safe() in #MC.
If you're looking to at least roughly bound it, check that it's any
where in the stack range. The WRSS below will #PF if SSP isn't
referring to a shadow stack.
Alternatively, add an is_shstk_page() predicate which checks for
get_stack_page() == 5,0 which are the two shstk frames in the block of 8.
~Andrew
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces
2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich
@ 2026-04-08 17:53 ` Andrew Cooper
2026-04-09 8:42 ` Jan Beulich
2026-04-09 10:41 ` Jan Beulich
0 siblings, 2 replies; 12+ messages in thread
From: Andrew Cooper @ 2026-04-08 17:53 UTC (permalink / raw)
To: Jan Beulich, xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie
On 08/04/2026 1:23 pm, Jan Beulich wrote:
> Shadow stacks contain little more than return addresses, and they in
> particular allow precise call traces also without FRAME_POINTER.
Do you have an example of what such a backtrace now looks like ?
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> While the 'E' for exception frames is probably okay, I'm not overly
> happy with the 'C' (for CET). I would have preferred 'S' (for shadow),
> but we use that character already.
>
> As an alternative to suppressing output for the top level exception
> frame, adding the new code ahead of the 'R' output line (and then also
> ahead of the stack top read) could be considered.
>
> Perhaps having a printk() for the PV entry case is meaningless, for
> - no frame being pushed when entered from CPL=3 (64-bit PV),
> - no entry possible from CPL<3 (32-bit PV disabled when CET is active)?
> In which case the comment probably should just be "Bogus." and the code
> merely be "break;".
Yes, PV32 doesn't exist when CET-SS is active, and PV64 doesn't push a
frame. regs->ssp will point to the supervisor token (IDT delivery) or
on the boundary with the regular stack (FRED).
> Quite likely a number of other uses of is_active_kernel_text() also want
> amending with in_stub().
There are very few things which can exist on a shadow stack.
1) Tokens (supervisor, restore or prev)
2) Return address
3) Old-SSP
4) Old-CS
Intel recommend not allowing code or stacks to be in the bottom 64k of
the address space to prevent type confusion between Old-CS and the other
values. Xen matches this expectation, but it might be wise to check for
it explicitly.
Notably, we cannot ever get a value matching in_stub() (outside of
general memory corruption).
On SYSCALL/SYSENTER, SSP is set to 0, and we don't re-establish a proper
SSP until the SETSSBSY after leaving the stub. Similarly on SYSRET, the
CLRSSBSY sets SSP to 0 too.
An NMI hitting these paths should find regs->ssp pointing at it's own
shadow stack, with an Old-SSP of 0.
~Andrew
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points
2026-04-08 16:58 ` Andrew Cooper
@ 2026-04-09 8:13 ` Jan Beulich
2026-04-09 11:22 ` Andrew Cooper
0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2026-04-09 8:13 UTC (permalink / raw)
To: Andrew Cooper
Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org
On 08.04.2026 18:58, Andrew Cooper wrote:
> On 08/04/2026 1:22 pm, Jan Beulich wrote:
>> We will want to use that value for call trace generation, and likely
>> also to eliminate the somewhat fragile shadow stack searching done in
>> fixup_exception_return(). For those purposes, guest-only entry points do
>> not need to record that value.
>>
>> To keep the saving code simple, record our own SSP that corresponds to
>> an exception frame, pointing to the top of the shadow stack counterpart
>> of what the CPU has saved on the regular stack. Consuming code can then
>> work its way from there.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to
>> the error code isn't entirely nice; putting it ahead of %r15 would entail
>> other changes, though. An option may be to not make SSP handling part of
>> the macros in the first place. Thoughts?
>
> I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial
> complexity/inefficiency, and mixing of unrelated tasks.
>
> I have several series trying to purge them. I suppose I really ought to
> try and finish this off properly.
>
> While classing SSP as a "register" is probably fine, the ssp= parameter
> (and particular it's asymmetric nature) is on the wrong side of the
> "complex" argument IMO.
>
>> For POP_GPRS, does it really matter that it doesn't alter EFLAGS?
>
> Yes. The SYSCALL fix for one (reviewed, but waiting on final testing
> before I commit).
>
> Then the VT-x code when swapped to use POP_GPRS.
>
>
> To take a step back, you say that putting it ahead of %r15 would entail
> other changes. What changes?
SAVE_ALL's initial ADD, RESTORE_ALL's final SUB, and then the hunt for
anything which may simply assume UREGS_r15 to be 0. If UREGS_r<xyz> were
ordered by register number, I would have considered putting it where
%rsp nominally would go, but without that putting it somewhere in the
middle feels rather arbitrary.
> The resulting asm would be far cleaner.
I agree.
> It would be an rdssp;push on
> one side, and a pop into any register on the other side. Furthermore,
> given that the ssp= doesn't exclude storing it for some user frames,
> just store it for all. It's one push/pop into a hot cacheline, and
> makes a substantial reduction in complexity.
I'm having significant reservations against that. I use the 0 put there
in subsequent patches, to identify absence of that data being available.
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces
2026-04-08 17:53 ` Andrew Cooper
@ 2026-04-09 8:42 ` Jan Beulich
2026-04-09 10:41 ` Jan Beulich
1 sibling, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2026-04-09 8:42 UTC (permalink / raw)
To: Andrew Cooper
Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org
On 08.04.2026 19:53, Andrew Cooper wrote:
> On 08/04/2026 1:23 pm, Jan Beulich wrote:
>> Shadow stacks contain little more than return addresses, and they in
>> particular allow precise call traces also without FRAME_POINTER.
>
> Do you have an example of what such a backtrace now looks like ?
I don't have one to hand, but I'll add an example for v3.
>> ---
>> While the 'E' for exception frames is probably okay, I'm not overly
>> happy with the 'C' (for CET). I would have preferred 'S' (for shadow),
>> but we use that character already.
>>
>> As an alternative to suppressing output for the top level exception
>> frame, adding the new code ahead of the 'R' output line (and then also
>> ahead of the stack top read) could be considered.
>>
>> Perhaps having a printk() for the PV entry case is meaningless, for
>> - no frame being pushed when entered from CPL=3 (64-bit PV),
>> - no entry possible from CPL<3 (32-bit PV disabled when CET is active)?
>> In which case the comment probably should just be "Bogus." and the code
>> merely be "break;".
>
> Yes, PV32 doesn't exist when CET-SS is active, and PV64 doesn't push a
> frame. regs->ssp will point to the supervisor token (IDT delivery) or
> on the boundary with the regular stack (FRED).
Okay, I'll change that then as indicated.
>> Quite likely a number of other uses of is_active_kernel_text() also want
>> amending with in_stub().
>
> There are very few things which can exist on a shadow stack.
>
> 1) Tokens (supervisor, restore or prev)
> 2) Return address
> 3) Old-SSP
> 4) Old-CS
>
> Intel recommend not allowing code or stacks to be in the bottom 64k of
> the address space to prevent type confusion between Old-CS and the other
> values. Xen matches this expectation, but it might be wise to check for
> it explicitly.
What exactly do you mean here? Neither is_active_kernel_text() nor
in_stub() nor the further "!((val ^ *ptr) >> (PAGE_SHIFT + STACK_ORDER))"
(which I only now notice can't be quite right, as val was read from *ptr;
I think (unsigned long)ptr is meant instead) would yield true there. If,
as per above, in the remaining else we'll have just "break", what would
such a separate check be good for?
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces
2026-04-08 17:53 ` Andrew Cooper
2026-04-09 8:42 ` Jan Beulich
@ 2026-04-09 10:41 ` Jan Beulich
1 sibling, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2026-04-09 10:41 UTC (permalink / raw)
To: Andrew Cooper
Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org
On 08.04.2026 19:53, Andrew Cooper wrote:
> On 08/04/2026 1:23 pm, Jan Beulich wrote:
>> Shadow stacks contain little more than return addresses, and they in
>> particular allow precise call traces also without FRAME_POINTER.
>
> Do you have an example of what such a backtrace now looks like ?
(XEN) Xen call trace:
(XEN) [<ffff82d04032d730>] R extable.c#search_one_extable+0x70/0x73
(XEN) [<ffff82d04032d802>] C search_exception_table+0xc2/0x177
(XEN) [<ffff82d040358378>] C traps.c#extable_fixup.isra.0+0x18/0x6c
(XEN) [<ffff82d040358e3b>] C do_invalid_op+0xab/0x106
(XEN) [<ffff82d040201d98>] C x86_64/entry.S#handle_exception_saved+0x88/0xf4
(XEN) [<ffff82d07fffe044>] E ffff82d07fffe044
(XEN) [<ffff82d040412db0>] C stub_selftest+0xd0/0x168
(XEN) [<ffff82d0403508d6>] C setup.c#init_done+0x116/0x15a
Note how both the stub entry and stub_selftest() as its caller are
present here. The stub entry is missing when FRAME_POINTER=n (albeit
we could teach that code to recognize it), while the stub_selftest()
entry is missing when FRAME_POINTER=y (and we can't do anything about
this unless we wanted to add frame setup to stub generation).
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points
2026-04-09 8:13 ` Jan Beulich
@ 2026-04-09 11:22 ` Andrew Cooper
2026-04-09 12:44 ` Jan Beulich
0 siblings, 1 reply; 12+ messages in thread
From: Andrew Cooper @ 2026-04-09 11:22 UTC (permalink / raw)
To: Jan Beulich
Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie,
xen-devel@lists.xenproject.org
On 09/04/2026 9:13 am, Jan Beulich wrote:
> On 08.04.2026 18:58, Andrew Cooper wrote:
>> On 08/04/2026 1:22 pm, Jan Beulich wrote:
>>> We will want to use that value for call trace generation, and likely
>>> also to eliminate the somewhat fragile shadow stack searching done in
>>> fixup_exception_return(). For those purposes, guest-only entry points do
>>> not need to record that value.
>>>
>>> To keep the saving code simple, record our own SSP that corresponds to
>>> an exception frame, pointing to the top of the shadow stack counterpart
>>> of what the CPU has saved on the regular stack. Consuming code can then
>>> work its way from there.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to
>>> the error code isn't entirely nice; putting it ahead of %r15 would entail
>>> other changes, though. An option may be to not make SSP handling part of
>>> the macros in the first place. Thoughts?
>> I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial
>> complexity/inefficiency, and mixing of unrelated tasks.
>>
>> I have several series trying to purge them. I suppose I really ought to
>> try and finish this off properly.
>>
>> While classing SSP as a "register" is probably fine, the ssp= parameter
>> (and particular it's asymmetric nature) is on the wrong side of the
>> "complex" argument IMO.
>>
>>> For POP_GPRS, does it really matter that it doesn't alter EFLAGS?
>> Yes. The SYSCALL fix for one (reviewed, but waiting on final testing
>> before I commit).
>>
>> Then the VT-x code when swapped to use POP_GPRS.
>>
>>
>> To take a step back, you say that putting it ahead of %r15 would entail
>> other changes. What changes?
> SAVE_ALL's initial ADD, RESTORE_ALL's final SUB,
Ok, this problem goes away if they're purged.
I guess I should refresh and repost my series.
> and then the hunt for
> anything which may simply assume UREGS_r15 to be 0.
I highly doubt we've got anything like this. (And even if we do, it
wants fixing, not using as an argument against doing this the nicer way.)
> If UREGS_r<xyz> were
> ordered by register number, I would have considered putting it where
> %rsp nominally would go, but without that putting it somewhere in the
> middle feels rather arbitrary.
>
>> The resulting asm would be far cleaner.
> I agree.
>
>> It would be an rdssp;push on
>> one side, and a pop into any register on the other side. Furthermore,
>> given that the ssp= doesn't exclude storing it for some user frames,
>> just store it for all. It's one push/pop into a hot cacheline, and
>> makes a substantial reduction in complexity.
> I'm having significant reservations against that. I use the 0 put there
> in subsequent patches, to identify absence of that data being available.
Well, that's not safe then.
You've already got non-zero values there on entry-from-PV because
there's no CPL check gating RDSPP the common IDT paths.
~Andrew
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points
2026-04-09 11:22 ` Andrew Cooper
@ 2026-04-09 12:44 ` Jan Beulich
0 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2026-04-09 12:44 UTC (permalink / raw)
To: Andrew Cooper
Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org
On 09.04.2026 13:22, Andrew Cooper wrote:
> On 09/04/2026 9:13 am, Jan Beulich wrote:
>> On 08.04.2026 18:58, Andrew Cooper wrote:
>>> It would be an rdssp;push on
>>> one side, and a pop into any register on the other side. Furthermore,
>>> given that the ssp= doesn't exclude storing it for some user frames,
>>> just store it for all. It's one push/pop into a hot cacheline, and
>>> makes a substantial reduction in complexity.
>> I'm having significant reservations against that. I use the 0 put there
>> in subsequent patches, to identify absence of that data being available.
>
> Well, that's not safe then.
>
> You've already got non-zero values there on entry-from-PV because
> there's no CPL check gating RDSPP the common IDT paths.
In that case we get safe values (as read from the MSR during the privilege
level switch). And wait - no, the latter two patches don't make any such
assumptions, I don't think. I may have mis-remembered, or I may have
remembered how things were at a very early stage. So really the concern is
with RDSSPQ's resource use. This could in principle be as cheap as MOV,
but how do I know? (The latency/throughput data I can find doesn't include
this insn.) Plus for the purely PV entrypoints I don't see why we would
want/need the slightly larger code size that would result.
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2026-04-09 12:45 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
2026-04-08 16:58 ` Andrew Cooper
2026-04-09 8:13 ` Jan Beulich
2026-04-09 11:22 ` Andrew Cooper
2026-04-09 12:44 ` Jan Beulich
2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich
2026-04-08 17:34 ` Andrew Cooper
2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich
2026-04-08 17:53 ` Andrew Cooper
2026-04-09 8:42 ` Jan Beulich
2026-04-09 10:41 ` Jan Beulich
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.