* [PATCH v2 0/3] x86: CET-SS related adjustments
@ 2026-04-08 12:20 Jan Beulich
2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Jan Beulich @ 2026-04-08 12:20 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Wei Liu, Roger Pau Monné
One might think of this as follow-on to XSA-451, but that's not quite
the right order of events.
There are a few open aspects; see individual patches.
v2, besides addressing small issues, is mainly a re-base over FRED
work finally having gone in.
1: record SSP at non-guest entry points
2: traps: use entry_ssp in fixup_exception_return()
3: prefer shadow stack for producing call traces
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH v2 1/3] x86: record SSP at non-guest entry points 2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich @ 2026-04-08 12:22 ` Jan Beulich 2026-04-08 16:58 ` Andrew Cooper 2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich 2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich 2 siblings, 1 reply; 12+ messages in thread From: Jan Beulich @ 2026-04-08 12:22 UTC (permalink / raw) To: xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie We will want to use that value for call trace generation, and likely also to eliminate the somewhat fragile shadow stack searching done in fixup_exception_return(). For those purposes, guest-only entry points do not need to record that value. To keep the saving code simple, record our own SSP that corresponds to an exception frame, pointing to the top of the shadow stack counterpart of what the CPU has saved on the regular stack. Consuming code can then work its way from there. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to the error code isn't entirely nice; putting it ahead of %r15 would entail other changes, though. An option may be to not make SSP handling part of the macros in the first place. Thoughts? For POP_GPRS, does it really matter that it doesn't alter EFLAGS? Neither of the two currene uses relies on it, and without that requirement we could use ADD in place of LEA. (Of course there are also POP-based ways of getting rid of the SSP slot.) --- v2: Add comment ahead of SAVE_ALL. Add comma between its parameters. Re-base. --- a/xen/arch/x86/hvm/svm/entry.S +++ b/xen/arch/x86/hvm/svm/entry.S @@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap) vmrun - SAVE_ALL + SAVE_ALL ssp=0 GET_CURRENT(bx) --- a/xen/arch/x86/hvm/vmx/entry.S +++ b/xen/arch/x86/hvm/vmx/entry.S @@ -22,7 +22,7 @@ #include <asm/page.h> FUNC(vmx_asm_vmexit_handler) - SAVE_ALL + SAVE_ALL ssp=0 mov %cr2,%rax GET_CURRENT(bx) @@ -171,7 +171,7 @@ UNLIKELY_END(realmode) .Lvmx_vmentry_fail: sti - SAVE_ALL + SAVE_ALL ssp=0 /* * SPEC_CTRL_ENTRY notes --- a/xen/arch/x86/include/asm/asm_defns.h +++ b/xen/arch/x86/include/asm/asm_defns.h @@ -219,7 +219,11 @@ static always_inline void stac(void) #endif #ifdef __ASSEMBLER__ -.macro SAVE_ALL compat=0 +/* + * Use sites may override ssp to 0. It should never be overridden to 1. + * NB: compat=1 implies ssp=0. + */ +.macro SAVE_ALL compat=0, ssp=IS_ENABLED(CONFIG_XEN_SHSTK) addq $-(UREGS_error_code-UREGS_r15), %rsp cld movq %rdi,UREGS_rdi(%rsp) @@ -233,6 +237,9 @@ static always_inline void stac(void) movq %rax,UREGS_rax(%rsp) xor %eax, %eax .if !\compat +.if \ssp + rdsspq %rcx +.endif movq %r8,UREGS_r8(%rsp) movq %r9,UREGS_r9(%rsp) movq %r10,UREGS_r10(%rsp) @@ -262,6 +269,9 @@ static always_inline void stac(void) xor %r13d, %r13d xor %r14d, %r14d xor %r15d, %r15d +#ifdef CONFIG_XEN_SHSTK + mov %rcx, UREGS_entry_ssp(%rsp) +#endif .endm #define LOAD_ONE_REG(reg, compat) \ @@ -313,9 +323,14 @@ static always_inline void stac(void) .endm /* - * Push and clear GPRs + * Push and clear GPRs. + * + * Use sites may override ssp to 0. It should never be overridden to 1. */ -.macro PUSH_AND_CLEAR_GPRS +.macro PUSH_AND_CLEAR_GPRS ssp=IS_ENABLED(CONFIG_XEN_SHSTK) +#ifdef CONFIG_XEN_SHSTK + push $0 +#endif push %rdi xor %edi, %edi push %rsi @@ -326,6 +341,9 @@ static always_inline void stac(void) xor %ecx, %ecx push %rax xor %eax, %eax + .if \ssp + rdsspq %rcx + .endif push %r8 xor %r8d, %r8d push %r9 @@ -352,6 +370,9 @@ static always_inline void stac(void) xor %r14d, %r14d push %r15 xor %r15d, %r15d + .if \ssp + mov %rcx, UREGS_entry_ssp(%rsp) + .endif .endm /* @@ -373,6 +394,9 @@ static always_inline void stac(void) pop %rdx pop %rsi pop %rdi +#ifdef CONFIG_XEN_SHSTK + lea 8(%rsp), %rsp +#endif .endm #ifdef CONFIG_PV32 --- a/xen/arch/x86/include/asm/cpu-user-regs.h +++ b/xen/arch/x86/include/asm/cpu-user-regs.h @@ -27,6 +27,15 @@ struct cpu_user_regs union { uint64_t rsi; uint32_t esi; uint16_t si; uint8_t sil; }; union { uint64_t rdi; uint32_t edi; uint16_t di; uint8_t dil; }; +#ifdef CONFIG_XEN_SHSTK + /* + * This points _at_ the corresponding shadow stack frame; it is _not_ the + * outer context's SSP. That, if the outer context has CET-SS enabled, + * is stored in the top slot of the pointed to shadow stack frame. + */ + uint64_t entry_ssp; +#endif + /* * During IDT delivery for exceptions with an error code, hardware pushes * to this point. Entry_vector is filled in by software. --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -53,6 +53,9 @@ void __dummy__(void) OFFSET(UREGS_eflags, struct cpu_user_regs, rflags); OFFSET(UREGS_rsp, struct cpu_user_regs, rsp); OFFSET(UREGS_ss, struct cpu_user_regs, ss); +#ifdef CONFIG_XEN_SHSTK + OFFSET(UREGS_entry_ssp, struct cpu_user_regs, entry_ssp); +#endif DEFINE(UREGS_kernel_sizeof, sizeof(struct cpu_user_regs)); BLANK(); --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -275,7 +275,7 @@ FUNC(lstar_enter) pushq $0 BUILD_BUG_ON(TRAP_syscall & 0xff) movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp) - SAVE_ALL + SAVE_ALL ssp=0 GET_STACK_END(14) @@ -315,7 +315,7 @@ FUNC(cstar_enter) pushq $0 BUILD_BUG_ON(TRAP_syscall & 0xff) movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp) - SAVE_ALL + SAVE_ALL ssp=0 GET_STACK_END(14) @@ -359,7 +359,7 @@ LABEL(sysenter_eflags_saved, 0) pushq $0 BUILD_BUG_ON(TRAP_syscall & 0xff) movb $TRAP_syscall >> 8, EFRAME_entry_vector + 1(%rsp) - SAVE_ALL + SAVE_ALL ssp=0 GET_STACK_END(14) @@ -415,7 +415,7 @@ FUNC(entry_int80) ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP pushq $0 movb $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL + SAVE_ALL ssp=0 GET_STACK_END(14) --- a/xen/arch/x86/x86_64/entry-fred.S +++ b/xen/arch/x86/x86_64/entry-fred.S @@ -10,7 +10,7 @@ /* The Ring3 entry point is required to be 4k aligned. */ FUNC(entry_FRED_R3, 4096) - PUSH_AND_CLEAR_GPRS + PUSH_AND_CLEAR_GPRS ssp=0 mov %rsp, %rdi call entry_from_pv @@ -38,7 +38,7 @@ LABEL(eretu, 0) END(eretu_exit_to_guest) FUNC(eretu_error_dom_crash) - PUSH_AND_CLEAR_GPRS + PUSH_AND_CLEAR_GPRS ssp=0 sti call asm_domain_crash_synchronous /* Does not return */ END(eretu_error_dom_crash) ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points 2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich @ 2026-04-08 16:58 ` Andrew Cooper 2026-04-09 8:13 ` Jan Beulich 0 siblings, 1 reply; 12+ messages in thread From: Andrew Cooper @ 2026-04-08 16:58 UTC (permalink / raw) To: Jan Beulich, xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie On 08/04/2026 1:22 pm, Jan Beulich wrote: > We will want to use that value for call trace generation, and likely > also to eliminate the somewhat fragile shadow stack searching done in > fixup_exception_return(). For those purposes, guest-only entry points do > not need to record that value. > > To keep the saving code simple, record our own SSP that corresponds to > an exception frame, pointing to the top of the shadow stack counterpart > of what the CPU has saved on the regular stack. Consuming code can then > work its way from there. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to > the error code isn't entirely nice; putting it ahead of %r15 would entail > other changes, though. An option may be to not make SSP handling part of > the macros in the first place. Thoughts? I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial complexity/inefficiency, and mixing of unrelated tasks. I have several series trying to purge them. I suppose I really ought to try and finish this off properly. While classing SSP as a "register" is probably fine, the ssp= parameter (and particular it's asymmetric nature) is on the wrong side of the "complex" argument IMO. > For POP_GPRS, does it really matter that it doesn't alter EFLAGS? Yes. The SYSCALL fix for one (reviewed, but waiting on final testing before I commit). Then the VT-x code when swapped to use POP_GPRS. To take a step back, you say that putting it ahead of %r15 would entail other changes. What changes? The resulting asm would be far cleaner. It would be an rdssp;push on one side, and a pop into any register on the other side. Furthermore, given that the ssp= doesn't exclude storing it for some user frames, just store it for all. It's one push/pop into a hot cacheline, and makes a substantial reduction in complexity. ~Andrew ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points 2026-04-08 16:58 ` Andrew Cooper @ 2026-04-09 8:13 ` Jan Beulich 2026-04-09 11:22 ` Andrew Cooper 0 siblings, 1 reply; 12+ messages in thread From: Jan Beulich @ 2026-04-09 8:13 UTC (permalink / raw) To: Andrew Cooper Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org On 08.04.2026 18:58, Andrew Cooper wrote: > On 08/04/2026 1:22 pm, Jan Beulich wrote: >> We will want to use that value for call trace generation, and likely >> also to eliminate the somewhat fragile shadow stack searching done in >> fixup_exception_return(). For those purposes, guest-only entry points do >> not need to record that value. >> >> To keep the saving code simple, record our own SSP that corresponds to >> an exception frame, pointing to the top of the shadow stack counterpart >> of what the CPU has saved on the regular stack. Consuming code can then >> work its way from there. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> --- >> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to >> the error code isn't entirely nice; putting it ahead of %r15 would entail >> other changes, though. An option may be to not make SSP handling part of >> the macros in the first place. Thoughts? > > I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial > complexity/inefficiency, and mixing of unrelated tasks. > > I have several series trying to purge them. I suppose I really ought to > try and finish this off properly. > > While classing SSP as a "register" is probably fine, the ssp= parameter > (and particular it's asymmetric nature) is on the wrong side of the > "complex" argument IMO. > >> For POP_GPRS, does it really matter that it doesn't alter EFLAGS? > > Yes. The SYSCALL fix for one (reviewed, but waiting on final testing > before I commit). > > Then the VT-x code when swapped to use POP_GPRS. > > > To take a step back, you say that putting it ahead of %r15 would entail > other changes. What changes? SAVE_ALL's initial ADD, RESTORE_ALL's final SUB, and then the hunt for anything which may simply assume UREGS_r15 to be 0. If UREGS_r<xyz> were ordered by register number, I would have considered putting it where %rsp nominally would go, but without that putting it somewhere in the middle feels rather arbitrary. > The resulting asm would be far cleaner. I agree. > It would be an rdssp;push on > one side, and a pop into any register on the other side. Furthermore, > given that the ssp= doesn't exclude storing it for some user frames, > just store it for all. It's one push/pop into a hot cacheline, and > makes a substantial reduction in complexity. I'm having significant reservations against that. I use the 0 put there in subsequent patches, to identify absence of that data being available. Jan ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points 2026-04-09 8:13 ` Jan Beulich @ 2026-04-09 11:22 ` Andrew Cooper 2026-04-09 12:44 ` Jan Beulich 0 siblings, 1 reply; 12+ messages in thread From: Andrew Cooper @ 2026-04-09 11:22 UTC (permalink / raw) To: Jan Beulich Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org On 09/04/2026 9:13 am, Jan Beulich wrote: > On 08.04.2026 18:58, Andrew Cooper wrote: >> On 08/04/2026 1:22 pm, Jan Beulich wrote: >>> We will want to use that value for call trace generation, and likely >>> also to eliminate the somewhat fragile shadow stack searching done in >>> fixup_exception_return(). For those purposes, guest-only entry points do >>> not need to record that value. >>> >>> To keep the saving code simple, record our own SSP that corresponds to >>> an exception frame, pointing to the top of the shadow stack counterpart >>> of what the CPU has saved on the regular stack. Consuming code can then >>> work its way from there. >>> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >>> --- >>> For PUSH_AND_CLEAR_GPRS and POP_GPRS, putting the new field right next to >>> the error code isn't entirely nice; putting it ahead of %r15 would entail >>> other changes, though. An option may be to not make SSP handling part of >>> the macros in the first place. Thoughts? >> I have a firm dislike for SAVE/RESTORE_ALL, both for their substantial >> complexity/inefficiency, and mixing of unrelated tasks. >> >> I have several series trying to purge them. I suppose I really ought to >> try and finish this off properly. >> >> While classing SSP as a "register" is probably fine, the ssp= parameter >> (and particular it's asymmetric nature) is on the wrong side of the >> "complex" argument IMO. >> >>> For POP_GPRS, does it really matter that it doesn't alter EFLAGS? >> Yes. The SYSCALL fix for one (reviewed, but waiting on final testing >> before I commit). >> >> Then the VT-x code when swapped to use POP_GPRS. >> >> >> To take a step back, you say that putting it ahead of %r15 would entail >> other changes. What changes? > SAVE_ALL's initial ADD, RESTORE_ALL's final SUB, Ok, this problem goes away if they're purged. I guess I should refresh and repost my series. > and then the hunt for > anything which may simply assume UREGS_r15 to be 0. I highly doubt we've got anything like this. (And even if we do, it wants fixing, not using as an argument against doing this the nicer way.) > If UREGS_r<xyz> were > ordered by register number, I would have considered putting it where > %rsp nominally would go, but without that putting it somewhere in the > middle feels rather arbitrary. > >> The resulting asm would be far cleaner. > I agree. > >> It would be an rdssp;push on >> one side, and a pop into any register on the other side. Furthermore, >> given that the ssp= doesn't exclude storing it for some user frames, >> just store it for all. It's one push/pop into a hot cacheline, and >> makes a substantial reduction in complexity. > I'm having significant reservations against that. I use the 0 put there > in subsequent patches, to identify absence of that data being available. Well, that's not safe then. You've already got non-zero values there on entry-from-PV because there's no CPL check gating RDSPP the common IDT paths. ~Andrew ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/3] x86: record SSP at non-guest entry points 2026-04-09 11:22 ` Andrew Cooper @ 2026-04-09 12:44 ` Jan Beulich 0 siblings, 0 replies; 12+ messages in thread From: Jan Beulich @ 2026-04-09 12:44 UTC (permalink / raw) To: Andrew Cooper Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org On 09.04.2026 13:22, Andrew Cooper wrote: > On 09/04/2026 9:13 am, Jan Beulich wrote: >> On 08.04.2026 18:58, Andrew Cooper wrote: >>> It would be an rdssp;push on >>> one side, and a pop into any register on the other side. Furthermore, >>> given that the ssp= doesn't exclude storing it for some user frames, >>> just store it for all. It's one push/pop into a hot cacheline, and >>> makes a substantial reduction in complexity. >> I'm having significant reservations against that. I use the 0 put there >> in subsequent patches, to identify absence of that data being available. > > Well, that's not safe then. > > You've already got non-zero values there on entry-from-PV because > there's no CPL check gating RDSPP the common IDT paths. In that case we get safe values (as read from the MSR during the privilege level switch). And wait - no, the latter two patches don't make any such assumptions, I don't think. I may have mis-remembered, or I may have remembered how things were at a very early stage. So really the concern is with RDSSPQ's resource use. This could in principle be as cheap as MOV, but how do I know? (The latency/throughput data I can find doesn't include this insn.) Plus for the purely PV entrypoints I don't see why we would want/need the slightly larger code size that would result. Jan ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() 2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich 2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich @ 2026-04-08 12:23 ` Jan Beulich 2026-04-08 17:34 ` Andrew Cooper 2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich 2 siblings, 1 reply; 12+ messages in thread From: Jan Beulich @ 2026-04-08 12:23 UTC (permalink / raw) To: xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie With the value recorded on entry there's no need anymore to go hunt for the respective exception frame on the shadow stack. By deriving "ptr" from that field (without any offset), it then ends up pointin one slot lower than before. Therefore all array indexes need incrementing, nicely doing away with all the negative ones. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Indentation of the prior inner (but not innermost) if()'s body is deliberately left untouched, to aid review. It'll be adjusted in a separate follow-on patch. --- v2: IS_ENABLED() -> #ifdef. Re-base. --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -690,19 +690,6 @@ unsigned long get_stack_trace_bottom(uns } } -static unsigned long get_shstk_bottom(unsigned long sp) -{ - /* SAF-11-safe */ - switch ( get_stack_page(sp) ) - { -#ifdef CONFIG_XEN_SHSTK - case 0: return ROUNDUP(sp, IST_SHSTK_SIZE) - sizeof(unsigned long); - case 5: return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long); -#endif - default: return sp - sizeof(unsigned long); - } -} - unsigned long get_stack_dump_bottom(unsigned long sp) { switch ( get_stack_page(sp) ) @@ -1187,26 +1174,28 @@ void asmlinkage noreturn do_unhandled_tr static void fixup_exception_return(struct cpu_user_regs *regs, unsigned long fixup, unsigned long stub_ra) { - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) +#ifdef CONFIG_XEN_SHSTK { - unsigned long ssp, *ptr, *base; + unsigned long ssp = rdssp(); - if ( (ssp = rdssp()) == SSP_NO_SHSTK ) - goto shstk_done; + if ( ssp != SSP_NO_SHSTK ) + { + unsigned long *ptr = _p(regs->entry_ssp); + unsigned long primary_shstk = + (ssp & ~(STACK_SIZE - 1)) + + (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; - ptr = _p(ssp); - base = _p(get_shstk_bottom(ssp)); + BUG_ON((regs->entry_ssp ^ primary_shstk) >> PAGE_SHIFT); - for ( ; ptr < base; ++ptr ) - { /* - * Search for %rip. The shstk currently looks like this: + * The shstk currently looks like this: * * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] * ... [Pointed to by SSP for most exceptions, empty in IST cases] * %cs [== regs->cs] * %rip [== regs->rip] - * SSP [Likely points to 3 slots higher, above %cs] + * SSP [Pointed to by entry_ssp; Likely points to 3 slots + * higher, above %cs] * ... [call tree to this function, likely 2/3 slots] * * and we want to overwrite %rip with fixup. There are two @@ -1219,13 +1208,10 @@ static void fixup_exception_return(struc * * Check for both regs->rip and regs->cs matching. */ - if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) - { - unsigned long primary_shstk = - (ssp & ~(STACK_SIZE - 1)) + - (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; + BUG_ON(ptr[1] != regs->rip || ptr[2] != regs->cs); - wrss(fixup, ptr); + { + wrss(fixup, &ptr[1]); if ( !stub_ra ) goto shstk_done; @@ -1242,7 +1228,7 @@ static void fixup_exception_return(struc * - if we're on an IST stack, we need to increment the * original SSP. */ - BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); + BUG_ON((ptr[0] ^ primary_shstk) >> PAGE_SHIFT); if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) { @@ -1251,39 +1237,30 @@ static void fixup_exception_return(struc * addresses actually match. Then increment the interrupted * context's SSP. */ - BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); - wrss(ptr[-1] + 8, &ptr[-1]); + BUG_ON(stub_ra != *(unsigned long*)ptr[0]); + wrss(ptr[0] + 8, &ptr[0]); goto shstk_done; } /* Make sure the two return addresses actually match. */ - BUG_ON(stub_ra != ptr[2]); + BUG_ON(stub_ra != ptr[3]); /* Move exception frame, updating SSP there. */ - wrss(ptr[1], &ptr[2]); /* %cs */ - wrss(ptr[0], &ptr[1]); /* %rip */ - wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ + wrss(ptr[2], &ptr[3]); /* %cs */ + wrss(ptr[1], &ptr[2]); /* %rip */ + wrss(ptr[0] + 8, &ptr[1]); /* SSP */ /* Move all newer entries. */ - while ( --ptr != _p(ssp) ) - wrss(ptr[-1], &ptr[0]); + while ( ptr-- != _p(ssp) ) + wrss(ptr[0], &ptr[1]); /* Finally account for our own stack having shifted up. */ asm volatile ( "incsspd %0" :: "r" (2) ); - - goto shstk_done; } } - - /* - * We failed to locate and fix up the shadow IRET frame. This could - * be due to shadow stack corruption, or bad logic above. We cannot - * continue executing the interrupted context. - */ - BUG(); - } shstk_done: +#endif /* CONFIG_XEN_SHSTK */ /* Fixup the regular stack. */ regs->rip = fixup; ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() 2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich @ 2026-04-08 17:34 ` Andrew Cooper 0 siblings, 0 replies; 12+ messages in thread From: Andrew Cooper @ 2026-04-08 17:34 UTC (permalink / raw) To: Jan Beulich, xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie On 08/04/2026 1:23 pm, Jan Beulich wrote: > With the value recorded on entry there's no need anymore to go hunt for > the respective exception frame on the shadow stack. By deriving "ptr" > from that field (without any offset), it then ends up pointin one slot pointing > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -1187,26 +1174,28 @@ void asmlinkage noreturn do_unhandled_tr > static void fixup_exception_return(struct cpu_user_regs *regs, > unsigned long fixup, unsigned long stub_ra) > { > - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) > +#ifdef CONFIG_XEN_SHSTK > { > - unsigned long ssp, *ptr, *base; > + unsigned long ssp = rdssp(); > > - if ( (ssp = rdssp()) == SSP_NO_SHSTK ) > - goto shstk_done; > + if ( ssp != SSP_NO_SHSTK ) > + { > + unsigned long *ptr = _p(regs->entry_ssp); > + unsigned long primary_shstk = > + (ssp & ~(STACK_SIZE - 1)) + > + (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; > > - ptr = _p(ssp); > - base = _p(get_shstk_bottom(ssp)); > + BUG_ON((regs->entry_ssp ^ primary_shstk) >> PAGE_SHIFT); This BUG() isn't correct. We can be in a fixup while in an IST handler, at which point SSP does not point to the primary shstk. e.g. wrmsr_safe() in #MC. If you're looking to at least roughly bound it, check that it's any where in the stack range. The WRSS below will #PF if SSP isn't referring to a shadow stack. Alternatively, add an is_shstk_page() predicate which checks for get_stack_page() == 5,0 which are the two shstk frames in the block of 8. ~Andrew ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 3/3] x86: prefer shadow stack for producing call traces 2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich 2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich 2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich @ 2026-04-08 12:23 ` Jan Beulich 2026-04-08 17:53 ` Andrew Cooper 2 siblings, 1 reply; 12+ messages in thread From: Jan Beulich @ 2026-04-08 12:23 UTC (permalink / raw) To: xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie Shadow stacks contain little more than return addresses, and they in particular allow precise call traces also without FRAME_POINTER. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- While the 'E' for exception frames is probably okay, I'm not overly happy with the 'C' (for CET). I would have preferred 'S' (for shadow), but we use that character already. As an alternative to suppressing output for the top level exception frame, adding the new code ahead of the 'R' output line (and then also ahead of the stack top read) could be considered. Perhaps having a printk() for the PV entry case is meaningless, for - no frame being pushed when entered from CPL=3 (64-bit PV), - no entry possible from CPL<3 (32-bit PV disabled when CET is active)? In which case the comment probably should just be "Bogus." and the code merely be "break;". Quite likely a number of other uses of is_active_kernel_text() also want amending with in_stub(). --- v2: IS_ENABLED() -> #ifdef. Re-base. --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -48,6 +48,7 @@ #include <asm/shared.h> #include <asm/shstk.h> #include <asm/smp.h> +#include <asm/stubs.h> #include <asm/system.h> #include <asm/traps.h> #include <asm/uaccess.h> @@ -705,6 +706,13 @@ unsigned long get_stack_dump_bottom(unsi } } +#ifdef CONFIG_XEN_SHSTK +static bool in_stub(unsigned long addr) +{ + return !((this_cpu(stubs.addr) ^ addr) >> STUB_BUF_SHIFT); +} +#endif + #if !defined(CONFIG_FRAME_POINTER) /* @@ -797,6 +805,52 @@ static void show_trace(const struct cpu_ !is_active_kernel_text(tos) ) printk(" [<%p>] R %pS\n", _p(regs->rip), _p(regs->rip)); +#ifdef CONFIG_XEN_SHSTK + if ( rdssp() != SSP_NO_SHSTK ) + { + const unsigned long *ptr = _p(regs->entry_ssp); + unsigned int n; + + for ( n = 0; (unsigned long)ptr & (PAGE_SIZE - sizeof(*ptr)); ++n ) + { + unsigned long val = *ptr; + + if ( is_active_kernel_text(val) || in_stub(val) ) + { + /* Normal return address entry. */ + printk(" [<%p>] C %pS\n", _p(val), _p(val)); + ++ptr; + } + else if ( !((val ^ *ptr) >> (PAGE_SHIFT + STACK_ORDER)) ) + { + if ( val & (sizeof(val) - 1) ) + { + /* Most likely a supervisor token. */ + break; + } + + /* + * Ought to be a hypervisor interruption frame. But don't + * (re)log the current frame's %rip. + */ + if ( n || ptr[1] != regs->rip ) + printk(" [<%p>] E %pS\n", _p(ptr[1]), _p(ptr[1])); + ptr = _p(val); + } + else + { + /* Ought to be a PV guest hypercall/interruption frame. */ + printk(" %04lx:[<%p>] E\n", ptr[2], _p(ptr[1])); + ptr = 0; + } + } + + /* Fall back to legacy stack trace if nothing was logged at all. */ + if ( n ) + return; + } +#endif /* CONFIG_XEN_SHSTK */ + if ( fault ) { printk(" [Fault on access]\n"); ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces 2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich @ 2026-04-08 17:53 ` Andrew Cooper 2026-04-09 8:42 ` Jan Beulich 2026-04-09 10:41 ` Jan Beulich 0 siblings, 2 replies; 12+ messages in thread From: Andrew Cooper @ 2026-04-08 17:53 UTC (permalink / raw) To: Jan Beulich, xen-devel@lists.xenproject.org Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie On 08/04/2026 1:23 pm, Jan Beulich wrote: > Shadow stacks contain little more than return addresses, and they in > particular allow precise call traces also without FRAME_POINTER. Do you have an example of what such a backtrace now looks like ? > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > While the 'E' for exception frames is probably okay, I'm not overly > happy with the 'C' (for CET). I would have preferred 'S' (for shadow), > but we use that character already. > > As an alternative to suppressing output for the top level exception > frame, adding the new code ahead of the 'R' output line (and then also > ahead of the stack top read) could be considered. > > Perhaps having a printk() for the PV entry case is meaningless, for > - no frame being pushed when entered from CPL=3 (64-bit PV), > - no entry possible from CPL<3 (32-bit PV disabled when CET is active)? > In which case the comment probably should just be "Bogus." and the code > merely be "break;". Yes, PV32 doesn't exist when CET-SS is active, and PV64 doesn't push a frame. regs->ssp will point to the supervisor token (IDT delivery) or on the boundary with the regular stack (FRED). > Quite likely a number of other uses of is_active_kernel_text() also want > amending with in_stub(). There are very few things which can exist on a shadow stack. 1) Tokens (supervisor, restore or prev) 2) Return address 3) Old-SSP 4) Old-CS Intel recommend not allowing code or stacks to be in the bottom 64k of the address space to prevent type confusion between Old-CS and the other values. Xen matches this expectation, but it might be wise to check for it explicitly. Notably, we cannot ever get a value matching in_stub() (outside of general memory corruption). On SYSCALL/SYSENTER, SSP is set to 0, and we don't re-establish a proper SSP until the SETSSBSY after leaving the stub. Similarly on SYSRET, the CLRSSBSY sets SSP to 0 too. An NMI hitting these paths should find regs->ssp pointing at it's own shadow stack, with an Old-SSP of 0. ~Andrew ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces 2026-04-08 17:53 ` Andrew Cooper @ 2026-04-09 8:42 ` Jan Beulich 2026-04-09 10:41 ` Jan Beulich 1 sibling, 0 replies; 12+ messages in thread From: Jan Beulich @ 2026-04-09 8:42 UTC (permalink / raw) To: Andrew Cooper Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org On 08.04.2026 19:53, Andrew Cooper wrote: > On 08/04/2026 1:23 pm, Jan Beulich wrote: >> Shadow stacks contain little more than return addresses, and they in >> particular allow precise call traces also without FRAME_POINTER. > > Do you have an example of what such a backtrace now looks like ? I don't have one to hand, but I'll add an example for v3. >> --- >> While the 'E' for exception frames is probably okay, I'm not overly >> happy with the 'C' (for CET). I would have preferred 'S' (for shadow), >> but we use that character already. >> >> As an alternative to suppressing output for the top level exception >> frame, adding the new code ahead of the 'R' output line (and then also >> ahead of the stack top read) could be considered. >> >> Perhaps having a printk() for the PV entry case is meaningless, for >> - no frame being pushed when entered from CPL=3 (64-bit PV), >> - no entry possible from CPL<3 (32-bit PV disabled when CET is active)? >> In which case the comment probably should just be "Bogus." and the code >> merely be "break;". > > Yes, PV32 doesn't exist when CET-SS is active, and PV64 doesn't push a > frame. regs->ssp will point to the supervisor token (IDT delivery) or > on the boundary with the regular stack (FRED). Okay, I'll change that then as indicated. >> Quite likely a number of other uses of is_active_kernel_text() also want >> amending with in_stub(). > > There are very few things which can exist on a shadow stack. > > 1) Tokens (supervisor, restore or prev) > 2) Return address > 3) Old-SSP > 4) Old-CS > > Intel recommend not allowing code or stacks to be in the bottom 64k of > the address space to prevent type confusion between Old-CS and the other > values. Xen matches this expectation, but it might be wise to check for > it explicitly. What exactly do you mean here? Neither is_active_kernel_text() nor in_stub() nor the further "!((val ^ *ptr) >> (PAGE_SHIFT + STACK_ORDER))" (which I only now notice can't be quite right, as val was read from *ptr; I think (unsigned long)ptr is meant instead) would yield true there. If, as per above, in the remaining else we'll have just "break", what would such a separate check be good for? Jan ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 3/3] x86: prefer shadow stack for producing call traces 2026-04-08 17:53 ` Andrew Cooper 2026-04-09 8:42 ` Jan Beulich @ 2026-04-09 10:41 ` Jan Beulich 1 sibling, 0 replies; 12+ messages in thread From: Jan Beulich @ 2026-04-09 10:41 UTC (permalink / raw) To: Andrew Cooper Cc: Roger Pau Monné, Teddy Astie, xen-devel@lists.xenproject.org On 08.04.2026 19:53, Andrew Cooper wrote: > On 08/04/2026 1:23 pm, Jan Beulich wrote: >> Shadow stacks contain little more than return addresses, and they in >> particular allow precise call traces also without FRAME_POINTER. > > Do you have an example of what such a backtrace now looks like ? (XEN) Xen call trace: (XEN) [<ffff82d04032d730>] R extable.c#search_one_extable+0x70/0x73 (XEN) [<ffff82d04032d802>] C search_exception_table+0xc2/0x177 (XEN) [<ffff82d040358378>] C traps.c#extable_fixup.isra.0+0x18/0x6c (XEN) [<ffff82d040358e3b>] C do_invalid_op+0xab/0x106 (XEN) [<ffff82d040201d98>] C x86_64/entry.S#handle_exception_saved+0x88/0xf4 (XEN) [<ffff82d07fffe044>] E ffff82d07fffe044 (XEN) [<ffff82d040412db0>] C stub_selftest+0xd0/0x168 (XEN) [<ffff82d0403508d6>] C setup.c#init_done+0x116/0x15a Note how both the stub entry and stub_selftest() as its caller are present here. The stub entry is missing when FRAME_POINTER=n (albeit we could teach that code to recognize it), while the stub_selftest() entry is missing when FRAME_POINTER=y (and we can't do anything about this unless we wanted to add frame setup to stub generation). Jan ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2026-04-09 12:45 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-04-08 12:20 [PATCH v2 0/3] x86: CET-SS related adjustments Jan Beulich 2026-04-08 12:22 ` [PATCH v2 1/3] x86: record SSP at non-guest entry points Jan Beulich 2026-04-08 16:58 ` Andrew Cooper 2026-04-09 8:13 ` Jan Beulich 2026-04-09 11:22 ` Andrew Cooper 2026-04-09 12:44 ` Jan Beulich 2026-04-08 12:23 ` [PATCH v2 2/3] x86/traps: use entry_ssp in fixup_exception_return() Jan Beulich 2026-04-08 17:34 ` Andrew Cooper 2026-04-08 12:23 ` [PATCH v2 3/3] x86: prefer shadow stack for producing call traces Jan Beulich 2026-04-08 17:53 ` Andrew Cooper 2026-04-09 8:42 ` Jan Beulich 2026-04-09 10:41 ` Jan Beulich
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.